Skip to content

Commit 1b41057

Browse files
Merge pull request #733 from HannahVernon/feature/dismiss-reliability-718
fix: dismiss reliability - write lock, batched UPDATE, transaction, timeout (#718)
2 parents 9447cb1 + 800892a commit 1b41057

6 files changed

Lines changed: 544 additions & 168 deletions

File tree

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Threading.Tasks;
5+
using DuckDB.NET.Data;
6+
using PerformanceMonitorLite.Database;
7+
using PerformanceMonitorLite.Tests.Helpers;
8+
using Xunit;
9+
10+
namespace PerformanceMonitorLite.Tests;
11+
12+
/// <summary>
13+
/// Tests that dismiss operations use batched UPDATEs within transactions
14+
/// and that the write lock prevents race conditions.
15+
/// </summary>
16+
public class DismissReliabilityTests : IDisposable
17+
{
18+
private readonly string _tempDir;
19+
private readonly string _dbPath;
20+
private readonly TestAlertDataHelper _helper;
21+
22+
public DismissReliabilityTests()
23+
{
24+
_tempDir = Path.Combine(Path.GetTempPath(), "LiteTests_" + Guid.NewGuid().ToString("N")[..8]);
25+
Directory.CreateDirectory(_tempDir);
26+
_dbPath = Path.Combine(_tempDir, "test.duckdb");
27+
_helper = new TestAlertDataHelper(_dbPath);
28+
}
29+
30+
public void Dispose()
31+
{
32+
try
33+
{
34+
if (Directory.Exists(_tempDir))
35+
Directory.Delete(_tempDir, recursive: true);
36+
}
37+
catch
38+
{
39+
/* Best-effort cleanup */
40+
}
41+
}
42+
43+
private async Task<DuckDBConnection> InitializeDatabaseAsync()
44+
{
45+
var initializer = new DuckDbInitializer(_dbPath);
46+
await initializer.InitializeAsync();
47+
48+
var connection = new DuckDBConnection($"Data Source={_dbPath}");
49+
await connection.OpenAsync(TestContext.Current.CancellationToken);
50+
return connection;
51+
}
52+
53+
[Fact]
54+
public async Task BatchedUpdate_DismissesMultipleAlertsInSingleStatement()
55+
{
56+
using var connection = await InitializeDatabaseAsync();
57+
58+
// Capture fixed timestamps to ensure insert and update match exactly
59+
var timestamps = new DateTime[5];
60+
for (int i = 0; i < 5; i++)
61+
timestamps[i] = DateTime.UtcNow.AddHours(-(i + 1));
62+
63+
// Insert 5 live alerts with the captured timestamps
64+
for (int i = 0; i < 5; i++)
65+
{
66+
await _helper.InsertLiveAlertAsync(
67+
connection,
68+
timestamps[i],
69+
1, "Server1", $"Alert_{i}");
70+
}
71+
72+
// Build a batched UPDATE matching the pattern used in DismissAlertsAsync
73+
var valuesClauses = new System.Text.StringBuilder();
74+
var parameters = new List<DuckDBParameter>();
75+
for (int i = 0; i < 5; i++)
76+
{
77+
if (i > 0) valuesClauses.Append(", ");
78+
var p1 = $"${i * 3 + 1}";
79+
var p2 = $"${i * 3 + 2}";
80+
var p3 = $"${i * 3 + 3}";
81+
valuesClauses.Append($"({p1}, {p2}, {p3})");
82+
parameters.Add(new DuckDBParameter { Value = timestamps[i] });
83+
parameters.Add(new DuckDBParameter { Value = 1 });
84+
parameters.Add(new DuckDBParameter { Value = $"Alert_{i}" });
85+
}
86+
87+
using var cmd = connection.CreateCommand();
88+
cmd.CommandText = $@"
89+
UPDATE config_alert_log
90+
SET dismissed = TRUE
91+
WHERE dismissed = FALSE
92+
AND (alert_time, server_id, metric_name) IN (VALUES {valuesClauses})";
93+
foreach (var p in parameters)
94+
cmd.Parameters.Add(p);
95+
96+
var affected = await cmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
97+
Assert.Equal(5, affected);
98+
99+
// Verify all are dismissed
100+
using var checkCmd = connection.CreateCommand();
101+
checkCmd.CommandText = "SELECT COUNT(1) FROM config_alert_log WHERE dismissed = FALSE";
102+
var remaining = Convert.ToInt64(await checkCmd.ExecuteScalarAsync(TestContext.Current.CancellationToken));
103+
Assert.Equal(0, remaining);
104+
}
105+
106+
[Fact]
107+
public async Task BatchedUpdate_ReturnsCorrectCount_WhenSomeAlreadyDismissed()
108+
{
109+
using var connection = await InitializeDatabaseAsync();
110+
111+
// Insert 3 alerts, dismiss 1 beforehand
112+
var time1 = DateTime.UtcNow.AddHours(-1);
113+
var time2 = DateTime.UtcNow.AddHours(-2);
114+
var time3 = DateTime.UtcNow.AddHours(-3);
115+
116+
await _helper.InsertLiveAlertAsync(connection, time1, 1, "Server1", "Alert_A");
117+
await _helper.InsertLiveAlertAsync(connection, time2, 1, "Server1", "Alert_B");
118+
await _helper.InsertLiveAlertAsync(connection, time3, 1, "Server1", "Alert_C", dismissed: true);
119+
120+
// Batch dismiss all 3 — only 2 should be affected (Alert_C already dismissed)
121+
using var cmd = connection.CreateCommand();
122+
cmd.CommandText = @"
123+
UPDATE config_alert_log
124+
SET dismissed = TRUE
125+
WHERE dismissed = FALSE
126+
AND (alert_time, server_id, metric_name) IN (VALUES ($1, $2, $3), ($4, $5, $6), ($7, $8, $9))";
127+
cmd.Parameters.Add(new DuckDBParameter { Value = time1 });
128+
cmd.Parameters.Add(new DuckDBParameter { Value = 1 });
129+
cmd.Parameters.Add(new DuckDBParameter { Value = "Alert_A" });
130+
cmd.Parameters.Add(new DuckDBParameter { Value = time2 });
131+
cmd.Parameters.Add(new DuckDBParameter { Value = 1 });
132+
cmd.Parameters.Add(new DuckDBParameter { Value = "Alert_B" });
133+
cmd.Parameters.Add(new DuckDBParameter { Value = time3 });
134+
cmd.Parameters.Add(new DuckDBParameter { Value = 1 });
135+
cmd.Parameters.Add(new DuckDBParameter { Value = "Alert_C" });
136+
137+
var affected = await cmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
138+
Assert.Equal(2, affected);
139+
}
140+
141+
[Fact]
142+
public async Task Transaction_RollbackRestoresState()
143+
{
144+
using var connection = await InitializeDatabaseAsync();
145+
146+
await _helper.InsertLiveAlertAsync(
147+
connection, DateTime.UtcNow.AddHours(-1), 1, "Server1", "High CPU");
148+
149+
// Begin transaction, dismiss, then rollback
150+
using var beginCmd = connection.CreateCommand();
151+
beginCmd.CommandText = "BEGIN TRANSACTION";
152+
await beginCmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
153+
154+
using var updateCmd = connection.CreateCommand();
155+
updateCmd.CommandText = @"
156+
UPDATE config_alert_log
157+
SET dismissed = TRUE
158+
WHERE metric_name = 'High CPU'
159+
AND dismissed = FALSE";
160+
var affected = await updateCmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
161+
Assert.Equal(1, affected);
162+
163+
using var rollbackCmd = connection.CreateCommand();
164+
rollbackCmd.CommandText = "ROLLBACK";
165+
await rollbackCmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
166+
167+
// Alert should still be undismissed after rollback
168+
using var checkCmd = connection.CreateCommand();
169+
checkCmd.CommandText = "SELECT COUNT(1) FROM config_alert_log WHERE dismissed = FALSE AND metric_name = 'High CPU'";
170+
var undismissed = Convert.ToInt64(await checkCmd.ExecuteScalarAsync(TestContext.Current.CancellationToken));
171+
Assert.Equal(1, undismissed);
172+
}
173+
174+
[Fact]
175+
public async Task Transaction_CommitPersistsState()
176+
{
177+
using var connection = await InitializeDatabaseAsync();
178+
179+
await _helper.InsertLiveAlertAsync(
180+
connection, DateTime.UtcNow.AddHours(-1), 1, "Server1", "High CPU");
181+
182+
// Begin transaction, dismiss, then commit
183+
using var beginCmd = connection.CreateCommand();
184+
beginCmd.CommandText = "BEGIN TRANSACTION";
185+
await beginCmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
186+
187+
using var updateCmd = connection.CreateCommand();
188+
updateCmd.CommandText = @"
189+
UPDATE config_alert_log
190+
SET dismissed = TRUE
191+
WHERE metric_name = 'High CPU'
192+
AND dismissed = FALSE";
193+
await updateCmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
194+
195+
using var commitCmd = connection.CreateCommand();
196+
commitCmd.CommandText = "COMMIT";
197+
await commitCmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
198+
199+
// Alert should be dismissed after commit
200+
using var checkCmd = connection.CreateCommand();
201+
checkCmd.CommandText = "SELECT COUNT(1) FROM config_alert_log WHERE dismissed = TRUE AND metric_name = 'High CPU'";
202+
var dismissed = Convert.ToInt64(await checkCmd.ExecuteScalarAsync(TestContext.Current.CancellationToken));
203+
Assert.Equal(1, dismissed);
204+
}
205+
206+
[Fact]
207+
public async Task WriteLock_BlocksReadersDuringDismiss()
208+
{
209+
var initializer = new DuckDbInitializer(_dbPath);
210+
await initializer.InitializeAsync();
211+
212+
// Acquire write lock on this thread
213+
using var writeLock = initializer.AcquireWriteLock();
214+
215+
// A second write lock with timeout from a different thread should throw TimeoutException
216+
Exception? caughtException = null;
217+
var thread = new System.Threading.Thread(() =>
218+
{
219+
try
220+
{
221+
using var secondLock = initializer.AcquireWriteLock(timeout: TimeSpan.FromMilliseconds(50));
222+
}
223+
catch (Exception ex)
224+
{
225+
caughtException = ex;
226+
}
227+
});
228+
thread.Start();
229+
thread.Join(2000);
230+
231+
Assert.NotNull(caughtException);
232+
Assert.IsType<TimeoutException>(caughtException);
233+
Assert.Contains("could not acquire", caughtException.Message, StringComparison.OrdinalIgnoreCase);
234+
}
235+
236+
[Fact]
237+
public async Task WriteLock_Timeout_ThrowsTimeoutException()
238+
{
239+
var initializer = new DuckDbInitializer(_dbPath);
240+
await initializer.InitializeAsync();
241+
242+
// Simulate archival holding the write lock on a background thread
243+
using var archivalLock = initializer.AcquireWriteLock();
244+
245+
// A concurrent dismiss attempt with timeout should throw TimeoutException
246+
var lockAcquired = false;
247+
var thread = new System.Threading.Thread(() =>
248+
{
249+
try
250+
{
251+
using var dismissLock = initializer.AcquireWriteLock(timeout: TimeSpan.FromMilliseconds(100));
252+
lockAcquired = true;
253+
}
254+
catch (TimeoutException)
255+
{
256+
lockAcquired = false;
257+
}
258+
});
259+
thread.Start();
260+
thread.Join(2000);
261+
262+
Assert.False(lockAcquired, "Dismiss should not acquire write lock while archival holds it");
263+
}
264+
265+
[Fact]
266+
public async Task DismissAll_UsesWriteLock()
267+
{
268+
using var connection = await InitializeDatabaseAsync();
269+
270+
// Insert alerts
271+
await _helper.InsertLiveAlertAsync(
272+
connection, DateTime.UtcNow.AddHours(-1), 1, "Server1", "High CPU");
273+
await _helper.InsertLiveAlertAsync(
274+
connection, DateTime.UtcNow.AddHours(-2), 1, "Server1", "Blocking");
275+
276+
// DismissAll targets the live table — should work with write lock
277+
using var cmd = connection.CreateCommand();
278+
cmd.CommandText = @"
279+
UPDATE config_alert_log
280+
SET dismissed = TRUE
281+
WHERE dismissed = FALSE";
282+
var affected = await cmd.ExecuteNonQueryAsync(TestContext.Current.CancellationToken);
283+
Assert.Equal(2, affected);
284+
285+
// Verify all dismissed
286+
using var checkCmd = connection.CreateCommand();
287+
checkCmd.CommandText = "SELECT COUNT(1) FROM config_alert_log WHERE dismissed = FALSE";
288+
var remaining = Convert.ToInt64(await checkCmd.ExecuteScalarAsync(TestContext.Current.CancellationToken));
289+
Assert.Equal(0, remaining);
290+
}
291+
}

Lite/Controls/AlertsHistoryTab.xaml.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,14 @@ private async void DismissSelected_Click(object sender, RoutedEventArgs e)
308308
}
309309
await LoadAlertsAsync();
310310
}
311+
catch (TimeoutException)
312+
{
313+
MessageBox.Show(
314+
"The database is currently busy (archival or maintenance in progress).\n\nPlease try again in a few moments.",
315+
"Dismiss Unavailable",
316+
MessageBoxButton.OK,
317+
MessageBoxImage.Warning);
318+
}
311319
catch (Exception ex)
312320
{
313321
AppLogger.Error("AlertsHistory", $"Failed to dismiss selected alerts: {ex.Message}");
@@ -358,6 +366,14 @@ private async void DismissAll_Click(object sender, RoutedEventArgs e)
358366
}
359367
await LoadAlertsAsync();
360368
}
369+
catch (TimeoutException)
370+
{
371+
MessageBox.Show(
372+
"The database is currently busy (archival or maintenance in progress).\n\nPlease try again in a few moments.",
373+
"Dismiss Unavailable",
374+
MessageBoxButton.OK,
375+
MessageBoxImage.Warning);
376+
}
361377
catch (Exception ex)
362378
{
363379
AppLogger.Error("AlertsHistory", $"Failed to dismiss all alerts: {ex.Message}");

Lite/Database/DuckDbInitializer.cs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,21 @@ exception that prevented Dispose(). Since we're already protected by a read lock
4949
/// <summary>
5050
/// Acquires an exclusive write lock on the database. Blocks until all readers finish.
5151
/// Dispose the returned object to release the lock.
52+
/// When a timeout is specified, throws <see cref="TimeoutException"/> if the lock
53+
/// cannot be acquired within the given duration (e.g., archival is in progress).
5254
/// </summary>
53-
public IDisposable AcquireWriteLock()
55+
public IDisposable AcquireWriteLock(TimeSpan? timeout = null)
5456
{
55-
s_dbLock.EnterWriteLock();
57+
if (timeout.HasValue)
58+
{
59+
if (!s_dbLock.TryEnterWriteLock(timeout.Value))
60+
throw new TimeoutException(
61+
"Could not acquire database write lock — another operation (archival or maintenance) may be in progress. Please try again in a few moments.");
62+
}
63+
else
64+
{
65+
s_dbLock.EnterWriteLock();
66+
}
5667
return new LockReleaser(s_dbLock, write: true);
5768
}
5869

Lite/Database/LockedConnection.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,20 @@
1313
namespace PerformanceMonitorLite.Database;
1414

1515
/// <summary>
16-
/// Wraps a DuckDBConnection with a read lock that is released when the connection is disposed.
17-
/// Ensures UI reads hold the lock for their entire duration, preventing CHECKPOINT or compaction
18-
/// from reorganizing the database file while a reader has stale file offsets.
16+
/// Wraps a DuckDBConnection with a lock (read or write) that is released when the connection is disposed.
17+
/// Ensures operations hold the lock for their entire duration, preventing CHECKPOINT or compaction
18+
/// from reorganizing the database file while the connection is active.
1919
/// </summary>
2020
public sealed class LockedConnection : IDisposable, IAsyncDisposable
2121
{
2222
private readonly DuckDBConnection _connection;
23-
private readonly IDisposable _readLock;
23+
private readonly IDisposable _lock;
2424
private bool _disposed;
2525

26-
public LockedConnection(DuckDBConnection connection, IDisposable readLock)
26+
public LockedConnection(DuckDBConnection connection, IDisposable @lock)
2727
{
2828
_connection = connection;
29-
_readLock = readLock;
29+
_lock = @lock;
3030
}
3131

3232
/// <summary>
@@ -40,14 +40,14 @@ public void Dispose()
4040
if (_disposed) return;
4141
_disposed = true;
4242
_connection.Dispose();
43-
_readLock.Dispose();
43+
_lock.Dispose();
4444
}
4545

4646
public async ValueTask DisposeAsync()
4747
{
4848
if (_disposed) return;
4949
_disposed = true;
5050
await _connection.DisposeAsync();
51-
_readLock.Dispose();
51+
_lock.Dispose();
5252
}
5353
}

0 commit comments

Comments
 (0)