[backend/queue] Fix QueueService race condition causing transient queue stalls

This commit is contained in:
Laura Hausmann 2024-07-22 05:59:20 +02:00
parent 998d1a0e65
commit 7ea2f71abe
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
2 changed files with 23 additions and 8 deletions

View file

@ -0,0 +1,7 @@
namespace Iceshrimp.Backend.Core.Helpers;
public class SemaphorePlus(int maxCount) : SemaphoreSlim(maxCount, maxCount)
{
private readonly int _maxCount = maxCount;
public int ActiveCount => _maxCount - CurrentCount;
}

View file

@ -24,7 +24,6 @@ public class QueueService(
private readonly List<IPostgresJobQueue> _queues = []; private readonly List<IPostgresJobQueue> _queues = [];
public readonly BackgroundTaskQueue BackgroundTaskQueue = new(queueConcurrency.Value.BackgroundTask); public readonly BackgroundTaskQueue BackgroundTaskQueue = new(queueConcurrency.Value.BackgroundTask);
public readonly DeliverQueue DeliverQueue = new(queueConcurrency.Value.Deliver); public readonly DeliverQueue DeliverQueue = new(queueConcurrency.Value.Deliver);
public readonly InboxQueue InboxQueue = new(queueConcurrency.Value.Inbox); public readonly InboxQueue InboxQueue = new(queueConcurrency.Value.Inbox);
public readonly PreDeliverQueue PreDeliverQueue = new(queueConcurrency.Value.PreDeliver); public readonly PreDeliverQueue PreDeliverQueue = new(queueConcurrency.Value.PreDeliver);
@ -279,6 +278,7 @@ public class PostgresJobQueue<T>(
TimeSpan timeout TimeSpan timeout
) : IPostgresJobQueue where T : class ) : IPostgresJobQueue where T : class
{ {
private readonly SemaphorePlus _semaphore = new(parallelism);
private readonly AsyncAutoResetEvent _delayedChannel = new(); private readonly AsyncAutoResetEvent _delayedChannel = new();
private readonly AsyncAutoResetEvent _queuedChannel = new(); private readonly AsyncAutoResetEvent _queuedChannel = new();
private ILogger<QueueService> _logger = null!; private ILogger<QueueService> _logger = null!;
@ -311,9 +311,8 @@ public class PostgresJobQueue<T>(
await using var db = GetDbContext(scope); await using var db = GetDbContext(scope);
var queuedCount = await db.GetJobQueuedCount(name, _workerId, token); var queuedCount = await db.GetJobQueuedCount(name, _workerId, token);
var runningCount = await db.GetJobRunningCount(name, _workerId, token); var actualParallelism = Math.Min(parallelism - _semaphore.ActiveCount, queuedCount);
var actualParallelism = Math.Min(parallelism - runningCount, queuedCount);
if (actualParallelism <= 0) if (actualParallelism <= 0)
{ {
await _queuedChannel.WaitAsync(token).SafeWaitAsync(queueToken); await _queuedChannel.WaitAsync(token).SafeWaitAsync(queueToken);
@ -322,10 +321,14 @@ public class PostgresJobQueue<T>(
// ReSharper disable MethodSupportsCancellation // ReSharper disable MethodSupportsCancellation
var queuedChannelCts = new CancellationTokenSource(); var queuedChannelCts = new CancellationTokenSource();
if (runningCount + queuedCount < parallelism) if (_semaphore.ActiveCount + queuedCount < parallelism)
{ {
_ = _queuedChannel.WaitWithoutResetAsync() _ = _queuedChannel.WaitWithoutResetAsync()
.ContinueWith(_ => queuedChannelCts.Cancel()) .ContinueWith(_ =>
{
if (_semaphore.ActiveCount < parallelism)
queuedChannelCts.Cancel();
})
.SafeWaitAsync(queueToken); .SafeWaitAsync(queueToken);
} }
// ReSharper restore MethodSupportsCancellation // ReSharper restore MethodSupportsCancellation
@ -467,6 +470,7 @@ public class PostgresJobQueue<T>(
await using var jobScope = GetScope(); await using var jobScope = GetScope();
try try
{ {
await _semaphore.WaitAsync(CancellationToken.None).SafeWaitAsync(token);
await ProcessJobAsync(processorScope, jobScope, token); await ProcessJobAsync(processorScope, jobScope, token);
} }
catch (Exception e) catch (Exception e)
@ -475,6 +479,10 @@ public class PostgresJobQueue<T>(
_logger.LogError("Queue worker(s) for queue {queue} might be degraded or stalled. Please report this bug to the developers.", _logger.LogError("Queue worker(s) for queue {queue} might be degraded or stalled. Please report this bug to the developers.",
name); name);
} }
finally
{
_semaphore.Release();
}
} }
private async Task ProcessJobAsync(IServiceScope processorScope, IServiceScope jobScope, CancellationToken token) private async Task ProcessJobAsync(IServiceScope processorScope, IServiceScope jobScope, CancellationToken token)