From 11caf32ebb5aa0e423a5acbb2fc9f72ed335fd74 Mon Sep 17 00:00:00 2001
From: Laura Hausmann
Date: Fri, 16 Feb 2024 00:54:13 +0100
Subject: [PATCH] [backend/services] Media cleanup cron task (ISH-66, ISH-27)

---
Review notes:
 - Config.StorageSection: CleanAvatars/CleanBanners are init-only properties instead of
   fields. The configuration binder only populates public properties, so as fields they
   would never pick up the values from configuration.ini.
 - BackgroundTaskQueue.ProcessDriveFileExpire: the access keys and StoredInternal are
   captured before the entity is reset. Previously they were read after being cleared, so
   thumbnail/webpublic media was never removed and the local storage branch was unreachable.
   The "still in use" check now ignores links (expired entries keep their AccessKey), and
   files that are already links are skipped.
 - CronService: the per-run service scope is disposed, and exceptions thrown by a task are
   logged instead of escaping from an async void event handler.
 - Line structure, indentation and generic type arguments were restored from a copy of the
   patch that had lost them; DatabaseContext, ObjectStorageService and
   IOptions<Config.StorageSection> are inferred and should be confirmed. Generic arguments on
   unchanged context lines (ServiceExtensions.cs, QueueService.cs) could not be recovered and
   are left as found, as are the blob ids on the index lines.

 .../Core/Configuration/Config.cs              |  17 ++-
 .../Core/CronTasks/MediaCleanupTask.cs        |  48 ++++++++
 .../Core/Extensions/ServiceExtensions.cs      |   2 +
 .../Core/Helpers/AssemblyHelpers.cs           |  18 +++
 .../Core/Queues/BackgroundTaskQueue.cs        |  62 +++++++++-
 .../Core/Services/CronService.cs              | 112 ++++++++++++++++++
 .../Core/Services/DriveService.cs             |   2 +-
 .../Core/Services/QueueService.cs             |   1 +
 Iceshrimp.Backend/configuration.ini           |   6 +-
 9 files changed, 261 insertions(+), 7 deletions(-)
 create mode 100644 Iceshrimp.Backend/Core/CronTasks/MediaCleanupTask.cs
 create mode 100644 Iceshrimp.Backend/Core/Helpers/AssemblyHelpers.cs
 create mode 100644 Iceshrimp.Backend/Core/Services/CronService.cs

diff --git a/Iceshrimp.Backend/Core/Configuration/Config.cs b/Iceshrimp.Backend/Core/Configuration/Config.cs
index 56a5cdeb..485207c1 100644
--- a/Iceshrimp.Backend/Core/Configuration/Config.cs
+++ b/Iceshrimp.Backend/Core/Configuration/Config.cs
@@ -68,14 +68,19 @@ public sealed class Config {
 	}
 
 	public sealed class StorageSection {
-		private readonly TimeSpan? _mediaRetention;
+		public readonly TimeSpan? MediaRetentionTimeSpan;
 		public Enums.FileStorage Mode { get; init; } = Enums.FileStorage.Local;
 
 		public string? MediaRetention {
-			get => _mediaRetention?.ToString();
+			get => MediaRetentionTimeSpan?.ToString();
 			init {
 				if (value == null || string.IsNullOrWhiteSpace(value) || value.Trim() == "0") {
-					_mediaRetention = null;
+					MediaRetentionTimeSpan = null;
+					return;
+				}
+
+				if (value.Trim() == "-1") {
+					MediaRetentionTimeSpan = TimeSpan.MaxValue;
 					return;
 				}
 
@@ -84,7 +89,7 @@ public sealed class Config {
 
 				var suffix = value[^1];
 
-				_mediaRetention = suffix switch {
+				MediaRetentionTimeSpan = suffix switch {
 					'd' => TimeSpan.FromDays(num),
 					'w' => TimeSpan.FromDays(num * 7),
 					'm' => TimeSpan.FromDays(num * 30),
@@ -94,6 +99,10 @@ public sealed class Config {
 			}
 		}
 
+		// Properties rather than fields: the configuration binder only populates public properties
+		public bool CleanAvatars { get; init; } = false;
+		public bool CleanBanners { get; init; } = false;
+
 		public LocalStorageSection? Local { get; init; }
 		public ObjectStorageSection? ObjectStorage { get; init; }
 	}
diff --git a/Iceshrimp.Backend/Core/CronTasks/MediaCleanupTask.cs b/Iceshrimp.Backend/Core/CronTasks/MediaCleanupTask.cs
new file mode 100644
index 00000000..0016c286
--- /dev/null
+++ b/Iceshrimp.Backend/Core/CronTasks/MediaCleanupTask.cs
@@ -0,0 +1,48 @@
+using System.Diagnostics.CodeAnalysis;
+using Iceshrimp.Backend.Core.Configuration;
+using Iceshrimp.Backend.Core.Database;
+using Iceshrimp.Backend.Core.Queues;
+using Iceshrimp.Backend.Core.Services;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Options;
+
+namespace Iceshrimp.Backend.Core.CronTasks;
+
+/// <summary>
+/// Enqueues an expiry job for every non-link drive file with a non-null UserHost that was created before the
+/// media retention cutoff. Does nothing when the retention is TimeSpan.MaxValue (configured as -1).
+/// </summary>
+[SuppressMessage("ReSharper", "UnusedType.Global", Justification = "Instantiated at runtime by CronService")]
+public class MediaCleanupTask : ICronTask {
+	public async Task Invoke(IServiceProvider provider) {
+		var config = provider.GetRequiredService<IOptions<Config.StorageSection>>().Value;
+		if (config.MediaRetentionTimeSpan == TimeSpan.MaxValue) return;
+
+		var logger = provider.GetRequiredService<ILogger<MediaCleanupTask>>();
+		logger.LogInformation("Starting media cleanup task...");
+
+		var db = provider.GetRequiredService<DatabaseContext>();
+		var queueService = provider.GetRequiredService<QueueService>();
+
+		var cutoff = DateTime.UtcNow - (config.MediaRetentionTimeSpan ?? TimeSpan.Zero);
+
+		var query = db.DriveFiles.Where(p => !p.IsLink && p.UserHost != null && p.CreatedAt < cutoff);
+
+		if (!config.CleanAvatars) query = query.Where(p => !db.Users.Any(u => u.AvatarId == p.Id));
+		if (!config.CleanBanners) query = query.Where(p => !db.Users.Any(u => u.BannerId == p.Id));
+
+		var fileIds = query.Select(p => p.Id);
+
+		logger.LogInformation("Expiring {count} files...", await fileIds.CountAsync());
+		foreach (var fileId in fileIds) {
+			await queueService.BackgroundTaskQueue.EnqueueAsync(new DriveFileDeleteJob {
+				DriveFileId = fileId,
+				Expire = true
+			});
+		}
+	}
+
+	// Midnight
+	public TimeSpan Trigger => TimeSpan.Zero;
+	public CronTaskType Type => CronTaskType.Daily;
+}
\ No newline at end of file
diff --git a/Iceshrimp.Backend/Core/Extensions/ServiceExtensions.cs b/Iceshrimp.Backend/Core/Extensions/ServiceExtensions.cs
index 5efe38f9..b2496d34 100644
--- a/Iceshrimp.Backend/Core/Extensions/ServiceExtensions.cs
+++ b/Iceshrimp.Backend/Core/Extensions/ServiceExtensions.cs
@@ -52,6 +52,7 @@ public static class ServiceExtensions {
 			.AddSingleton()
 			.AddSingleton()
 			.AddSingleton()
+			.AddSingleton<CronService>()
 			.AddSingleton()
 			.AddSingleton()
 			.AddSingleton()
@@ -62,6 +63,7 @@ public static class ServiceExtensions {
 
 		// Hosted services = long running background tasks
 		// Note: These need to be added as a singleton as well to ensure data consistency
+		services.AddHostedService(provider => provider.GetRequiredService<CronService>());
 		services.AddHostedService(provider => provider.GetRequiredService());
 	}
 
diff --git a/Iceshrimp.Backend/Core/Helpers/AssemblyHelpers.cs b/Iceshrimp.Backend/Core/Helpers/AssemblyHelpers.cs
new file mode 100644
index 00000000..eb76f470
--- /dev/null
+++ b/Iceshrimp.Backend/Core/Helpers/AssemblyHelpers.cs
@@ -0,0 +1,18 @@
+using System.Reflection;
+
+namespace Iceshrimp.Backend.Core.Helpers;
+
+public static class AssemblyHelpers {
+	/// <summary>Returns the types in the assembly (default: the executing one) that carry the given attribute.</summary>
+	public static IEnumerable<Type> GetTypesWithAttribute(Type attribute, Assembly? assembly = null) {
+		assembly ??= Assembly.GetExecutingAssembly();
+		return assembly.GetTypes().Where(type => Attribute.IsDefined(type, attribute));
+	}
+
+	/// <summary>Returns the non-abstract classes in the assembly (default: the executing one) that implement the interface.</summary>
+	public static IEnumerable<Type> GetImplementationsOfInterface(Type @interface, Assembly? assembly = null) {
+		assembly ??= Assembly.GetExecutingAssembly();
+		return assembly.GetTypes().Where(type => type is { IsAbstract: false, IsClass: true } &&
+		                                         type.GetInterfaces().Contains(@interface));
+	}
+}
\ No newline at end of file
diff --git a/Iceshrimp.Backend/Core/Queues/BackgroundTaskQueue.cs b/Iceshrimp.Backend/Core/Queues/BackgroundTaskQueue.cs
index eaac1565..38e84e8b 100644
--- a/Iceshrimp.Backend/Core/Queues/BackgroundTaskQueue.cs
+++ b/Iceshrimp.Backend/Core/Queues/BackgroundTaskQueue.cs
@@ -20,7 +23,10 @@ public abstract class BackgroundTaskQueue {
 		CancellationToken token
 	) {
 		if (job is DriveFileDeleteJob driveFileDeleteJob) {
-			await ProcessDriveFileDelete(driveFileDeleteJob, scope, token);
+			if (driveFileDeleteJob.Expire)
+				await ProcessDriveFileExpire(driveFileDeleteJob, scope, token);
+			else
+				await ProcessDriveFileDelete(driveFileDeleteJob, scope, token);
 		}
 	}
 
@@ -60,6 +63,62 @@ public abstract class BackgroundTaskQueue {
 			}
 		}
 	}
+
+	/// <summary>
+	/// Expires a remote drive file: turns the database entry into a link to its original Uri, points avatar and
+	/// banner URLs of users referencing it at that Uri, then removes the stored media unless a live file shares it.
+	/// </summary>
+	private static async Task ProcessDriveFileExpire(
+		DriveFileDeleteJob job,
+		IServiceProvider scope,
+		CancellationToken token
+	) {
+		var db = scope.GetRequiredService<DatabaseContext>();
+		var logger = scope.GetRequiredService<ILogger<BackgroundTaskQueue>>();
+		logger.LogDebug("Expiring file {id}...", job.DriveFileId);
+
+		var file = await db.DriveFiles.FirstOrDefaultAsync(p => p.Id == job.DriveFileId, cancellationToken: token);
+		if (file is not { UserHost: not null, Uri: not null, IsLink: false }) return;
+
+		// Capture the storage location before the entity is reset below; reading it afterwards would always
+		// yield null thumbnail/webpublic keys and StoredInternal == false, leaving the media behind.
+		string?[] paths = [file.AccessKey, file.ThumbnailAccessKey, file.WebpublicAccessKey];
+		var storedInternal = file.StoredInternal;
+
+		file.IsLink = true;
+		file.Url = file.Uri;
+		file.ThumbnailUrl = null;
+		file.WebpublicUrl = null;
+		file.ThumbnailAccessKey = null;
+		file.WebpublicAccessKey = null;
+		file.StoredInternal = false;
+
+		await db.Users.Where(p => p.AvatarId == file.Id)
+		        .ExecuteUpdateAsync(p => p.SetProperty(u => u.AvatarUrl, file.Uri), cancellationToken: token);
+		await db.Users.Where(p => p.BannerId == file.Id)
+		        .ExecuteUpdateAsync(p => p.SetProperty(u => u.BannerUrl, file.Uri), cancellationToken: token);
+		await db.SaveChangesAsync(token);
+
+		if (file.AccessKey == null) return;
+
+		// Expired entries keep their AccessKey, so only non-link files count as still using the stored media
+		if (!await db.DriveFiles.AnyAsync(p => p.Id != file.Id && p.AccessKey == file.AccessKey && !p.IsLink,
+		                                  cancellationToken: token)) {
+			if (storedInternal) {
+				var pathBase = scope.GetRequiredService<IOptions<Config.StorageSection>>().Value.Local?.Path
+				               ?? throw new Exception("Cannot delete locally stored file: pathBase is null");
+
+				paths.Where(p => p != null)
+				     .Select(p => Path.Combine(pathBase, p!))
+				     .Where(File.Exists).ToList()
+				     .ForEach(File.Delete);
+			}
+			else {
+				var storageSvc = scope.GetRequiredService<ObjectStorageService>();
+				await storageSvc.RemoveFilesAsync(paths.Where(p => p != null).Select(p => p!).ToArray());
+			}
+		}
+	}
 }
 
 [ProtoContract]
@@ -69,4 +128,5 @@ public class BackgroundTaskJob : Job;
 [ProtoContract]
 public class DriveFileDeleteJob : BackgroundTaskJob {
 	[ProtoMember(1)] public required string DriveFileId;
+	[ProtoMember(2)] public required bool Expire;
 }
\ No newline at end of file
diff --git a/Iceshrimp.Backend/Core/Services/CronService.cs b/Iceshrimp.Backend/Core/Services/CronService.cs
new file mode 100644
index 00000000..57215c53
--- /dev/null
+++ b/Iceshrimp.Backend/Core/Services/CronService.cs
@@ -0,0 +1,112 @@
+using Iceshrimp.Backend.Core.Helpers;
+
+namespace Iceshrimp.Backend.Core.Services;
+
+/// <summary>
+/// Hosted service that instantiates every ICronTask implementation in the executing assembly and hooks each
+/// one up to a daily or interval trigger.
+/// </summary>
+public class CronService(IServiceScopeFactory serviceScopeFactory) : BackgroundService {
+	protected override Task ExecuteAsync(CancellationToken token) {
+		var tasks = AssemblyHelpers.GetImplementationsOfInterface(typeof(ICronTask))
+		                           .Select(p => Activator.CreateInstance(p) as ICronTask)
+		                           .Where(p => p != null)
+		                           .Cast<ICronTask>();
+
+		foreach (var task in tasks) {
+			ICronTrigger trigger = task.Type switch {
+				CronTaskType.Daily => new DailyTrigger(task.Trigger, token),
+				CronTaskType.Interval => new IntervalTrigger(task.Trigger, token),
+				_ => throw new ArgumentOutOfRangeException()
+			};
+
+			// OnTrigger is an Action, so this handler runs as async void: an exception escaping it would
+			// terminate the process. Log failures instead, and dispose the per-run scope when done.
+			trigger.OnTrigger += async () => {
+				await using var scope = serviceScopeFactory.CreateAsyncScope();
+				try {
+					await task.Invoke(scope.ServiceProvider);
+				}
+				catch (Exception e) {
+					scope.ServiceProvider.GetRequiredService<ILogger<CronService>>()
+					     .LogError(e, "Cron task {task} failed", task.GetType().Name);
+				}
+			};
+		}
+
+		return Task.CompletedTask;
+	}
+}
+
+public interface ICronTask {
+	public Task Invoke(IServiceProvider provider);
+
+	public TimeSpan Trigger { get; }
+	public CronTaskType Type { get; }
+}
+
+public enum CronTaskType {
+	Daily,
+	Interval
+}
+
+public interface ICronTrigger {
+	public event Action? OnTrigger;
+}
+
+file class DailyTrigger : ICronTrigger, IDisposable {
+	private TimeSpan TriggerTime { get; }
+	private CancellationToken CancellationToken { get; }
+	private Task RunningTask { get; set; }
+
+	public DailyTrigger(TimeSpan triggerTime, CancellationToken cancellationToken) {
+		TriggerTime = triggerTime;
+		CancellationToken = cancellationToken;
+
+		RunningTask = Task.Run(async () => {
+			while (!CancellationToken.IsCancellationRequested) {
+				var nextTrigger = DateTime.Today + TriggerTime - DateTime.Now;
+				if (nextTrigger < TimeSpan.Zero)
+					nextTrigger = nextTrigger.Add(new TimeSpan(24, 0, 0));
+				await Task.Delay(nextTrigger, CancellationToken);
+				OnTrigger?.Invoke();
+			}
+		}, CancellationToken);
+	}
+
+	public void Dispose() {
+		RunningTask.Dispose();
+		RunningTask = null!;
+		GC.SuppressFinalize(this);
+	}
+
+	public event Action? OnTrigger;
+	~DailyTrigger() => Dispose();
+}
+
+file class IntervalTrigger : ICronTrigger, IDisposable {
+	private TimeSpan TriggerInterval { get; }
+	private CancellationToken CancellationToken { get; }
+	private Task RunningTask { get; set; }
+
+	public IntervalTrigger(TimeSpan triggerInterval, CancellationToken cancellationToken) {
+		TriggerInterval = triggerInterval;
+		CancellationToken = cancellationToken;
+
+		RunningTask = Task.Run(async () => {
+			while (!CancellationToken.IsCancellationRequested) {
+				await Task.Delay(TriggerInterval, CancellationToken);
+				OnTrigger?.Invoke();
+			}
+		}, CancellationToken);
+	}
+
+	public void Dispose() {
+		RunningTask.Dispose();
+		RunningTask = null!;
+		GC.SuppressFinalize(this);
+	}
+
+	public event Action? OnTrigger;
+	~IntervalTrigger() => Dispose();
+}
\ No newline at end of file
diff --git a/Iceshrimp.Backend/Core/Services/DriveService.cs b/Iceshrimp.Backend/Core/Services/DriveService.cs
index fa479297..9bcaa0f5 100644
--- a/Iceshrimp.Backend/Core/Services/DriveService.cs
+++ b/Iceshrimp.Backend/Core/Services/DriveService.cs
@@ -258,7 +258,7 @@ public class DriveService(
 	}
 
 	public async Task RemoveFile(string fileId) {
-		var job = new DriveFileDeleteJob { DriveFileId = fileId };
+		var job = new DriveFileDeleteJob { DriveFileId = fileId, Expire = false };
 		await queueSvc.BackgroundTaskQueue.EnqueueAsync(job);
 	}
 
diff --git a/Iceshrimp.Backend/Core/Services/QueueService.cs b/Iceshrimp.Backend/Core/Services/QueueService.cs
index fd6a7489..40334e83 100644
--- a/Iceshrimp.Backend/Core/Services/QueueService.cs
+++ b/Iceshrimp.Backend/Core/Services/QueueService.cs
@@ -161,6 +161,7 @@ public class JobQueue(
 [ProtoInclude(100, typeof(InboxJob))]
 [ProtoInclude(101, typeof(DeliverJob))]
 [ProtoInclude(102, typeof(PreDeliverJob))]
+[ProtoInclude(103, typeof(BackgroundTaskJob))]
 public abstract class Job {
 	public enum JobStatus {
 		Queued,
diff --git a/Iceshrimp.Backend/configuration.ini b/Iceshrimp.Backend/configuration.ini
index 425fcea6..bdaacff1 100644
--- a/Iceshrimp.Backend/configuration.ini
+++ b/Iceshrimp.Backend/configuration.ini
@@ -58,9 +58,13 @@ Port = 6379
 ;; Options: [Local, ObjectStorage]
 Mode = Local
 
-;; Amount of time remote media is retained in the cache (0 = disabled)
+;; Amount of time remote media is retained in the cache (0 = disabled, -1 = infinite)
 MediaRetention = 30d
 
+;; Whether to clean up avatars & banners past the media retention time
+CleanAvatars = false
+CleanBanners = false
+
 [Storage:Local]
 ;; Path where media is stored at. Must be writable for the service user.
 Path = /path/to/media/location