[backend/drive] Improve media fixup algorithm performance (ISH-520)

This commit is contained in:
Laura Hausmann 2024-10-18 23:30:26 +02:00
parent 6124eadb53
commit 055957f509
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
3 changed files with 39 additions and 38 deletions

View file

@ -479,27 +479,36 @@ public class DriveService(
}
}
public async Task<(bool original, bool thumbnail, bool @public)> VerifyFileExistence(DriveFile file)
/// <summary>
/// Collects the names of all files currently present in object storage.
/// </summary>
/// <returns>
/// The set of names yielded by <c>storageSvc.EnumerateFilesAsync()</c>, or an empty set when no
/// object storage bucket is configured.
/// </returns>
public async Task<HashSet<string>> GetAllFileNamesFromObjectStorage()
{
    // Without a configured bucket there is nothing to enumerate.
    if (storageConfig.Value.ObjectStorage?.Bucket == null)
        return [];

    // Stream the names straight into the set instead of buffering them in an array first.
    return await storageSvc.EnumerateFilesAsync().ToHashSetAsync();
}
/// <summary>
/// Collects the names of all files located directly inside the configured local storage directory.
/// </summary>
/// <returns>
/// The set of file names (directory part stripped), or an empty set when no local storage path is
/// configured or the configured directory does not exist.
/// </returns>
public HashSet<string> GetAllFileNamesFromLocalStorage()
{
    // Bail out early when local storage is not configured or the directory is missing.
    if (storageConfig.Value.Local?.Path is not { } path || !Directory.Exists(path))
        return [];

    // Path.GetFileName is annotated as nullable, hence the NotNull() filter before building the set.
    var names = Directory.EnumerateFiles(path).Select(Path.GetFileName).NotNull();
    return names.ToHashSet();
}
/// <summary>
/// Checks whether the stored objects referenced by a drive file are present, using pre-fetched
/// sets of file names instead of querying the storage backend for each file.
/// </summary>
/// <param name="file">The drive file whose access keys are checked.</param>
/// <param name="objectStorageFiles">Names known to exist in object storage; consulted when <c>file.StoredInternal</c> is false.</param>
/// <param name="localStorageFiles">Names known to exist in local storage; consulted when <c>file.StoredInternal</c> is true.</param>
/// <param name="original">Set to true when <c>file.AccessKey</c> is not among the missing names.</param>
/// <param name="thumbnail">Set to true when the file has no thumbnail key, or the thumbnail key is not missing.</param>
/// <param name="public">Set to true when the file has no public key, or the public key is not missing.</param>
/// <returns>True only when <paramref name="original"/>, <paramref name="thumbnail"/> and <paramref name="public"/> are all true.</returns>
public static bool VerifyFileExistence(
DriveFile file, HashSet<string> objectStorageFiles, HashSet<string> localStorageFiles,
out bool original, out bool thumbnail, out bool @public
)
{
string?[] allFilenames = [file.AccessKey, file.ThumbnailAccessKey, file.PublicAccessKey];
var filenames = allFilenames.NotNull().ToArray();
var missing = file.StoredInternal
? filenames.Where(p => !VerifyFileLocalStorage(p)).ToArray()
: await filenames.Select(async p => (name: p, exists: await storageSvc.VerifyFileExistenceAsync(p)))
.AwaitAllAsync()
.ContinueWithResult(p => p.Where(i => !i.exists).Select(i => i.name).ToArray());
? filenames.Where(p => !localStorageFiles.Contains(p)).ToArray()
: filenames.Where(p => !objectStorageFiles.Contains(p)).ToArray();
var original = !missing.Contains(file.AccessKey);
var thumbnail = file.ThumbnailAccessKey == null || !missing.Contains(file.ThumbnailAccessKey);
var @public = file.PublicAccessKey == null || !missing.Contains(file.PublicAccessKey);
// A null thumbnail/public key means there is nothing to look for, so it counts as present.
original = !missing.Contains(file.AccessKey);
thumbnail = file.ThumbnailAccessKey == null || !missing.Contains(file.ThumbnailAccessKey);
@public = file.PublicAccessKey == null || !missing.Contains(file.PublicAccessKey);
return (original, thumbnail, @public);
}
private bool VerifyFileLocalStorage(string filename)
{
var pathBase = storageConfig.Value.Local?.Path ?? throw new Exception("Local storage path cannot be null");
return File.Exists(Path.Combine(pathBase, filename));
return original && thumbnail && @public;
}
private static string GenerateDerivedFileName(string filename, string newExt)

View file

@ -123,22 +123,6 @@ public class ObjectStorageService(IOptions<Config.StorageSection> config, HttpCl
await _bucket.DeleteAsync(filenames.Select(GetKeyWithPrefix).ToImmutableList());
}
/// <summary>
/// Checks whether a file is present in the object storage bucket.
/// </summary>
/// <param name="filename">File name; it is passed through <c>GetKeyWithPrefix</c> before listing.</param>
/// <returns>
/// True when listing the bucket for the prefixed key yields exactly one entry; false when it yields
/// any other count or when the lookup throws.
/// </returns>
/// <exception cref="Exception">Thrown when no bucket is configured.</exception>
public async Task<bool> VerifyFileExistenceAsync(string filename)
{
    if (_bucket is not { } bucket)
        throw new Exception("Refusing to verify file existence from object storage with invalid configuration");

    try
    {
        var listing = await bucket.ListAsync(GetKeyWithPrefix(filename));
        return listing.Count == 1;
    }
    catch
    {
        // Any failure during the lookup is reported as "file does not exist".
        return false;
    }
}
public async IAsyncEnumerable<string> EnumerateFilesAsync()
{
if (_bucket == null)

View file

@ -152,17 +152,25 @@ public class StorageMaintenanceService(
var modified = 0;
logger.LogInformation("Validating all files, this may take a long time...");
var localFiles = driveSvc.GetAllFileNamesFromLocalStorage();
var objStorageFiles = await driveSvc.GetAllFileNamesFromObjectStorage();
await foreach (var file in query.AsChunkedAsyncEnumerable(50, p => p.Id))
{
if (++progress % 100 == 0)
if (++progress % 500 == 0)
logger.LogInformation("Validating files... ({idx}/{total})", progress, total);
var res = await driveSvc.VerifyFileExistence(file);
if (res == (true, true, true)) continue;
if (
DriveService.VerifyFileExistence(file, objStorageFiles, localFiles, out var original,
out var thumbnail, out var @public)
)
{
continue;
}
modified++;
if (!res.original)
if (!original)
{
if (dryRun)
{
@ -174,7 +182,7 @@ public class StorageMaintenanceService(
continue;
}
if (!res.thumbnail)
if (!thumbnail)
{
if (dryRun)
{
@ -189,7 +197,7 @@ public class StorageMaintenanceService(
}
}
if (!res.@public)
if (!@public)
{
if (dryRun)
{
@ -238,7 +246,7 @@ public class StorageMaintenanceService(
.NotNull()
.Append(".iceshrimp-test")
.ToHashSet());
logger.LogInformation("Loaded {count} files from database.", filenames.Count);
if (options.Value.Local?.Path is { } path && Directory.Exists(path))