From a4717da8ab40992cf982eb40dfd1f778fedcb34b Mon Sep 17 00:00:00 2001 From: Laura Hausmann Date: Thu, 6 Mar 2025 15:52:14 +0100 Subject: [PATCH] [backend/core] Fix link verification for sites served with Transfer-Encoding: chunked --- .../Core/Extensions/StreamExtensions.cs | 31 +++++++++++++++++ .../Core/Services/DriveService.cs | 33 +------------------ .../Core/Services/UserService.cs | 13 +++++--- 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/Iceshrimp.Backend/Core/Extensions/StreamExtensions.cs b/Iceshrimp.Backend/Core/Extensions/StreamExtensions.cs index 97a48b84..1d55c4f1 100644 --- a/Iceshrimp.Backend/Core/Extensions/StreamExtensions.cs +++ b/Iceshrimp.Backend/Core/Extensions/StreamExtensions.cs @@ -28,4 +28,35 @@ public static class StreamExtensions ValueTask DoReadAsync() => source.ReadAsync(new Memory(buffer), cancellationToken); } + + /// + /// We can't trust the Content-Length header, and it might be null. + /// This makes sure that we only ever read up to maxLength into memory. + /// + /// The response content stream + /// The maximum length to buffer (null = unlimited) + /// The content length, if known + /// A CancellationToken, if applicable + /// Either a buffered MemoryStream, or Stream.Null + public static async Task GetSafeStreamOrNullAsync( + this Stream stream, long? maxLength, long? contentLength, CancellationToken token = default + ) + { + if (maxLength is 0) return Stream.Null; + if (contentLength > maxLength) return Stream.Null; + + MemoryStream buf = new(); + if (contentLength < maxLength) + maxLength = contentLength.Value; + + await stream.CopyToAsync(buf, maxLength, token); + if (maxLength == null || buf.Length <= maxLength) + { + buf.Seek(0, SeekOrigin.Begin); + return buf; + } + + await buf.DisposeAsync(); + return Stream.Null; + } } \ No newline at end of file diff --git a/Iceshrimp.Backend/Core/Services/DriveService.cs b/Iceshrimp.Backend/Core/Services/DriveService.cs index 59103f2a..dec72976 100644 --- a/Iceshrimp.Backend/Core/Services/DriveService.cs +++ b/Iceshrimp.Backend/Core/Services/DriveService.cs @@ -120,7 +120,7 @@ public class DriveService( ? storageConfig.Value.MaxCacheSizeBytes : 0; - var stream = await GetSafeStreamOrNullAsync(input, maxLength, res.Content.Headers.ContentLength); + var stream = await input.GetSafeStreamOrNullAsync(maxLength, res.Content.Headers.ContentLength); try { return await StoreFileAsync(stream, user, request, skipImageProcessing); @@ -629,37 +629,6 @@ public class DriveService( int GetTargetRes() => config.TargetRes ?? throw new Exception("TargetRes is required to encode images"); // @formatter:on } - - /// - /// We can't trust the Content-Length header, and it might be null. - /// This makes sure that we only ever read up to maxLength into memory. - /// - /// The response content stream - /// The maximum length to buffer (null = unlimited) - /// The content length, if known - /// A CancellationToken, if applicable - /// Either a buffered MemoryStream, or Stream.Null - private static async Task GetSafeStreamOrNullAsync( - Stream stream, long? maxLength, long? contentLength, CancellationToken token = default - ) - { - if (maxLength is 0) return Stream.Null; - if (contentLength > maxLength) return Stream.Null; - - MemoryStream buf = new(); - if (contentLength < maxLength) - maxLength = contentLength.Value; - - await stream.CopyToAsync(buf, maxLength, token); - if (maxLength == null || buf.Length <= maxLength) - { - buf.Seek(0, SeekOrigin.Begin); - return buf; - } - - await buf.DisposeAsync(); - return Stream.Null; - } } public class DriveFileCreationRequest diff --git a/Iceshrimp.Backend/Core/Services/UserService.cs b/Iceshrimp.Backend/Core/Services/UserService.cs index 1e340219..3ceace6d 100644 --- a/Iceshrimp.Backend/Core/Services/UserService.cs +++ b/Iceshrimp.Backend/Core/Services/UserService.cs @@ -1203,13 +1203,14 @@ public class UserService( try { - var res = await httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead); + const int maxLength = 1_000_000; + var res = await httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead); if ( res is not { IsSuccessStatusCode: true, - Content.Headers: { ContentType.MediaType: "text/html", ContentLength: <= 1_000_000 } + Content.Headers: { ContentType.MediaType: "text/html", ContentLength: null or <= maxLength } } ) { @@ -1220,9 +1221,13 @@ public class UserService( continue; } - var html = await res.Content.ReadAsStringAsync(); - var document = await new HtmlParser().ParseDocumentAsync(html); + var contentLength = res.Content.Headers.ContentLength; + var stream = await res.Content.ReadAsStreamAsync() + .ContinueWithResult(p => p.GetSafeStreamOrNullAsync(maxLength, contentLength)); + if (stream == Stream.Null) throw new Exception("Response size limit exceeded"); + + var document = await new HtmlParser().ParseDocumentAsync(stream); var headLinks = document.Head?.Children.Where(el => el.NodeName.ToLower() == "link").ToList() ?? []; userProfileField.IsVerified =