From c450903051244076f499925452acadbedb249fe5 Mon Sep 17 00:00:00 2001 From: Laura Hausmann Date: Mon, 26 Feb 2024 18:50:30 +0100 Subject: [PATCH] [backend/federation] Handle mentions in non-misskey user bios & user fields correctly (ISH-92) --- .../Federation/ActivityPub/UserRenderer.cs | 4 +- .../Federation/ActivityPub/UserResolver.cs | 32 +++++++- .../Helpers/LibMfm/Conversion/MfmConverter.cs | 20 ++++- .../LibMfm/Parsing/HtmlMentionsExtractor.cs | 53 +++++++++++++ .../Core/Services/NoteService.cs | 5 +- .../Services/UserProfileMentionsResolver.cs | 77 ++++++++++++++++++- .../Core/Services/UserService.cs | 47 ++++++++--- 7 files changed, 217 insertions(+), 21 deletions(-) create mode 100644 Iceshrimp.Backend/Core/Helpers/LibMfm/Parsing/HtmlMentionsExtractor.cs diff --git a/Iceshrimp.Backend/Core/Federation/ActivityPub/UserRenderer.cs b/Iceshrimp.Backend/Core/Federation/ActivityPub/UserRenderer.cs index b98b6f8c..b8d0ab66 100644 --- a/Iceshrimp.Backend/Core/Federation/ActivityPub/UserRenderer.cs +++ b/Iceshrimp.Backend/Core/Federation/ActivityPub/UserRenderer.cs @@ -10,7 +10,7 @@ using Microsoft.Extensions.Options; namespace Iceshrimp.Backend.Core.Federation.ActivityPub; -public class UserRenderer(IOptions config, DatabaseContext db, MfmConverter mfmConverter) +public class UserRenderer(IOptions config, DatabaseContext db) { /// /// This function is meant for compacting an actor into the @id form as specified in ActivityStreams @@ -58,7 +58,7 @@ public class UserRenderer(IOptions config, DatabaseConte Url = new ASLink(user.GetPublicUrl(config.Value)), Username = user.Username, DisplayName = user.DisplayName ?? user.Username, - Summary = profile?.Description != null ? await mfmConverter.FromHtmlAsync(profile.Description) : null, + Summary = profile?.Description != null ? 
await MfmConverter.FromHtmlAsync(profile.Description) : null, MkSummary = profile?.Description, IsCat = user.IsCat, IsDiscoverable = user.IsExplorable, diff --git a/Iceshrimp.Backend/Core/Federation/ActivityPub/UserResolver.cs b/Iceshrimp.Backend/Core/Federation/ActivityPub/UserResolver.cs index 397a9bb0..a7b589cd 100644 --- a/Iceshrimp.Backend/Core/Federation/ActivityPub/UserResolver.cs +++ b/Iceshrimp.Backend/Core/Federation/ActivityPub/UserResolver.cs @@ -1,8 +1,10 @@ using AsyncKeyedLock; +using Iceshrimp.Backend.Core.Configuration; using Iceshrimp.Backend.Core.Database.Tables; using Iceshrimp.Backend.Core.Federation.WebFinger; using Iceshrimp.Backend.Core.Middleware; using Iceshrimp.Backend.Core.Services; +using Microsoft.Extensions.Options; namespace Iceshrimp.Backend.Core.Federation.ActivityPub; @@ -10,7 +12,8 @@ public class UserResolver( ILogger logger, UserService userSvc, WebFingerService webFingerSvc, - FollowupTaskService followupTaskSvc + FollowupTaskService followupTaskSvc, + IOptions config ) { private static readonly AsyncKeyedLocker KeyedLocker = new(o => @@ -162,6 +165,33 @@ public class UserResolver( } } + public async Task ResolveAsyncLimited(string uri, Func limitReached) + { + // First, let's see if we already know the user + var user = await userSvc.GetUserFromQueryAsync(uri); + if (user != null) + return await GetUpdatedUser(user); + + if (uri.StartsWith($"https://{config.Value.WebDomain}/")) return null; + + // We don't, so we need to run WebFinger + var (acct, resolvedUri) = await WebFingerAsync(uri); + + // Check the database again with the new data + if (resolvedUri != uri) user = await userSvc.GetUserFromQueryAsync(resolvedUri); + if (user == null && acct != uri) user = await userSvc.GetUserFromQueryAsync(acct); + if (user != null) + return await GetUpdatedUser(user); + + if (limitReached()) return null; + + using (await KeyedLocker.LockAsync(resolvedUri)) + { + // Pass the job on to userSvc, which will create the user + return await 
userSvc.CreateUserAsync(resolvedUri, acct); + } + } + private async Task GetUpdatedUser(User user) { if (!user.NeedsUpdate) return user; diff --git a/Iceshrimp.Backend/Core/Helpers/LibMfm/Conversion/MfmConverter.cs b/Iceshrimp.Backend/Core/Helpers/LibMfm/Conversion/MfmConverter.cs index b23f7418..d9a3de34 100644 --- a/Iceshrimp.Backend/Core/Helpers/LibMfm/Conversion/MfmConverter.cs +++ b/Iceshrimp.Backend/Core/Helpers/LibMfm/Conversion/MfmConverter.cs @@ -17,7 +17,7 @@ namespace Iceshrimp.Backend.Core.Helpers.LibMfm.Conversion; public class MfmConverter(IOptions config) { - public async Task FromHtmlAsync(string? html, List? mentions = null) + public static async Task FromHtmlAsync(string? html, List? mentions = null) { if (html == null) return null; @@ -34,6 +34,24 @@ public class MfmConverter(IOptions config) return sb.ToString().Trim(); } + public static async Task> ExtractMentionsFromHtmlAsync(string? html) + { + if (html == null) return []; + + // Ensure compatibility with AP servers that send both
as well as newlines + var regex = new Regex(@"\r?\n", RegexOptions.IgnoreCase); + html = regex.Replace(html, "\n"); + + var dom = await new HtmlParser().ParseDocumentAsync(html); + if (dom.Body == null) return []; + + var parser = new HtmlMentionsExtractor(); + foreach (var node in dom.Body.ChildNodes) + parser.ParseChildren(node); + + return parser.Mentions; + } + public async Task ToHtmlAsync(IEnumerable nodes, List mentions, string? host) { var context = BrowsingContext.New(); diff --git a/Iceshrimp.Backend/Core/Helpers/LibMfm/Parsing/HtmlMentionsExtractor.cs b/Iceshrimp.Backend/Core/Helpers/LibMfm/Parsing/HtmlMentionsExtractor.cs new file mode 100644 index 00000000..d2e5b896 --- /dev/null +++ b/Iceshrimp.Backend/Core/Helpers/LibMfm/Parsing/HtmlMentionsExtractor.cs @@ -0,0 +1,53 @@ +using AngleSharp.Dom; +using AngleSharp.Html.Dom; + +namespace Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing; + +internal class HtmlMentionsExtractor +{ + internal List Mentions { get; } = []; + + private void ParseNode(INode node) + { + if (node.NodeType is NodeType.Text) + return; + if (node.NodeType is NodeType.Comment or NodeType.Document) + return; + + switch (node.NodeName) + { + case "A": + { + if (node is not HtmlElement el) return; + var href = el.GetAttribute("href"); + if (href == null) return; + if (el.ClassList.Contains("u-url") && el.ClassList.Contains("mention")) + Mentions.Add(href); + return; + } + case "PRE": + { + if (node.ChildNodes is [{ NodeName: "CODE" }]) + return; + ParseChildren(node); + return; + } + case "BR": + case "BLOCKQUOTE": + { + return; + } + + default: + { + ParseChildren(node); + return; + } + } + } + + internal void ParseChildren(INode node) + { + foreach (var child in node.ChildNodes) ParseNode(child); + } +} \ No newline at end of file diff --git a/Iceshrimp.Backend/Core/Services/NoteService.cs b/Iceshrimp.Backend/Core/Services/NoteService.cs index 65c20157..b828f461 100644 --- a/Iceshrimp.Backend/Core/Services/NoteService.cs +++ 
b/Iceshrimp.Backend/Core/Services/NoteService.cs @@ -33,7 +33,6 @@ public class NoteService( ActivityPub.NoteRenderer noteRenderer, ActivityPub.UserRenderer userRenderer, ActivityPub.MentionsResolver mentionsResolver, - MfmConverter mfmConverter, DriveService driveSvc, NotificationService notificationSvc, EventService eventSvc, @@ -342,7 +341,7 @@ public class NoteService( Id = IdHelpers.GenerateSlowflakeId(createdAt), Uri = note.Id, Url = note.Url?.Id, //FIXME: this doesn't seem to work yet - Text = note.MkContent ?? await mfmConverter.FromHtmlAsync(note.Content, mentions), + Text = note.MkContent ?? await MfmConverter.FromHtmlAsync(note.Content, mentions), Cw = note.Summary, UserId = actor.Id, CreatedAt = createdAt, @@ -446,7 +445,7 @@ public class NoteService( await ResolveNoteMentionsAsync(note); mentionedLocalUserIds = mentionedLocalUserIds.Except(previousMentionedLocalUserIds).ToList(); - dbNote.Text = note.MkContent ?? await mfmConverter.FromHtmlAsync(note.Content, mentions); + dbNote.Text = note.MkContent ?? 
await MfmConverter.FromHtmlAsync(note.Content, mentions); dbNote.Cw = note.Summary; if (dbNote.Cw is { Length: > 100000 }) diff --git a/Iceshrimp.Backend/Core/Services/UserProfileMentionsResolver.cs b/Iceshrimp.Backend/Core/Services/UserProfileMentionsResolver.cs index 34362ad3..d8b6df9a 100644 --- a/Iceshrimp.Backend/Core/Services/UserProfileMentionsResolver.cs +++ b/Iceshrimp.Backend/Core/Services/UserProfileMentionsResolver.cs @@ -2,6 +2,8 @@ using System.Diagnostics.CodeAnalysis; using Iceshrimp.Backend.Core.Configuration; using Iceshrimp.Backend.Core.Database.Tables; using Iceshrimp.Backend.Core.Extensions; +using Iceshrimp.Backend.Core.Federation.ActivityStreams.Types; +using Iceshrimp.Backend.Core.Helpers.LibMfm.Conversion; using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing; using Iceshrimp.Backend.Core.Helpers.LibMfm.Types; using Microsoft.Extensions.Options; @@ -10,13 +12,82 @@ namespace Iceshrimp.Backend.Core.Services; public class UserProfileMentionsResolver( ActivityPub.UserResolver userResolver, - IOptions config, - ILogger logger + IOptions config ) { private int _recursionLimit = 10; - public async Task> ResolveMentions(UserProfile.Field[]? fields, string? bio, string? host) + public async Task> ResolveMentions( + ASActor actor, string? host + ) + { + var fields = actor.Attachments?.OfType() + .Where(p => p is { Name: not null, Value: not null }) + .ToList() ?? []; + + if (fields is not { Count: > 0 } && (actor.MkSummary ?? actor.Summary) == null) return []; + var parsedFields = await fields.SelectMany(p => [p.Name, p.Value]) + .Select(async p => await MfmConverter.ExtractMentionsFromHtmlAsync(p)) + .AwaitAllAsync(); + + var parsedBio = actor.MkSummary == null ? 
await MfmConverter.ExtractMentionsFromHtmlAsync(actor.Summary) : []; + + var userUris = parsedFields.Prepend(parsedBio).SelectMany(p => p).ToList(); + var mentionNodes = new List(); + + if (actor.MkSummary != null) + { + var nodes = MfmParser.Parse(actor.MkSummary); + mentionNodes = EnumerateMentions(nodes); + } + + var users = await mentionNodes + .DistinctBy(p => p.Acct) + .Select(async p => + { + try + { + return await userResolver.ResolveAsyncLimited(p.Username, p.Host ?? host, + () => _recursionLimit-- <= 0); + } + catch + { + return null; + } + }) + .AwaitAllNoConcurrencyAsync(); + + users.AddRange(await userUris + .Distinct() + .Select(async p => + { + try + { + return await userResolver.ResolveAsyncLimited(p, () => _recursionLimit-- <= 0); + } + catch + { + return null; + } + }) + .AwaitAllNoConcurrencyAsync()); + + return users.Where(p => p != null) + .Cast() + .DistinctBy(p => p.Id) + .Select(p => new Note.MentionedUser + { + Host = p.Host, + Uri = p.Uri ?? p.GetPublicUri(config.Value), + Url = p.UserProfile?.Url, + Username = p.Username + }) + .ToList(); + } + + public async Task> ResolveMentions( + UserProfile.Field[]? fields, string? bio, string? host + ) { if (fields is not { Length: > 0 } && bio == null) return []; var input = (fields ?? 
[]) diff --git a/Iceshrimp.Backend/Core/Services/UserService.cs b/Iceshrimp.Backend/Core/Services/UserService.cs index 2f216053..d24ea632 100644 --- a/Iceshrimp.Backend/Core/Services/UserService.cs +++ b/Iceshrimp.Backend/Core/Services/UserService.cs @@ -25,7 +25,6 @@ public class UserService( ActivityPub.ActivityRenderer activityRenderer, ActivityPub.ActivityDeliverService deliverSvc, DriveService driveSvc, - MfmConverter mfmConverter, FollowupTaskService followupTaskSvc, NotificationService notificationSvc, EmojiService emojiSvc @@ -108,12 +107,12 @@ public class UserService( .Where(p => p is { Name: not null, Value: not null }) .Select(async p => new UserProfile.Field { - Name = p.Name!, Value = await mfmConverter.FromHtmlAsync(p.Value) ?? "" + Name = p.Name!, Value = await MfmConverter.FromHtmlAsync(p.Value) ?? "" }) .AwaitAllAsync() : null; - var bio = actor.MkSummary ?? await mfmConverter.FromHtmlAsync(actor.Summary); + var bio = actor.MkSummary ?? await MfmConverter.FromHtmlAsync(actor.Summary); user = new User { @@ -165,7 +164,7 @@ public class UserService( var processPendingDeletes = await ResolveAvatarAndBanner(user, actor); await db.SaveChangesAsync(); await processPendingDeletes(); - await UpdateProfileMentionsInBackground(user); + await UpdateProfileMentionsInBackground(user, actor); return user; } catch (UniqueConstraintException) @@ -246,7 +245,7 @@ public class UserService( .Where(p => p is { Name: not null, Value: not null }) .Select(async p => new UserProfile.Field { - Name = p.Name!, Value = await mfmConverter.FromHtmlAsync(p.Value) ?? "" + Name = p.Name!, Value = await MfmConverter.FromHtmlAsync(p.Value) ?? "" }) .AwaitAllAsync() : null; @@ -260,7 +259,7 @@ public class UserService( var processPendingDeletes = await ResolveAvatarAndBanner(user, actor); - user.UserProfile.Description = actor.MkSummary ?? await mfmConverter.FromHtmlAsync(actor.Summary); + user.UserProfile.Description = actor.MkSummary ?? 
await MfmConverter.FromHtmlAsync(actor.Summary); //user.UserProfile.Birthday = TODO; //user.UserProfile.Location = TODO; user.UserProfile.Fields = fields?.ToArray() ?? []; @@ -270,7 +269,7 @@ public class UserService( db.Update(user); await db.SaveChangesAsync(); await processPendingDeletes(); - await UpdateProfileMentionsInBackground(user); + await UpdateProfileMentionsInBackground(user, actor); return user; } @@ -605,7 +604,7 @@ public class UserService( [SuppressMessage("ReSharper", "EntityFramework.NPlusOne.IncompleteDataQuery", Justification = "Projectables")] [SuppressMessage("ReSharper", "EntityFramework.NPlusOne.IncompleteDataUsage", Justification = "Same as above")] [SuppressMessage("ReSharper", "SuggestBaseTypeForParameter", Justification = "Method only makes sense for users")] - private async Task UpdateProfileMentionsInBackground(User user) + private async Task UpdateProfileMentionsInBackground(User user, ASActor? actor) { var task = followupTaskSvc.ExecuteTask("UpdateProfileMentionsInBackground", async provider => { @@ -614,9 +613,35 @@ public class UserService( .GetRequiredService(); var bgUser = await bgDbContext.Users.IncludeCommonProperties().FirstOrDefaultAsync(p => p.Id == user.Id); if (bgUser?.UserProfile == null) return; - bgUser.UserProfile.Mentions = - await bgMentionsResolver.ResolveMentions(bgUser.UserProfile.Fields, bgUser.UserProfile.Description, - bgUser.Host); + + if (actor != null) + { + var mentions = await bgMentionsResolver.ResolveMentions(actor, bgUser.Host); + var fields = actor.Attachments != null + ? await actor.Attachments + .OfType() + .Where(p => p is { Name: not null, Value: not null }) + .Select(async p => new UserProfile.Field + { + Name = p.Name!, + Value = await MfmConverter.FromHtmlAsync(p.Value, mentions) ?? "" + }) + .AwaitAllAsync() + : null; + + bgUser.UserProfile.Mentions = mentions; + bgUser.UserProfile.Fields = fields?.ToArray() ?? []; + bgUser.UserProfile.Description = actor.MkSummary ?? 
+ await MfmConverter.FromHtmlAsync(actor.Summary, + bgUser.UserProfile.Mentions); + } + else + { + bgUser.UserProfile.Mentions = await bgMentionsResolver.ResolveMentions(bgUser.UserProfile.Fields, + bgUser.UserProfile.Description, + bgUser.Host); + } + bgDbContext.Update(bgUser.UserProfile); await bgDbContext.SaveChangesAsync(); });