[backend/federation] Handle mention parsing edge cases properly (ISH-50)

This commit is contained in:
Laura Hausmann 2024-02-12 21:13:32 +01:00
parent f371503b2f
commit bf916f7046
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
10 changed files with 105 additions and 91 deletions

View file

@ -23,24 +23,27 @@ public class NoteRenderer(
? await RenderAsync(note.Renote, accounts, mentions, --recurse) ? await RenderAsync(note.Renote, accounts, mentions, --recurse)
: null; : null;
var text = note.Text; //TODO: append quote uri var text = note.Text; //TODO: append quote uri
var content = text != null ? await mfmConverter.ToHtmlAsync(text, note.MentionedRemoteUsers) : null;
if (mentions == null) { if (mentions == null) {
mentions = await db.Users.Where(p => note.Mentions.Contains(p.Id)) mentions = await db.Users.Where(p => note.Mentions.Contains(p.Id))
.Select(u => new Mention { .Select(u => new Mention(u, config.Value.WebDomain))
Id = u.Id,
Username = u.Username,
Acct = u.Acct,
Url = (u.UserProfile != null
? u.UserProfile.Url ?? u.Uri
: u.Uri) ?? $"https://{config.Value.WebDomain}/@{u.Username}"
})
.ToListAsync(); .ToListAsync();
} }
else { else {
mentions = [..mentions.Where(p => note.Mentions.Contains(p.Id))]; mentions = [..mentions.Where(p => note.Mentions.Contains(p.Id))];
} }
var mentionedUsers = mentions.Select(p => new Note.MentionedUser {
Host = p.Host ?? config.Value.AccountDomain,
Uri = p.Uri,
Username = p.Username,
Url = p.Url
}).ToList();
var content = text != null
? await mfmConverter.ToHtmlAsync(text, mentionedUsers, note.UserHost)
: null;
var account = accounts?.FirstOrDefault(p => p.Id == note.UserId) ?? await userRenderer.RenderAsync(note.User); var account = accounts?.FirstOrDefault(p => p.Id == note.UserId) ?? await userRenderer.RenderAsync(note.User);
var res = new Status { var res = new Status {
@ -77,14 +80,7 @@ public class NoteRenderer(
/// <summary>
/// Builds a <see cref="Mention"/> for every user whose id appears in the <c>Mentions</c> list of any of the given notes.
/// </summary>
/// <param name="notes">Notes whose mentioned user ids are collected (duplicates are removed).</param>
/// <returns>One <see cref="Mention"/> per matching row in <c>db.Users</c>.</returns>
private async Task<List<Mention>> GetMentions(IEnumerable<Note> notes) {
    var mentionedIds = notes.SelectMany(note => note.Mentions).Distinct();

    // NOTE(review): the Mention constructor reads u.UserProfile; presumably that navigation has to be
    // loaded for the projected user for the profile URL to be picked up - confirm against the EF setup.
    var query = db.Users
                  .Where(user => mentionedIds.Contains(user.Id))
                  .Select(user => new Mention(user, config.Value.WebDomain));

    return await query.ToListAsync();
}

View file

@ -27,7 +27,7 @@ public class UserRenderer(IOptions<Config.InstanceSection> config, MfmConverter
FollowersCount = user.FollowersCount, FollowersCount = user.FollowersCount,
FollowingCount = user.FollowingCount, FollowingCount = user.FollowingCount,
StatusesCount = user.NotesCount, StatusesCount = user.NotesCount,
Note = await mfmConverter.ToHtmlAsync(profile?.Description ?? "", []), Note = await mfmConverter.ToHtmlAsync(profile?.Description ?? "", [], user.Host),
Url = profile?.Url ?? user.Uri ?? $"https://{user.Host ?? config.Value.WebDomain}/@{user.Username}", Url = profile?.Url ?? user.Uri ?? $"https://{user.Host ?? config.Value.WebDomain}/@{user.Username}",
AvatarStaticUrl = user.AvatarUrl ?? $"https://{config.Value.WebDomain}/identicon/{user.Id}", //TODO AvatarStaticUrl = user.AvatarUrl ?? $"https://{config.Value.WebDomain}/identicon/{user.Id}", //TODO
HeaderUrl = user.BannerUrl ?? _transparent, HeaderUrl = user.BannerUrl ?? _transparent,

View file

@ -1,10 +1,29 @@
using System.Diagnostics.CodeAnalysis;
using Iceshrimp.Backend.Core.Database.Tables;
using J = System.Text.Json.Serialization.JsonPropertyNameAttribute; using J = System.Text.Json.Serialization.JsonPropertyNameAttribute;
using JI = System.Text.Json.Serialization.JsonIgnoreAttribute;
namespace Iceshrimp.Backend.Controllers.Mastodon.Schemas.Entities; namespace Iceshrimp.Backend.Controllers.Mastodon.Schemas.Entities;
/// <summary>
/// Mention entity of the Mastodon-compatible API. The four properties carrying a JSON name attribute are
/// serialized as <c>id</c>, <c>username</c>, <c>acct</c> and <c>url</c>; the two fields are marked as ignored.
/// </summary>
public class Mention() {
    [J("id")]       public required string Id       { get; set; }
    [J("username")] public required string Username { get; set; }
    [J("acct")]     public required string Acct     { get; set; }
    [J("url")]      public required string Url      { get; set; }

    // internal fields that won't be serialized
    [JI] public required string  Uri;
    [JI] public required string? Host;

    /// <summary>
    /// Populates every required member from a database user.
    /// </summary>
    /// <param name="u">The user to build the mention from.</param>
    /// <param name="webDomain">Domain used to build fallback links when the user has no stored URI/URL.</param>
    [SetsRequiredMembers]
    public Mention(User u, string webDomain) : this() {
        Id       = u.Id;
        Username = u.Username;
        Acct     = u.Acct;
        Host     = u.Host;

        // Users without a stored URI get an address under the given web domain
        Uri = u.Uri ?? $"https://{webDomain}/users/{u.Id}";

        // Preference order: profile URL (if a profile is present), then the user's URI, then the fallback profile link
        Url = u.UserProfile?.Url ?? u.Uri ?? $"https://{webDomain}/@{u.Username}";
    }
}

View file

@ -5,6 +5,12 @@ using EntityFrameworkCore.Projectables;
namespace Iceshrimp.Backend.Core.Extensions; namespace Iceshrimp.Backend.Core.Extensions;
public static class StringExtensions { public static class StringExtensions {
/// <summary>
/// Case-sensitive equality check using <see cref="StringComparison.InvariantCulture"/>.
/// </summary>
public static bool EqualsInvariant(this string s1, string s2) {
    return string.Equals(s1, s2, StringComparison.InvariantCulture);
}
/// <summary>
/// Case-insensitive equality check using <see cref="StringComparison.InvariantCultureIgnoreCase"/>.
/// </summary>
public static bool EqualsIgnoreCase(this string s1, string s2) {
    return string.Equals(s1, s2, StringComparison.InvariantCultureIgnoreCase);
}
/// <summary>
/// Returns at most the first <paramref name="maxLength"/> characters of <paramref name="target"/>.
/// </summary>
public static string Truncate(this string target, int maxLength) {
    var length = Math.Min(target.Length, maxLength);
    return target.Substring(0, length);
}

View file

@ -1,12 +1,9 @@
using Iceshrimp.Backend.Core.Configuration; using Iceshrimp.Backend.Core.Configuration;
using Iceshrimp.Backend.Core.Database;
using Iceshrimp.Backend.Core.Database.Tables; using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Extensions; using Iceshrimp.Backend.Core.Extensions;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing; using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Serialization; using Iceshrimp.Backend.Core.Helpers.LibMfm.Serialization;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Types; using Iceshrimp.Backend.Core.Helpers.LibMfm.Types;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Caching.Distributed;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
namespace Iceshrimp.Backend.Core.Federation.ActivityPub; namespace Iceshrimp.Backend.Core.Federation.ActivityPub;
@ -17,68 +14,64 @@ using SplitDomainMapping = IReadOnlyDictionary<(string usernameLower, string web
/// Resolves mentions into their canonical form. This is required for handling split domain mentions correctly, as it cannot be guaranteed that remote instances handle split domain users correctly. /// Resolves mentions into their canonical form. This is required for handling split domain mentions correctly, as it cannot be guaranteed that remote instances handle split domain users correctly.
/// </summary> /// </summary>
public class MentionsResolver(
    IOptions<Config.InstanceSection> config
) {
    /// <summary>
    /// Parses the given MFM text, resolves the mentions in it and serializes the result back to MFM.
    /// </summary>
    /// <param name="mfm">MFM source text.</param>
    /// <param name="host">Host the text originated from; used for mentions that carry no host of their own.</param>
    /// <param name="mentionCache">Known mentioned users to match mentions against.</param>
    /// <param name="splitDomainMapping">Lookup keyed by (lower-cased username, host) that yields the host to use instead.</param>
    /// <returns>The re-serialized MFM text.</returns>
    public string ResolveMentions(
        string mfm, string? host,
        List<Note.MentionedUser> mentionCache,
        SplitDomainMapping splitDomainMapping
    ) {
        var nodes = MfmParser.Parse(mfm);
        nodes = ResolveMentions(nodes, host, mentionCache, splitDomainMapping);
        return MfmSerializer.Serialize(nodes);
    }

    /// <summary>
    /// Resolves every mention node found among the given nodes and their direct children.
    /// Top-level mention nodes are replaced by the result of the resolution (the updated mention node,
    /// or a plain node when no matching user is known).
    /// </summary>
    /// <returns>A new list holding the (possibly replaced) top-level nodes.</returns>
    public IEnumerable<MfmNode> ResolveMentions(
        IEnumerable<MfmNode> nodes, string? host,
        List<Note.MentionedUser> mentionCache,
        SplitDomainMapping splitDomainMapping
    ) {
        var nodesList = nodes.ToList();

        // We need to call .ToList() on this so we can modify the collection in the loop
        var mentionNodes = nodesList.SelectMany(p => p.Children.Append(p)).OfType<MfmMentionNode>().ToList();

        foreach (var mention in mentionNodes) {
            var index    = nodesList.IndexOf(mention);
            var resolved = ResolveMention(mention, host, mentionCache, splitDomainMapping);

            // Mentions that are children of another node are not elements of nodesList, so IndexOf
            // yields -1 for them. Writing to that index would throw; such nodes are still updated in
            // place by ResolveMention, but they cannot be swapped for a plain node from here.
            if (index >= 0)
                nodesList[index] = resolved;
        }

        return nodesList;
    }

    /// <summary>
    /// Normalizes the host of a mention node and matches it against the mention cache.
    /// </summary>
    /// <returns>
    /// The same node with username, host and acct taken from the cached user when a match exists;
    /// otherwise a plain node wrapping the mention's acct as text.
    /// </returns>
    private MfmInlineNode ResolveMention(
        MfmMentionNode node, string? host,
        IEnumerable<Note.MentionedUser> mentionCache,
        SplitDomainMapping splitDomainMapping
    ) {
        // Fall back to object host, as localpart-only mentions are relative to the instance the note originated from
        node.Host ??= host ?? config.Value.AccountDomain;

        // Mentions addressed to the web domain are treated as addressed to the account domain
        if (node.Host == config.Value.WebDomain)
            node.Host = config.Value.AccountDomain;

        if (node.Host != config.Value.AccountDomain &&
            splitDomainMapping.TryGetValue((node.Username.ToLowerInvariant(), node.Host), out var value))
            node.Host = value;

        var resolvedUser =
            mentionCache.FirstOrDefault(p => p.Username.EqualsIgnoreCase(node.Username) && p.Host == node.Host);

        if (resolvedUser != null) {
            node.Username = resolvedUser.Username;
            node.Host     = resolvedUser.Host;
            node.Acct     = $"@{resolvedUser.Username}@{resolvedUser.Host}";

            return node;
        }

        // No known user matches: emit the mention as plain text instead
        return new MfmPlainNode {
            Children = [
                new MfmTextNode {
                    Text = node.Acct
                }
            ]
        };
    }
}

View file

@ -51,22 +51,24 @@ public class NoteRenderer(IOptions<Config.InstanceSection> config, MfmConverter
return new ASNote { return new ASNote {
Id = id, Id = id,
Content = note.Text != null ? await mfmConverter.ToHtmlAsync(note.Text, []) : null,
AttributedTo = [new ASObjectBase(userId)], AttributedTo = [new ASObjectBase(userId)],
Type = $"{Constants.ActivityStreamsNs}#Note", Type = $"{Constants.ActivityStreamsNs}#Note",
MkContent = note.Text, MkContent = note.Text,
PublishedAt = note.CreatedAt, PublishedAt = note.CreatedAt,
Sensitive = note.Cw != null, Sensitive = note.Cw != null,
InReplyTo = replyId, InReplyTo = replyId,
Cc = cc,
To = to,
Tags = tags,
Content = note.Text != null
? await mfmConverter.ToHtmlAsync(note.Text, mentions, note.UserHost)
: null,
Source = note.Text != null Source = note.Text != null
? new ASNoteSource { ? new ASNoteSource {
Content = note.Text, Content = note.Text,
MediaType = "text/x.misskeymarkdown" MediaType = "text/x.misskeymarkdown"
} }
: null, : null
Cc = cc,
To = to,
Tags = tags
}; };
} }
} }

View file

@ -6,6 +6,7 @@ using AngleSharp.Dom;
using AngleSharp.Html.Parser; using AngleSharp.Html.Parser;
using Iceshrimp.Backend.Core.Configuration; using Iceshrimp.Backend.Core.Configuration;
using Iceshrimp.Backend.Core.Database.Tables; using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Extensions;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing; using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Types; using Iceshrimp.Backend.Core.Helpers.LibMfm.Types;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
@ -31,44 +32,44 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
return sb.ToString().Trim(); return sb.ToString().Trim();
} }
/// <summary>
/// Renders the given MFM nodes into a single HTML <c>p</c> element and returns its markup.
/// </summary>
/// <param name="nodes">Parsed MFM nodes to render.</param>
/// <param name="mentions">Mentioned users passed on to the node conversion.</param>
/// <param name="host">Host passed on to the node conversion; used there for mentions without a host.</param>
/// <returns>The HTML produced for the paragraph element.</returns>
public async Task<string> ToHtmlAsync(IEnumerable<MfmNode> nodes, List<Note.MentionedUser> mentions, string? host) {
    // The browsing context is disposable; release it once the markup has been written out
    using var context = BrowsingContext.New();
    var document = await context.OpenNewAsync();
    var element  = document.CreateElement("p");

    foreach (var node in nodes) element.AppendNodes(FromMfmNode(document, node, mentions, host));

    await using var sw = new StringWriter();
    await element.ToHtmlAsync(sw);
    return sw.ToString();
}
/// <summary>
/// Parses the given MFM text and renders the resulting nodes to HTML.
/// </summary>
/// <param name="mfm">MFM source text.</param>
/// <param name="mentions">Mentioned users passed on to the node-based overload.</param>
/// <param name="host">Host passed on to the node-based overload.</param>
public async Task<string> ToHtmlAsync(string mfm, List<Note.MentionedUser> mentions, string? host) {
    var parsed = MfmParser.Parse(mfm);
    var html   = await ToHtmlAsync(parsed, mentions, host);
    return html;
}
private INode FromMfmNode(IDocument document, MfmNode node, List<Note.MentionedUser> mentions) { private INode FromMfmNode(IDocument document, MfmNode node, List<Note.MentionedUser> mentions, string? host) {
switch (node) { switch (node) {
case MfmBoldNode: { case MfmBoldNode: {
var el = document.CreateElement("b"); var el = document.CreateElement("b");
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
case MfmSmallNode: { case MfmSmallNode: {
var el = document.CreateElement("small"); var el = document.CreateElement("small");
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
case MfmStrikeNode: { case MfmStrikeNode: {
var el = document.CreateElement("del"); var el = document.CreateElement("del");
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
case MfmItalicNode: case MfmItalicNode:
case MfmFnNode: { case MfmFnNode: {
var el = document.CreateElement("i"); var el = document.CreateElement("i");
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
case MfmCodeBlockNode codeBlockNode: { case MfmCodeBlockNode codeBlockNode: {
@ -80,7 +81,7 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
} }
case MfmCenterNode: { case MfmCenterNode: {
var el = document.CreateElement("div"); var el = document.CreateElement("div");
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
case MfmEmojiCodeNode emojiCodeNode: { case MfmEmojiCodeNode emojiCodeNode: {
@ -114,25 +115,20 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
case MfmLinkNode linkNode: { case MfmLinkNode linkNode: {
var el = document.CreateElement("a"); var el = document.CreateElement("a");
el.SetAttribute("href", linkNode.Url); el.SetAttribute("href", linkNode.Url);
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
case MfmMentionNode mentionNode: { case MfmMentionNode mentionNode: {
var el = document.CreateElement("span"); var el = document.CreateElement("span");
if (mentionNode.Host == config.Value.AccountDomain || mentionNode.Host == config.Value.WebDomain) // Fall back to object host, as localpart-only mentions are relative to the instance the note originated from
mentionNode.Host = null; mentionNode.Host ??= host ?? config.Value.AccountDomain;
var mention = mentionNode.Host == null if (mentionNode.Host == config.Value.WebDomain)
? new Note.MentionedUser { mentionNode.Host = config.Value.AccountDomain;
Host = config.Value.AccountDomain,
Uri = $"https://{config.Value.WebDomain}/@{mentionNode.Username}", var mention = mentions.FirstOrDefault(p => p.Username.EqualsIgnoreCase(mentionNode.Username) &&
Username = mentionNode.Username p.Host.EqualsIgnoreCase(mentionNode.Host));
}
: mentions.FirstOrDefault(p => string.Equals(p.Username, mentionNode.Username,
StringComparison.InvariantCultureIgnoreCase) &&
string.Equals(p.Host, mentionNode.Host,
StringComparison.InvariantCultureIgnoreCase));
if (mention == null) { if (mention == null) {
el.TextContent = mentionNode.Acct; el.TextContent = mentionNode.Acct;
} }
@ -152,7 +148,7 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
} }
case MfmQuoteNode: { case MfmQuoteNode: {
var el = document.CreateElement("blockquote"); var el = document.CreateElement("blockquote");
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
case MfmTextNode textNode: { case MfmTextNode textNode: {
@ -188,7 +184,7 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
} }
case MfmPlainNode: { case MfmPlainNode: {
var el = document.CreateElement("span"); var el = document.CreateElement("span");
AppendChildren(el, document, node, mentions); AppendChildren(el, document, node, mentions, host);
return el; return el;
} }
default: { default: {
@ -198,8 +194,8 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
} }
/// <summary>
/// Converts each child of <paramref name="parent"/> and appends the result to <paramref name="element"/>.
/// </summary>
private void AppendChildren(
    INode element, IDocument document, MfmNode parent,
    List<Note.MentionedUser> mentions, string? host
) {
    foreach (var child in parent.Children) {
        var rendered = FromMfmNode(document, child, mentions, host);
        element.AppendNodes(rendered);
    }
}
} }

View file

@ -80,7 +80,10 @@ public static class MfmSerializer {
break; break;
} }
case MfmPlainNode: { case MfmPlainNode: {
result.Append(node.Children.OfType<MfmTextNode>().Select(p => p.Text)); result.Append("<plain>");
foreach (var s in node.Children.OfType<MfmTextNode>().Select(p => p.Text))
result.Append(s);
result.Append("</plain>");
break; break;
} }
case MfmSmallNode: { case MfmSmallNode: {

View file

@ -49,7 +49,7 @@ public class NoteService(
var (mentionedUserIds, mentions, remoteMentions, splitDomainMapping) = await ResolveNoteMentionsAsync(text); var (mentionedUserIds, mentions, remoteMentions, splitDomainMapping) = await ResolveNoteMentionsAsync(text);
if (text != null) if (text != null)
text = await mentionsResolver.ResolveMentions(text, null, mentions, splitDomainMapping); text = mentionsResolver.ResolveMentions(text, null, mentions, splitDomainMapping);
var actor = await userRenderer.RenderAsync(user); var actor = await userRenderer.RenderAsync(user);
@ -184,8 +184,7 @@ public class NoteService(
dbNote.VisibleUserIds = visibleUserIds.Distinct().ToList(); dbNote.VisibleUserIds = visibleUserIds.Distinct().ToList();
} }
dbNote.Text = await mentionsResolver.ResolveMentions(dbNote.Text, dbNote.UserHost, remoteMentions, dbNote.Text = mentionsResolver.ResolveMentions(dbNote.Text, dbNote.UserHost, mentions, splitDomainMapping);
splitDomainMapping);
} }
user.NotesCount++; user.NotesCount++;

View file

@ -29,7 +29,7 @@ public class MfmTests {
var converter = provider.GetRequiredService<MfmConverter>(); var converter = provider.GetRequiredService<MfmConverter>();
var pre = DateTime.Now; var pre = DateTime.Now;
await converter.ToHtmlAsync(Mfm, []); await converter.ToHtmlAsync(Mfm, [], null);
var post = DateTime.Now; var post = DateTime.Now;
var ms = (post - pre).TotalMilliseconds; var ms = (post - pre).TotalMilliseconds;
Console.WriteLine($"Took {ms} ms"); Console.WriteLine($"Took {ms} ms");