[backend/federation] Handle mention parsing edge cases properly (ISH-50)

This commit is contained in:
Laura Hausmann 2024-02-12 21:13:32 +01:00
parent f371503b2f
commit bf916f7046
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
10 changed files with 105 additions and 91 deletions

View file

@ -22,25 +22,28 @@ public class NoteRenderer(
var renote = note.Renote != null && recurse > 0
? await RenderAsync(note.Renote, accounts, mentions, --recurse)
: null;
var text = note.Text; //TODO: append quote uri
var content = text != null ? await mfmConverter.ToHtmlAsync(text, note.MentionedRemoteUsers) : null;
var text = note.Text; //TODO: append quote uri
if (mentions == null) {
mentions = await db.Users.Where(p => note.Mentions.Contains(p.Id))
.Select(u => new Mention {
Id = u.Id,
Username = u.Username,
Acct = u.Acct,
Url = (u.UserProfile != null
? u.UserProfile.Url ?? u.Uri
: u.Uri) ?? $"https://{config.Value.WebDomain}/@{u.Username}"
})
.Select(u => new Mention(u, config.Value.WebDomain))
.ToListAsync();
}
else {
mentions = [..mentions.Where(p => note.Mentions.Contains(p.Id))];
}
var mentionedUsers = mentions.Select(p => new Note.MentionedUser {
Host = p.Host ?? config.Value.AccountDomain,
Uri = p.Uri,
Username = p.Username,
Url = p.Url
}).ToList();
var content = text != null
? await mfmConverter.ToHtmlAsync(text, mentionedUsers, note.UserHost)
: null;
var account = accounts?.FirstOrDefault(p => p.Id == note.UserId) ?? await userRenderer.RenderAsync(note.User);
var res = new Status {
@ -77,14 +80,7 @@ public class NoteRenderer(
// Loads every user that is mentioned by any of the given notes and projects each one into a Mention.
// NOTE(review): this listing looks like a diff view without +/- markers, so the inline
// `new Mention { ... }` initializer and the `new Mention(u, config.Value.WebDomain)` projection below are
// presumably the removed and the added variant of the same Select - confirm against the repository.
private async Task<List<Mention>> GetMentions(IEnumerable<Note> notes) {
// Flatten the per-note mention id lists and drop duplicate ids before querying
var ids = notes.SelectMany(n => n.Mentions).Distinct();
return await db.Users.Where(p => ids.Contains(p.Id))
.Select(u => new Mention {
Id = u.Id,
Username = u.Username,
Acct = u.Acct,
// Prefer the profile url, then the user's uri, then a link built from the configured web domain
Url = u.UserProfile != null
? u.UserProfile.Url ?? u.Uri ?? $"https://{config.Value.WebDomain}/@{u.Username}"
: u.Uri ?? $"https://{config.Value.WebDomain}/@{u.Username}"
})
.Select(u => new Mention(u, config.Value.WebDomain))
.ToListAsync();
}

View file

@ -27,7 +27,7 @@ public class UserRenderer(IOptions<Config.InstanceSection> config, MfmConverter
FollowersCount = user.FollowersCount,
FollowingCount = user.FollowingCount,
StatusesCount = user.NotesCount,
Note = await mfmConverter.ToHtmlAsync(profile?.Description ?? "", []),
Note = await mfmConverter.ToHtmlAsync(profile?.Description ?? "", [], user.Host),
Url = profile?.Url ?? user.Uri ?? $"https://{user.Host ?? config.Value.WebDomain}/@{user.Username}",
AvatarStaticUrl = user.AvatarUrl ?? $"https://{config.Value.WebDomain}/identicon/{user.Id}", //TODO
HeaderUrl = user.BannerUrl ?? _transparent,

View file

@ -1,10 +1,29 @@
using System.Diagnostics.CodeAnalysis;
using Iceshrimp.Backend.Core.Database.Tables;
using J = System.Text.Json.Serialization.JsonPropertyNameAttribute;
using JI = System.Text.Json.Serialization.JsonIgnoreAttribute;
namespace Iceshrimp.Backend.Controllers.Mastodon.Schemas.Entities;
// Mention entity; Id, Username, Acct and Url carry the JSON property names "id", "username", "acct" and "url".
// NOTE(review): two class header lines follow; this listing looks like a diff view without +/- markers, so
// they are presumably the removed and the added header (the latter adds the parameterless primary
// constructor that the User-based constructor chains to via `: this()`) - confirm against the repository.
public class Mention {
public class Mention() {
[J("id")] public required string Id { get; set; }
[J("username")] public required string Username { get; set; }
[J("acct")] public required string Acct { get; set; }
[J("url")] public required string Url { get; set; }
// internal properties that won't be serialized
// (both are declared as fields, without accessors, and carry the JsonIgnore attribute)
[JI] public required string Uri;
[JI] public required string? Host;
// Builds a Mention from a User; webDomain is only used to synthesize fallback links.
// Every required member is assigned here, which is what [SetsRequiredMembers] declares to the compiler.
[SetsRequiredMembers]
public Mention(User u, string webDomain) : this() {
Id = u.Id;
Username = u.Username;
Host = u.Host;
Acct = u.Acct;
// Users without a stored uri get one built from the web domain and their id
Uri = u.Uri ?? $"https://{webDomain}/users/{u.Id}";
// Prefer the profile url, then the user's uri, then a link built from the web domain and username
Url = u.UserProfile != null
? u.UserProfile.Url ?? u.Uri ?? $"https://{webDomain}/@{u.Username}"
: u.Uri ?? $"https://{webDomain}/@{u.Username}";
}
}

View file

@ -5,6 +5,12 @@ using EntityFrameworkCore.Projectables;
namespace Iceshrimp.Backend.Core.Extensions;
public static class StringExtensions {
/// <summary>
/// Case-sensitive equality check that compares both strings with <see cref="StringComparison.InvariantCulture"/>.
/// </summary>
/// <param name="s1">Left-hand string (the extension receiver).</param>
/// <param name="s2">Right-hand string.</param>
/// <returns><c>true</c> when both strings are equal under the invariant culture, otherwise <c>false</c>.</returns>
public static bool EqualsInvariant(this string s1, string s2) {
    return string.Equals(s1, s2, StringComparison.InvariantCulture);
}
/// <summary>
/// Case-insensitive equality check that compares both strings with
/// <see cref="StringComparison.InvariantCultureIgnoreCase"/>.
/// </summary>
/// <param name="s1">Left-hand string (the extension receiver).</param>
/// <param name="s2">Right-hand string.</param>
/// <returns><c>true</c> when both strings are equal ignoring case, otherwise <c>false</c>.</returns>
public static bool EqualsIgnoreCase(this string s1, string s2) {
    return string.Equals(s1, s2, StringComparison.InvariantCultureIgnoreCase);
}
/// <summary>
/// Returns at most the first <paramref name="maxLength"/> characters of <paramref name="target"/>.
/// </summary>
/// <param name="target">String to shorten (the extension receiver).</param>
/// <param name="maxLength">Upper bound for the length of the result.</param>
/// <returns>
/// <paramref name="target"/> itself when it already fits, otherwise its leading
/// <paramref name="maxLength"/> characters.
/// </returns>
public static string Truncate(this string target, int maxLength) =>
    target.Length <= maxLength ? target : target[..maxLength];

View file

@ -1,12 +1,9 @@
using Iceshrimp.Backend.Core.Configuration;
using Iceshrimp.Backend.Core.Database;
using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Extensions;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Serialization;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Types;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Caching.Distributed;
using Microsoft.Extensions.Options;
namespace Iceshrimp.Backend.Core.Federation.ActivityPub;
@ -17,68 +14,64 @@ using SplitDomainMapping = IReadOnlyDictionary<(string usernameLower, string web
/// Resolves mentions into their canonical form. This is required for handling split domain mentions correctly, as it cannot be guaranteed that remote instances handle split domain users correctly.
/// </summary>
public class MentionsResolver(
DatabaseContext db,
IOptions<Config.InstanceSection> config,
IDistributedCache cache
IOptions<Config.InstanceSection> config
) {
// NOTE(review): this listing looks like a diff view without +/- markers. Several declarations and
// statements appear twice (an async/awaiting variant directly followed by a synchronous one); these are
// presumably the removed and the added line of the same change - confirm against the repository.

// String entry point: parses the MFM text, resolves the mentions in the resulting nodes and serializes
// the nodes back to text.
public async Task<string> ResolveMentions(
public string ResolveMentions(
string mfm, string? host,
List<Note.MentionedUser> mentionCache,
SplitDomainMapping splitDomainMapping
) {
var nodes = MfmParser.Parse(mfm);
nodes = await ResolveMentions(nodes, host, mentionCache, splitDomainMapping);
nodes = ResolveMentions(nodes, host, mentionCache, splitDomainMapping);
return MfmSerializer.Serialize(nodes);
}
// Node entry point: materializes the nodes into a list, visits every mention node found among the
// top-level nodes and their direct children, and returns the list.
// NOTE(review): the replacing variant assigns to nodesList[nodesList.IndexOf(mention)]. IndexOf only
// searches the top-level list, so a mention that was reached through p.Children is presumably not an
// element of nodesList; IndexOf would then return -1 and the indexer assignment would throw - TODO confirm
// and handle nested mentions.
public async Task<IEnumerable<MfmNode>> ResolveMentions(
public IEnumerable<MfmNode> ResolveMentions(
IEnumerable<MfmNode> nodes, string? host,
List<Note.MentionedUser> mentionCache,
SplitDomainMapping splitDomainMapping
) {
var nodesList = nodes.ToList();
foreach (var mention in nodesList.SelectMany(p => p.Children.Append(p)).OfType<MfmMentionNode>())
await ResolveMention(mention, host, mentionCache, splitDomainMapping);
// We need to call .ToList() on this so we can modify the collection in the loop
foreach (var mention in nodesList.SelectMany(p => p.Children.Append(p)).OfType<MfmMentionNode>().ToList())
nodesList[nodesList.IndexOf(mention)] = ResolveMention(mention, host, mentionCache, splitDomainMapping);
return nodesList;
}
// Resolves a single mention node against the mention cache, after normalizing its host.
private async Task ResolveMention(
private MfmInlineNode ResolveMention(
MfmMentionNode node, string? host,
IEnumerable<Note.MentionedUser> mentionCache,
SplitDomainMapping splitDomainMapping
) {
var finalHost = node.Host ?? host;
// Fall back to object host, as localpart-only mentions are relative to the instance the note originated from
node.Host ??= host ?? config.Value.AccountDomain;
if (finalHost == config.Value.AccountDomain || finalHost == config.Value.WebDomain)
finalHost = null;
if (finalHost != null &&
splitDomainMapping.TryGetValue((node.Username.ToLowerInvariant(), finalHost), out var value))
finalHost = value;
// A mention that names the web domain is rewritten to the account domain
if (node.Host == config.Value.WebDomain)
node.Host = config.Value.AccountDomain;
// For other hosts, the split domain mapping (keyed by lowercased username and host) may supply a replacement host
if (node.Host != config.Value.AccountDomain &&
splitDomainMapping.TryGetValue((node.Username.ToLowerInvariant(), node.Host), out var value))
node.Host = value;
// Look the user up in the mention cache: username compared case-insensitively, host compared with ==
var resolvedUser =
mentionCache.FirstOrDefault(p => string.Equals(p.Username, node.Username,
StringComparison.InvariantCultureIgnoreCase) &&
p.Host == finalHost);
mentionCache.FirstOrDefault(p => p.Username.EqualsIgnoreCase(node.Username) && p.Host == node.Host);
if (resolvedUser != null) {
// Copy the stored username/host onto the node and rebuild the acct string from them
node.Username = resolvedUser.Username;
node.Host = resolvedUser.Host;
node.Acct = $"@{resolvedUser.Username}@{resolvedUser.Host}";
}
else {
async Task<string> FetchLocalUserCapitalization() {
var username = await db.Users.Where(p => p.UsernameLower == node.Username.ToLowerInvariant())
.Select(p => p.Username)
.FirstOrDefaultAsync();
return username ?? node.Username;
}
node.Username = await cache.FetchAsync($"localUserNameCapitalization:{node.Username.ToLowerInvariant()}",
TimeSpan.FromHours(24), FetchLocalUserCapitalization);
node.Host = config.Value.AccountDomain;
node.Acct = $"@{node.Username}@{config.Value.AccountDomain}";
return node;
}
// Presumably reached when no cached user matched: the mention is replaced by a plain node that only
// carries the acct string as text - confirm against the repository.
return new MfmPlainNode {
Children = [
new MfmTextNode {
Text = node.Acct
}
]
};
}
}

View file

@ -51,22 +51,24 @@ public class NoteRenderer(IOptions<Config.InstanceSection> config, MfmConverter
return new ASNote {
Id = id,
Content = note.Text != null ? await mfmConverter.ToHtmlAsync(note.Text, []) : null,
AttributedTo = [new ASObjectBase(userId)],
Type = $"{Constants.ActivityStreamsNs}#Note",
MkContent = note.Text,
PublishedAt = note.CreatedAt,
Sensitive = note.Cw != null,
InReplyTo = replyId,
Cc = cc,
To = to,
Tags = tags,
Content = note.Text != null
? await mfmConverter.ToHtmlAsync(note.Text, mentions, note.UserHost)
: null,
Source = note.Text != null
? new ASNoteSource {
Content = note.Text,
MediaType = "text/x.misskeymarkdown"
}
: null,
Cc = cc,
To = to,
Tags = tags
: null
};
}
}

View file

@ -6,6 +6,7 @@ using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Iceshrimp.Backend.Core.Configuration;
using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Extensions;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Types;
using Microsoft.Extensions.Options;
@ -31,44 +32,44 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
return sb.ToString().Trim();
}
// Renders the given MFM nodes to an HTML string: every node is converted with FromMfmNode and appended to
// a freshly created <p> element, which is then written out through a StringWriter.
// NOTE(review): this listing looks like a diff view without +/- markers; the two signature lines and the
// two foreach lines are presumably the removed and the added variant (the latter threads `host` through to
// FromMfmNode) - confirm against the repository.
public async Task<string> ToHtmlAsync(IEnumerable<MfmNode> nodes, List<Note.MentionedUser> mentions) {
public async Task<string> ToHtmlAsync(IEnumerable<MfmNode> nodes, List<Note.MentionedUser> mentions, string? host) {
// A new browsing context and document are created on every call
var context = BrowsingContext.New();
var document = await context.OpenNewAsync();
var element = document.CreateElement("p");
foreach (var node in nodes) element.AppendNodes(FromMfmNode(document, node, mentions));
foreach (var node in nodes) element.AppendNodes(FromMfmNode(document, node, mentions, host));
await using var sw = new StringWriter();
await element.ToHtmlAsync(sw);
return sw.ToString();
}
// Convenience overload: parses the MFM text with MfmParser.Parse and forwards the nodes to the node-based overload.
// NOTE(review): this listing looks like a diff view without +/- markers; the two signature lines and the
// two return lines are presumably the removed and the added variant (the latter forwards `host`) - confirm
// against the repository.
public async Task<string> ToHtmlAsync(string mfm, List<Note.MentionedUser> mentions) {
public async Task<string> ToHtmlAsync(string mfm, List<Note.MentionedUser> mentions, string? host) {
var nodes = MfmParser.Parse(mfm);
return await ToHtmlAsync(nodes, mentions);
return await ToHtmlAsync(nodes, mentions, host);
}
private INode FromMfmNode(IDocument document, MfmNode node, List<Note.MentionedUser> mentions) {
private INode FromMfmNode(IDocument document, MfmNode node, List<Note.MentionedUser> mentions, string? host) {
switch (node) {
case MfmBoldNode: {
var el = document.CreateElement("b");
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
case MfmSmallNode: {
var el = document.CreateElement("small");
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
case MfmStrikeNode: {
var el = document.CreateElement("del");
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
case MfmItalicNode:
case MfmFnNode: {
var el = document.CreateElement("i");
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
case MfmCodeBlockNode codeBlockNode: {
@ -80,7 +81,7 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
}
case MfmCenterNode: {
var el = document.CreateElement("div");
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
case MfmEmojiCodeNode emojiCodeNode: {
@ -114,25 +115,20 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
case MfmLinkNode linkNode: {
var el = document.CreateElement("a");
el.SetAttribute("href", linkNode.Url);
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
case MfmMentionNode mentionNode: {
var el = document.CreateElement("span");
if (mentionNode.Host == config.Value.AccountDomain || mentionNode.Host == config.Value.WebDomain)
mentionNode.Host = null;
// Fall back to object host, as localpart-only mentions are relative to the instance the note originated from
mentionNode.Host ??= host ?? config.Value.AccountDomain;
var mention = mentionNode.Host == null
? new Note.MentionedUser {
Host = config.Value.AccountDomain,
Uri = $"https://{config.Value.WebDomain}/@{mentionNode.Username}",
Username = mentionNode.Username
}
: mentions.FirstOrDefault(p => string.Equals(p.Username, mentionNode.Username,
StringComparison.InvariantCultureIgnoreCase) &&
string.Equals(p.Host, mentionNode.Host,
StringComparison.InvariantCultureIgnoreCase));
if (mentionNode.Host == config.Value.WebDomain)
mentionNode.Host = config.Value.AccountDomain;
var mention = mentions.FirstOrDefault(p => p.Username.EqualsIgnoreCase(mentionNode.Username) &&
p.Host.EqualsIgnoreCase(mentionNode.Host));
if (mention == null) {
el.TextContent = mentionNode.Acct;
}
@ -152,7 +148,7 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
}
case MfmQuoteNode: {
var el = document.CreateElement("blockquote");
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
case MfmTextNode textNode: {
@ -188,7 +184,7 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
}
case MfmPlainNode: {
var el = document.CreateElement("span");
AppendChildren(el, document, node, mentions);
AppendChildren(el, document, node, mentions, host);
return el;
}
default: {
@ -198,8 +194,8 @@ public class MfmConverter(IOptions<Config.InstanceSection> config) {
}
// Converts every child of `parent` with FromMfmNode and appends the result to `element`.
// NOTE(review): this listing looks like a diff view without +/- markers; the two parameter lines and the
// two foreach lines are presumably the removed and the added variant (the latter passes `host` along) -
// confirm against the repository.
private void AppendChildren(INode element, IDocument document, MfmNode parent,
List<Note.MentionedUser> mentions
List<Note.MentionedUser> mentions, string? host
) {
foreach (var node in parent.Children) element.AppendNodes(FromMfmNode(document, node, mentions));
foreach (var node in parent.Children) element.AppendNodes(FromMfmNode(document, node, mentions, host));
}
}

View file

@ -80,7 +80,10 @@ public static class MfmSerializer {
break;
}
case MfmPlainNode: {
result.Append(node.Children.OfType<MfmTextNode>().Select(p => p.Text));
result.Append("<plain>");
foreach (var s in node.Children.OfType<MfmTextNode>().Select(p => p.Text))
result.Append(s);
result.Append("</plain>");
break;
}
case MfmSmallNode: {

View file

@ -49,7 +49,7 @@ public class NoteService(
var (mentionedUserIds, mentions, remoteMentions, splitDomainMapping) = await ResolveNoteMentionsAsync(text);
if (text != null)
text = await mentionsResolver.ResolveMentions(text, null, mentions, splitDomainMapping);
text = mentionsResolver.ResolveMentions(text, null, mentions, splitDomainMapping);
var actor = await userRenderer.RenderAsync(user);
@ -184,8 +184,7 @@ public class NoteService(
dbNote.VisibleUserIds = visibleUserIds.Distinct().ToList();
}
dbNote.Text = await mentionsResolver.ResolveMentions(dbNote.Text, dbNote.UserHost, remoteMentions,
splitDomainMapping);
dbNote.Text = mentionsResolver.ResolveMentions(dbNote.Text, dbNote.UserHost, mentions, splitDomainMapping);
}
user.NotesCount++;

View file

@ -29,7 +29,7 @@ public class MfmTests {
var converter = provider.GetRequiredService<MfmConverter>();
var pre = DateTime.Now;
await converter.ToHtmlAsync(Mfm, []);
await converter.ToHtmlAsync(Mfm, [], null);
var post = DateTime.Now;
var ms = (post - pre).TotalMilliseconds;
Console.WriteLine($"Took {ms} ms");