[backend/federation] Improve hashtag handling (ISH-651)

This commit is contained in:
Laura Hausmann 2025-01-12 12:27:34 +01:00
parent 09919bdc77
commit 9e37942e9d
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
4 changed files with 60 additions and 21 deletions

View file

@ -49,7 +49,9 @@ public class MfmConverter(
public AsyncLocal<bool> SupportsHtmlFormatting { get; } = new(); public AsyncLocal<bool> SupportsHtmlFormatting { get; } = new();
public AsyncLocal<bool> SupportsInlineMedia { get; } = new(); public AsyncLocal<bool> SupportsInlineMedia { get; } = new();
public static async Task<HtmlMfmData> FromHtmlAsync(string? html, List<Note.MentionedUser>? mentions = null) public static async Task<HtmlMfmData> FromHtmlAsync(
string? html, List<Note.MentionedUser>? mentions = null, List<string>? hashtags = null
)
{ {
var media = new List<MfmInlineMedia>(); var media = new List<MfmInlineMedia>();
if (html == null) return new HtmlMfmData("", media); if (html == null) return new HtmlMfmData("", media);
@ -68,7 +70,7 @@ public class MfmConverter(
if (dom.Body == null) return new HtmlMfmData("", media); if (dom.Body == null) return new HtmlMfmData("", media);
var sb = new StringBuilder(); var sb = new StringBuilder();
var parser = new MfmHtmlParser(mentions ?? [], media); var parser = new MfmHtmlParser(mentions ?? [], hashtags ?? [], media);
dom.Body.ChildNodes.Select(parser.ParseNode).ToList().ForEach(s => sb.Append(s)); dom.Body.ChildNodes.Select(parser.ParseNode).ToList().ForEach(s => sb.Append(s));
return new HtmlMfmData(sb.ToString().Trim(), media); return new HtmlMfmData(sb.ToString().Trim(), media);
} }

View file

@ -5,7 +5,11 @@ using Iceshrimp.Backend.Core.Helpers.LibMfm.Conversion;
namespace Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing; namespace Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<MfmInlineMedia> media) internal class HtmlParser(
IEnumerable<Note.MentionedUser> mentions,
IEnumerable<string> hashtags,
ICollection<MfmInlineMedia> media
)
{ {
internal string? ParseNode(INode node) internal string? ParseNode(INode node)
{ {
@ -35,6 +39,14 @@ internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<
: $"<plain>{el.TextContent}</plain>"; : $"<plain>{el.TextContent}</plain>";
} }
// Hubzilla marks tags as class="zrl", so we have to account for that here
// An anchor is considered a hashtag candidate when it carries rel="tag" or the "zrl" class,
// and additionally has an href attribute.
if ((el.GetAttribute("rel") is "tag" || el.ClassList.Contains("zrl")) && el.HasAttribute("href"))
{
// The link text is compared against the supplied hashtag names after removing a single
// leading '#' and lowercasing it with the invariant culture.
// NOTE(review): only one leading '#' is stripped here, while the call sites visible in this
// change build the list with TrimStart('#') (which strips all of them) - confirm this is intended.
var text = el.TextContent;
// On a match the original link text is returned unchanged from this branch;
// otherwise execution falls through to the checks below.
if (hashtags.Contains((text.StartsWith('#') ? text[1..] : text).ToLowerInvariant()))
return text;
}
if (el.TextContent == href && (href.StartsWith("http://") || href.StartsWith("https://"))) if (el.TextContent == href && (href.StartsWith("http://") || href.StartsWith("https://")))
return href; return href;
@ -87,7 +99,8 @@ internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<
if (node is not HtmlElement el) return node.TextContent; if (node is not HtmlElement el) return node.TextContent;
var src = el.GetAttribute("src"); var src = el.GetAttribute("src");
if (src == null || !Uri.TryCreate(src, UriKind.Absolute, out var uri) && uri is { Scheme: "http" or "https" }) if (src == null
|| !Uri.TryCreate(src, UriKind.Absolute, out var uri) && uri is { Scheme: "http" or "https" })
return node.TextContent; return node.TextContent;
var alt = el.GetAttribute("alt") ?? el.GetAttribute("title"); var alt = el.GetAttribute("alt") ?? el.GetAttribute("title");

View file

@ -201,16 +201,6 @@ public class NoteService(
} }
var tags = ResolveHashtags(data.ParsedText, data.ASNote); var tags = ResolveHashtags(data.ParsedText, data.ASNote);
if (tags.Count > 0 && data.Text != null && data.ASNote != null)
{
// @formatter:off
var match = data.ASNote.Tags?.OfType<ASHashtag>().Where(p => p.Name != null && p.Href != null) ?? [];
//TODO: refactor this to use the nodes object instead of matching on text
data.Text = match.Aggregate(data.Text, (current, tag) => current.Replace($"[#{tag.Name!.TrimStart('#')}]({tag.Href})", $"#{tag.Name!.TrimStart('#')}")
.Replace($"#[{tag.Name!.TrimStart('#')}]({tag.Href})", $"#{tag.Name!.TrimStart('#')}"));
// @formatter:on
}
var mastoReplyUserId = data.Reply?.UserId != data.User.Id var mastoReplyUserId = data.Reply?.UserId != data.User.Id
? data.Reply?.UserId ? data.Reply?.UserId
: data.Reply.MastoReplyUserId ?? data.Reply.ReplyUserId ?? data.Reply.UserId; : data.Reply.MastoReplyUserId ?? data.Reply.ReplyUserId ?? data.Reply.UserId;
@ -1009,7 +999,13 @@ public class NoteService(
if (text == null) if (text == null)
{ {
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions); var hashtags = note.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions, hashtags);
} }
var cw = note.Summary; var cw = note.Summary;
@ -1099,7 +1095,15 @@ public class NoteService(
List<MfmInlineMedia>? htmlInlineMedia = null; List<MfmInlineMedia>? htmlInlineMedia = null;
if (text == null) if (text == null)
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions); {
// Collect the names of the note's ASHashtag tags, lowercased (invariant culture) and with
// leading '#' characters trimmed. Falls back to an empty list when note.Tags is null.
// NotNull() presumably drops entries whose Name was null - verify against the extension method.
var hashtags = note.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
// Convert the HTML content, passing both the resolved mentions and the hashtag names,
// and deconstruct the result into the text and the inline media list.
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions, hashtags);
}
var cw = note.Summary; var cw = note.Summary;

View file

@ -156,8 +156,18 @@ public class UserService(
.AwaitAllAsync() .AwaitAllAsync()
: null; : null;
var bio = actor.MkSummary?.ReplaceLineEndings("\n").Trim() var bio = actor.MkSummary?.ReplaceLineEndings("\n").Trim();
?? (await MfmConverter.FromHtmlAsync(actor.Summary)).Mfm; if (bio == null)
{
// Collect the names of the actor's ASHashtag tags, lowercased (invariant culture) and with
// leading '#' characters trimmed. Falls back to an empty list when actor.Tags is null.
// NotNull() presumably drops entries whose Name was null - verify against the extension method.
var asHashtags = actor.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
// Convert the HTML summary with the hashtag names supplied (no mentions are passed here)
// and keep only the Mfm part of the result as the bio.
bio = (await MfmConverter.FromHtmlAsync(actor.Summary, hashtags: asHashtags)).Mfm;
}
var tags = ResolveHashtags(MfmParser.Parse(bio), actor); var tags = ResolveHashtags(MfmParser.Parse(bio), actor);
user = new User user = new User
@ -320,8 +330,18 @@ public class UserService(
var processPendingDeletes = await ResolveAvatarAndBannerAsync(user, actor); var processPendingDeletes = await ResolveAvatarAndBannerAsync(user, actor);
user.UserProfile.Description = actor.MkSummary?.ReplaceLineEndings("\n").Trim() user.UserProfile.Description = actor.MkSummary?.ReplaceLineEndings("\n").Trim();
?? (await MfmConverter.FromHtmlAsync(actor.Summary)).Mfm; if (user.UserProfile.Description == null)
{
// Collect the names of the actor's ASHashtag tags, lowercased (invariant culture) and with
// leading '#' characters trimmed. Falls back to an empty list when actor.Tags is null.
// NotNull() presumably drops entries whose Name was null - verify against the extension method.
var asHashtags = actor.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
// Convert the HTML summary with the hashtag names supplied (no mentions are passed here)
// and store only the Mfm part of the result as the profile description.
user.UserProfile.Description = (await MfmConverter.FromHtmlAsync(actor.Summary, hashtags: asHashtags)).Mfm;
}
//user.UserProfile.Birthday = TODO; //user.UserProfile.Birthday = TODO;
//user.UserProfile.Location = TODO; //user.UserProfile.Location = TODO;
user.UserProfile.Fields = fields?.ToArray() ?? []; user.UserProfile.Fields = fields?.ToArray() ?? [];