[backend/federation] Improve hashtag handling (ISH-651)

This commit is contained in:
Laura Hausmann 2025-01-12 12:27:34 +01:00
parent 09919bdc77
commit 9e37942e9d
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
4 changed files with 60 additions and 21 deletions

View file

@ -49,7 +49,9 @@ public class MfmConverter(
public AsyncLocal<bool> SupportsHtmlFormatting { get; } = new();
public AsyncLocal<bool> SupportsInlineMedia { get; } = new();
public static async Task<HtmlMfmData> FromHtmlAsync(string? html, List<Note.MentionedUser>? mentions = null)
public static async Task<HtmlMfmData> FromHtmlAsync(
string? html, List<Note.MentionedUser>? mentions = null, List<string>? hashtags = null
)
{
var media = new List<MfmInlineMedia>();
if (html == null) return new HtmlMfmData("", media);
@ -68,7 +70,7 @@ public class MfmConverter(
if (dom.Body == null) return new HtmlMfmData("", media);
var sb = new StringBuilder();
var parser = new MfmHtmlParser(mentions ?? [], media);
var parser = new MfmHtmlParser(mentions ?? [], hashtags ?? [], media);
dom.Body.ChildNodes.Select(parser.ParseNode).ToList().ForEach(s => sb.Append(s));
return new HtmlMfmData(sb.ToString().Trim(), media);
}

View file

@ -5,7 +5,11 @@ using Iceshrimp.Backend.Core.Helpers.LibMfm.Conversion;
namespace Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<MfmInlineMedia> media)
internal class HtmlParser(
IEnumerable<Note.MentionedUser> mentions,
IEnumerable<string> hashtags,
ICollection<MfmInlineMedia> media
)
{
internal string? ParseNode(INode node)
{
@ -35,6 +39,14 @@ internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<
: $"<plain>{el.TextContent}</plain>";
}
// Hubzilla marks tags as class="zrl", so we have to account for that here
if ((el.GetAttribute("rel") is "tag" || el.ClassList.Contains("zrl")) && el.HasAttribute("href"))
{
var text = el.TextContent;
if (hashtags.Contains((text.StartsWith('#') ? text[1..] : text).ToLowerInvariant()))
return text;
}
if (el.TextContent == href && (href.StartsWith("http://") || href.StartsWith("https://")))
return href;
@ -87,7 +99,8 @@ internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<
if (node is not HtmlElement el) return node.TextContent;
var src = el.GetAttribute("src");
if (src == null || !Uri.TryCreate(src, UriKind.Absolute, out var uri) && uri is { Scheme: "http" or "https" })
// NOTE(review): `&&` binds tighter than `||`, and Uri.TryCreate leaves `uri` null when it fails,
// so the second operand can never be true and only `src == null` triggers this early return.
// The intent was presumably
// `|| !Uri.TryCreate(...) || uri is not { Scheme: "http" or "https" }` - confirm before changing.
if (src == null
|| !Uri.TryCreate(src, UriKind.Absolute, out var uri) && uri is { Scheme: "http" or "https" })
return node.TextContent;
var alt = el.GetAttribute("alt") ?? el.GetAttribute("title");
@ -137,4 +150,4 @@ internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<
{
return string.Join(null, node.ChildNodes.Select(ParseNode));
}
}
}

View file

@ -201,16 +201,6 @@ public class NoteService(
}
var tags = ResolveHashtags(data.ParsedText, data.ASNote);
if (tags.Count > 0 && data.Text != null && data.ASNote != null)
{
// @formatter:off
var match = data.ASNote.Tags?.OfType<ASHashtag>().Where(p => p.Name != null && p.Href != null) ?? [];
//TODO: refactor this to use the nodes object instead of matching on text
data.Text = match.Aggregate(data.Text, (current, tag) => current.Replace($"[#{tag.Name!.TrimStart('#')}]({tag.Href})", $"#{tag.Name!.TrimStart('#')}")
.Replace($"#[{tag.Name!.TrimStart('#')}]({tag.Href})", $"#{tag.Name!.TrimStart('#')}"));
// @formatter:on
}
var mastoReplyUserId = data.Reply?.UserId != data.User.Id
? data.Reply?.UserId
: data.Reply.MastoReplyUserId ?? data.Reply.ReplyUserId ?? data.Reply.UserId;
@ -1009,7 +999,13 @@ public class NoteService(
if (text == null)
{
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions);
var hashtags = note.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions, hashtags);
}
var cw = note.Summary;
@ -1099,7 +1095,15 @@ public class NoteService(
List<MfmInlineMedia>? htmlInlineMedia = null;
if (text == null)
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions);
{
var hashtags = note.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
(text, htmlInlineMedia) = await MfmConverter.FromHtmlAsync(note.Content, mentionData.Mentions, hashtags);
}
var cw = note.Summary;

View file

@ -156,8 +156,18 @@ public class UserService(
.AwaitAllAsync()
: null;
var bio = actor.MkSummary?.ReplaceLineEndings("\n").Trim()
?? (await MfmConverter.FromHtmlAsync(actor.Summary)).Mfm;
var bio = actor.MkSummary?.ReplaceLineEndings("\n").Trim();
if (bio == null)
{
var asHashtags = actor.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
bio = (await MfmConverter.FromHtmlAsync(actor.Summary, hashtags: asHashtags)).Mfm;
}
var tags = ResolveHashtags(MfmParser.Parse(bio), actor);
user = new User
@ -320,8 +330,18 @@ public class UserService(
var processPendingDeletes = await ResolveAvatarAndBannerAsync(user, actor);
user.UserProfile.Description = actor.MkSummary?.ReplaceLineEndings("\n").Trim()
?? (await MfmConverter.FromHtmlAsync(actor.Summary)).Mfm;
user.UserProfile.Description = actor.MkSummary?.ReplaceLineEndings("\n").Trim();
if (user.UserProfile.Description == null)
{
var asHashtags = actor.Tags?.OfType<ASHashtag>()
.Select(p => p.Name?.ToLowerInvariant().TrimStart('#'))
.NotNull()
.ToList()
?? [];
user.UserProfile.Description = (await MfmConverter.FromHtmlAsync(actor.Summary, hashtags: asHashtags)).Mfm;
}
//user.UserProfile.Birthday = TODO;
//user.UserProfile.Location = TODO;
user.UserProfile.Fields = fields?.ToArray() ?? [];