using System.Text;
using System.Text.RegularExpressions;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Iceshrimp.Backend.Core.Configuration;
using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Extensions;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
using Iceshrimp.MfmSharp;
using Iceshrimp.Backend.Core.Services;
using Microsoft.Extensions.Options;
using MfmHtmlParser = Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing.HtmlParser;
using HtmlParser = AngleSharp.Html.Parser.HtmlParser;
namespace Iceshrimp.Backend.Core.Helpers.LibMfm.Conversion;
/// <summary>
/// A media item that is referenced inline from within note content.
/// </summary>
/// <param name="Type">Broad category of the media item.</param>
/// <param name="Src">Location of the media item; emitted as the <c>href</c>/<c>src</c> attribute when rendered.</param>
/// <param name="Alt">Optional alternative text for the media item.</param>
public readonly record struct MfmInlineMedia(MfmInlineMedia.MediaType Type, string Src, string? Alt)
{
    /// <summary>
    /// Broad media categories. <see cref="Other"/> is the zero value and therefore the default.
    /// </summary>
    public enum MediaType
    {
        Other,
        Image,
        Video,
        Audio
    }

    /// <summary>
    /// Maps a MIME type onto a <see cref="MediaType"/> by looking at its top-level type prefix.
    /// </summary>
    /// <param name="mime">MIME type string, e.g. <c>image/png</c>.</param>
    /// <returns>
    /// <see cref="MediaType.Image"/>, <see cref="MediaType.Video"/> or <see cref="MediaType.Audio"/> for the
    /// matching prefixes, otherwise <see cref="MediaType.Other"/>.
    /// </returns>
    public static MediaType GetType(string mime)
    {
        // MIME prefixes are machine-readable identifiers, so compare ordinally: a culture-sensitive
        // StartsWith ignores certain invisible code points and depends on the process culture.
        if (mime.StartsWith("image/", StringComparison.Ordinal)) return MediaType.Image;
        if (mime.StartsWith("video/", StringComparison.Ordinal)) return MediaType.Video;
        if (mime.StartsWith("audio/", StringComparison.Ordinal)) return MediaType.Audio;
        return MediaType.Other;
    }
}
/// <summary>
/// Resulting data after HTML to MFM conversion
/// </summary>
/// <param name="Mfm">The MFM text produced by the conversion.</param>
/// <param name="InlineMedia">
/// The media list that was handed to the HTML parser during the conversion
/// (presumably filled with the media it encountered - confirm against the parser).
/// </param>
public readonly record struct HtmlMfmData(string Mfm, List InlineMedia);
/// <summary>
/// Resulting data after MFM to HTML conversion
/// </summary>
/// <param name="Html">The serialized HTML of the root element produced by the conversion.</param>
/// <param name="InlineMedia">The inline media entries that <c>media</c> function nodes resolved to while rendering.</param>
public readonly record struct MfmHtmlData(string Html, List InlineMedia);
public class MfmConverter(
IOptions config,
MediaProxyService mediaProxy,
FlagService flags
) : ISingletonService
{
// Matches an HTML line break together with a directly following newline, if there is one.
private static readonly Regex HtmlBreakRegex = new(@"<br\s?\/?>(?:\r?\n)?", RegexOptions.IgnoreCase);

/// <summary>
/// Converts an HTML fragment into MFM text.
/// </summary>
/// <param name="html">The HTML to convert; <c>null</c> yields an empty result.</param>
/// <param name="mentions">Known mentions passed on to the HTML parser; defaults to an empty list.</param>
/// <param name="hashtags">Known hashtags passed on to the HTML parser; defaults to an empty list.</param>
/// <returns>
/// The trimmed MFM text and the media list shared with the parser. The text is empty when
/// <paramref name="html"/> is <c>null</c> or the parsed document has no body.
/// </returns>
public static async Task FromHtmlAsync(
    string? html, List? mentions = null, List? hashtags = null
)
{
    var media = new List();
    if (html == null) return new HtmlMfmData("", media);

    // Ensure compatibility with AP servers that send both <br> as well as newlines
    html = HtmlBreakRegex.Replace(html, "\n");

    // Ensure compatibility with AP servers that send non-breaking space characters instead of regular spaces
    html = html.Replace("\u00A0", " ");

    // Ensure compatibility with AP servers that send CRLF or CR instead of LF-style newlines
    html = html.ReplaceLineEndings("\n");

    var dom = await new HtmlParser().ParseDocumentAsync(html);
    if (dom.Body == null) return new HtmlMfmData("", media);

    var sb     = new StringBuilder();
    var parser = new MfmHtmlParser(mentions ?? [], hashtags ?? [], media);

    // Append each top-level node's MFM in document order
    foreach (var child in dom.Body.ChildNodes)
        sb.Append(parser.ParseNode(child));

    return new HtmlMfmData(sb.ToString().Trim(), media);
}
// Matches an HTML line break that is directly followed by a newline.
private static readonly Regex HtmlBreakBeforeNewlineRegex = new(@"<br\s?\/?>\r?\n", RegexOptions.IgnoreCase);

/// <summary>
/// Collects the mentions found in an HTML fragment.
/// </summary>
/// <param name="html">The HTML to inspect; <c>null</c> yields an empty result.</param>
/// <returns>
/// The mentions gathered by the extractor, or an empty collection when <paramref name="html"/> is
/// <c>null</c> or the parsed document has no body.
/// </returns>
public static async Task> ExtractMentionsFromHtmlAsync(string? html)
{
    if (html == null) return [];

    // Ensure compatibility with AP servers that send both <br> as well as newlines
    html = HtmlBreakBeforeNewlineRegex.Replace(html, "\n");

    var dom = await new HtmlParser().ParseDocumentAsync(html);
    if (dom.Body == null) return [];

    var parser = new HtmlMentionsExtractor();
    foreach (var node in dom.Body.ChildNodes)
        parser.ParseChildren(node);

    return parser.Mentions;
}
/// <summary>
/// Renders parsed MFM nodes into an HTML string wrapped in a single root element.
/// </summary>
/// <param name="nodes">Parsed MFM nodes to render.</param>
/// <param name="mentions">Known mentioned users, used when rendering mention nodes.</param>
/// <param name="host">Host the content originated from.</param>
/// <param name="quoteUri">When set, a trailing "RE: " link pointing at this URI is appended.</param>
/// <param name="quoteInaccessible">
/// When <paramref name="quoteUri"/> is <c>null</c>, appends a trailing "RE:" marker with a lock emoji instead.
/// </param>
/// <param name="replyInaccessible">Prepends a "RE:" marker with a lock emoji before the content.</param>
/// <param name="rootElement">Tag name of the element that wraps the output.</param>
/// <param name="emoji">Custom emoji available to emoji code nodes.</param>
/// <param name="media">Inline media candidates available to <c>media</c> function nodes.</param>
/// <returns>The serialized HTML together with the inline media that was referenced while rendering.</returns>
public async Task ToHtmlAsync(
    IMfmNode[] nodes, List mentions, string? host, string? quoteUri = null,
    bool quoteInaccessible = false, bool replyInaccessible = false, string rootElement = "p",
    List? emoji = null, List? media = null
)
{
    var context    = BrowsingContext.New();
    var document   = await context.OpenNewAsync();
    var element    = document.CreateElement(rootElement);
    var hasContent = nodes.Length > 0;

    // Builds the "RE: <lock>" marker used for inaccessible replies and quotes
    IElement CreateLockedMarker()
    {
        var marker = document.CreateElement("span");
        marker.TextContent = "RE: \ud83d\udd12"; // lock emoji
        return marker;
    }

    // Two consecutive line breaks separate the marker from the actual content
    void AppendBlankLine(IElement target)
    {
        target.AppendChild(document.CreateElement("br"));
        target.AppendChild(document.CreateElement("br"));
    }

    // Drops a leading http(s) scheme for display; anything else is shown unchanged
    // instead of being sliced blindly (which throws for short strings).
    static string StripHttpScheme(string value)
    {
        if (value.StartsWith("https://", StringComparison.Ordinal)) return value["https://".Length..];
        if (value.StartsWith("http://", StringComparison.Ordinal)) return value["http://".Length..];
        return value;
    }

    if (replyInaccessible)
    {
        var wrapper = document.CreateElement("span");
        wrapper.AppendChild(CreateLockedMarker());
        if (hasContent) AppendBlankLine(wrapper);
        element.AppendChild(wrapper);
    }

    var usedMedia = new List();
    foreach (var node in nodes)
        element.AppendNodes(FromMfmNode(document, node, mentions, host, usedMedia, emoji, media));

    if (quoteUri != null)
    {
        var a = document.CreateElement("a");
        a.SetAttribute("href", quoteUri);
        a.TextContent = StripHttpScheme(quoteUri);

        var quote = document.CreateElement("span");
        quote.ClassList.Add("quote-inline");
        if (hasContent) AppendBlankLine(quote);

        var prefix = document.CreateElement("span");
        prefix.TextContent = "RE: ";
        quote.AppendChild(prefix);
        quote.AppendChild(a);
        element.AppendChild(quote);
    }
    else if (quoteInaccessible)
    {
        var wrapper = document.CreateElement("span");
        if (hasContent) AppendBlankLine(wrapper);
        wrapper.AppendChild(CreateLockedMarker());
        element.AppendChild(wrapper);
    }

    await using var sw = new StringWriter();
    await element.ToHtmlAsync(sw);
    return new MfmHtmlData(sw.ToString(), usedMedia);
}
/// <summary>
/// Parses MFM source text and renders the resulting nodes to HTML.
/// </summary>
/// <param name="mfm">MFM source text to parse.</param>
/// <param name="mentions">Forwarded unchanged to the node-based overload.</param>
/// <param name="host">Forwarded unchanged to the node-based overload.</param>
/// <param name="quoteUri">Forwarded unchanged to the node-based overload.</param>
/// <param name="quoteInaccessible">Forwarded unchanged to the node-based overload.</param>
/// <param name="replyInaccessible">Forwarded unchanged to the node-based overload.</param>
/// <param name="rootElement">Forwarded unchanged to the node-based overload.</param>
/// <param name="emoji">Forwarded unchanged to the node-based overload.</param>
/// <param name="media">Forwarded unchanged to the node-based overload.</param>
/// <returns>The result of the node-based overload for the parsed nodes.</returns>
public async Task ToHtmlAsync(
    string mfm, List mentions, string? host, string? quoteUri = null,
    bool quoteInaccessible = false, bool replyInaccessible = false, string rootElement = "p",
    List? emoji = null, List? media = null
)
{
    var parsed = MfmParser.Parse(mfm);
    var rendered = await ToHtmlAsync(
        parsed, mentions, host,
        quoteUri: quoteUri,
        quoteInaccessible: quoteInaccessible,
        replyInaccessible: replyInaccessible,
        rootElement: rootElement,
        emoji: emoji,
        media: media
    );
    return rendered;
}
/// <summary>
/// Converts a single MFM node (and, recursively, its children) into the DOM node that represents it.
/// </summary>
/// <param name="document">Document used as the factory for all created elements and text nodes.</param>
/// <param name="node">The MFM node to convert.</param>
/// <param name="mentions">Known mentioned users; a mention node that matches one is rendered as a link.</param>
/// <param name="host">Host the content originated from; used for host-less mentions and for emoji lookup.</param>
/// <param name="usedMedia">Receives every inline media entry that a <c>media</c> function node resolved to.</param>
/// <param name="emoji">Custom emoji available for rendering emoji code nodes, if any.</param>
/// <param name="media">Inline media candidates that <c>media</c> function nodes may refer to, if any.</param>
/// <returns>The DOM node for <paramref name="node"/>.</returns>
/// <exception cref="NotImplementedException">The node type is not handled by this converter.</exception>
private INode FromMfmNode(
    IDocument document, IMfmNode node, List mentions, string? host,
    List usedMedia,
    List? emoji = null, List? media = null
)
{
    // Children are rendered with the same emoji and media lists as their parent, so that emoji codes
    // and media references nested inside formatting nodes resolve exactly like top-level ones.
    void RenderChildren(INode target) =>
        AppendChildren(target, document, node, mentions, host, usedMedia, emoji, media);

    // Wraps the children in a formatting element, surrounded by the given markup characters
    // (AddHtmlMarkup only emits them when HTML formatting is unsupported).
    IElement RenderWrapped(string tag, string markup)
    {
        var wrapper = CreateInlineFormattingElement(document, tag);
        AddHtmlMarkup(document, wrapper, markup);
        RenderChildren(wrapper);
        AddHtmlMarkup(document, wrapper, markup);
        return wrapper;
    }

    // Drops a leading http(s) scheme for display; anything else is shown unchanged
    // instead of being sliced blindly (which throws for short strings).
    static string StripHttpScheme(string value)
    {
        if (value.StartsWith("https://", StringComparison.Ordinal)) return value["https://".Length..];
        if (value.StartsWith("http://", StringComparison.Ordinal)) return value["http://".Length..];
        return value;
    }

    switch (node)
    {
        case MfmFnNode { Name: "media" } fn when media is { Count: > 0 }:
        {
            if (fn.Children.FirstOrDefault() is MfmUrlNode url)
            {
                // MfmInlineMedia is a struct, so FirstOrDefault would hand back a default value (never null)
                // when nothing matches; FindIndex makes the "no match" case explicit.
                var index = media.FindIndex(m => m.Src == url.Url);
                if (index >= 0)
                {
                    var current = media[index];
                    usedMedia.Add(current);

                    if (!flags.SupportsInlineMedia.Value || current.Type == MfmInlineMedia.MediaType.Other)
                    {
                        // No inline rendering possible: emit a labelled link to the media instead
                        var link = document.CreateElement("a");
                        link.SetAttribute("href", current.Src);

                        if (current.Type == MfmInlineMedia.MediaType.Other)
                            link.SetAttribute("download", "true");

                        var icon = current.Type switch
                        {
                            MfmInlineMedia.MediaType.Image => "\ud83d\uddbc\ufe0f", // framed picture emoji
                            MfmInlineMedia.MediaType.Video => "\ud83c\udfac",       // clapperboard emoji
                            MfmInlineMedia.MediaType.Audio => "\ud83c\udfb5",       // music note emoji
                            _                              => "\ud83d\udcbe",       // floppy disk emoji
                        };

                        link.TextContent = $"[{icon} {current.Alt ?? current.Src}]";
                        return link;
                    }

                    var nodeName = current.Type switch
                    {
                        MfmInlineMedia.MediaType.Image => "img",
                        MfmInlineMedia.MediaType.Video => "video",
                        MfmInlineMedia.MediaType.Audio => "audio",
                        _                              => throw new ArgumentOutOfRangeException()
                    };

                    var embed = document.CreateElement(nodeName);
                    embed.SetAttribute("src", current.Src);
                    embed.SetAttribute("alt", current.Alt);
                    return embed;
                }
            }

            // Not a resolvable media reference: render it like any other function node
            return RenderWrapped("i", "*");
        }
        case MfmFnNode { Name: "unixtime" } fn:
        {
            var el = CreateInlineFormattingElement(document, "i");

            if (fn.Children.Length != 1 || fn.Children.FirstOrDefault() is not MfmTextNode textNode)
                return Fallback();

            // The timestamp is machine-readable data, so parse it independently of the process culture
            var parsed = double.TryParse(
                textNode.Text,
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture,
                out var timestamp
            );
            if (!parsed || !double.IsFinite(timestamp)) return Fallback();

            DateTime date;
            try
            {
                date = DateTime.UnixEpoch.AddSeconds(timestamp);
            }
            catch (ArgumentOutOfRangeException)
            {
                // The value does not fit into a DateTime; show the raw content rather than failing the render
                return Fallback();
            }

            el.TextContent = date.ToString("HH:mm, d MMM yyyy", System.Globalization.CultureInfo.InvariantCulture)
                             + " UTC";
            return el;

            IElement Fallback()
            {
                AddHtmlMarkup(document, el, "*");
                RenderChildren(el);
                AddHtmlMarkup(document, el, "*");
                return el;
            }
        }
        case MfmBoldNode:
        {
            return RenderWrapped("b", "**");
        }
        case MfmSmallNode:
        {
            var el = document.CreateElement("small");
            RenderChildren(el);
            return el;
        }
        case MfmStrikeNode:
        {
            return RenderWrapped("del", "~~");
        }
        case MfmItalicNode:
        case MfmFnNode:
        {
            return RenderWrapped("i", "*");
        }
        case MfmCodeBlockNode codeBlockNode:
        {
            var el    = CreateInlineFormattingElement(document, "pre");
            var inner = CreateInlineFormattingElement(document, "code");
            inner.TextContent = codeBlockNode.Code;
            el.AppendNodes(inner);
            return el;
        }
        case MfmCenterNode:
        {
            var el = document.CreateElement("div");
            RenderChildren(el);
            return el;
        }
        case MfmEmojiCodeNode emojiCodeNode:
        {
            var punyHost = host?.ToPunycodeLower();
            if (emoji?.FirstOrDefault(p => p.Name == emojiCodeNode.Name && p.Host == punyHost) is { } hit)
            {
                var el    = document.CreateElement("span");
                var inner = document.CreateElement("img");
                inner.SetAttribute("src", mediaProxy.GetProxyUrl(hit));
                inner.SetAttribute("alt", hit.Name);
                el.AppendChild(inner);
                el.ClassList.Add("emoji");
                return el;
            }

            // Unknown emoji: keep the shortcode as text, wrapped in zero-width spaces
            return document.CreateTextNode($"\u200B:{emojiCodeNode.Name}:\u200B");
        }
        case MfmHashtagNode hashtagNode:
        {
            var el = document.CreateElement("a");
            el.SetAttribute("href", $"https://{config.Value.WebDomain}/tags/{hashtagNode.Hashtag}");
            el.TextContent = $"#{hashtagNode.Hashtag}";
            el.SetAttribute("rel", "tag");
            el.ClassList.Add("hashtag");
            return el;
        }
        case MfmInlineCodeNode inlineCodeNode:
        {
            var el = CreateInlineFormattingElement(document, "code");
            el.TextContent = inlineCodeNode.Code;
            return el;
        }
        case MfmInlineMathNode inlineMathNode:
        {
            var el = CreateInlineFormattingElement(document, "code");
            el.TextContent = inlineMathNode.Formula;
            return el;
        }
        case MfmMathBlockNode mathBlockNode:
        {
            var el = CreateInlineFormattingElement(document, "code");
            el.TextContent = mathBlockNode.Formula;
            return el;
        }
        case MfmLinkNode linkNode:
        {
            var el = document.CreateElement("a");
            el.SetAttribute("href", linkNode.Url);
            el.TextContent = linkNode.Text;
            return el;
        }
        case MfmMentionNode mentionNode:
        {
            var el = document.CreateElement("span");

            // Fall back to object host, as localpart-only mentions are relative to the instance the note originated from
            var finalHost = mentionNode.Host ?? host ?? config.Value.AccountDomain;
            if (finalHost == config.Value.WebDomain)
                finalHost = config.Value.AccountDomain;

            // For the local account domain, an entry without a host is accepted as well
            var isLocal = finalHost == config.Value.AccountDomain;
            var match = mentions.FirstOrDefault(p => p.Username.EqualsIgnoreCase(mentionNode.User)
                                                     && (p.Host.EqualsIgnoreCase(finalHost)
                                                         || (isLocal && p.Host == null)));

            if (match is not { } mention)
            {
                el.TextContent = $"@{mentionNode.Acct}";
            }
            else
            {
                el.ClassList.Add("h-card");
                el.SetAttribute("translate", "no");
                var a = document.CreateElement("a");
                a.ClassList.Add("u-url", "mention");
                a.SetAttribute("href", mention.Url ?? mention.Uri);
                var span = document.CreateElement("span");
                span.TextContent = $"@{mention.Username}";
                a.AppendChild(span);
                el.AppendChild(a);
            }

            return el;
        }
        case MfmQuoteNode:
        {
            var el = CreateInlineFormattingElement(document, "blockquote");
            AddHtmlMarkup(document, el, "> ");
            RenderChildren(el);
            AddHtmlMarkupTag(document, el, "br");
            AddHtmlMarkupTag(document, el, "br");
            return el;
        }
        case MfmTextNode textNode:
        {
            var el = document.CreateElement("span");
            var nodes = textNode.Text.Split("\r\n")
                                .SelectMany(p => p.Split('\r'))
                                .SelectMany(p => p.Split('\n'))
                                .Select(document.CreateTextNode);

            // Every line is followed by a <br>; the trailing one is removed again below
            foreach (var htmlNode in nodes)
            {
                el.AppendNodes(htmlNode);
                el.AppendNodes(document.CreateElement("br"));
            }

            if (el.LastChild != null)
                el.RemoveChild(el.LastChild);
            return el;
        }
        case MfmUrlNode urlNode:
        {
            var el = document.CreateElement("a");
            el.SetAttribute("href", urlNode.Url);
            el.TextContent = StripHttpScheme(urlNode.Url);
            return el;
        }
        case MfmPlainNode:
        {
            var el = document.CreateElement("span");
            RenderChildren(el);
            return el;
        }
        default:
        {
            throw new NotImplementedException("Unsupported MfmNode type");
        }
    }
}
/// <summary>
/// Converts every child of <paramref name="parent"/> and appends the results to <paramref name="element"/>
/// in order.
/// </summary>
/// <param name="element">DOM node that receives the converted children.</param>
/// <param name="document">Document forwarded to the node conversion.</param>
/// <param name="parent">MFM node whose children are converted.</param>
/// <param name="mentions">Forwarded to the node conversion.</param>
/// <param name="host">Forwarded to the node conversion.</param>
/// <param name="usedMedia">Forwarded to the node conversion.</param>
/// <param name="emoji">Forwarded to the node conversion.</param>
/// <param name="media">Forwarded to the node conversion.</param>
private void AppendChildren(
    INode element, IDocument document, IMfmNode parent,
    List mentions, string? host, List usedMedia,
    List? emoji = null, List? media = null
)
{
    foreach (var child in parent.Children)
    {
        var converted = FromMfmNode(document, child, mentions, host, usedMedia, emoji, media);
        element.AppendNodes(converted);
    }
}
/// <summary>
/// Creates the element used for a formatting construct: the requested tag when HTML formatting is
/// supported, otherwise a plain <c>span</c>.
/// </summary>
/// <param name="document">Document used to create the element.</param>
/// <param name="name">Tag name to use when HTML formatting is supported.</param>
/// <returns>The newly created element.</returns>
private IElement CreateInlineFormattingElement(IDocument document, string name)
{
    if (!flags.SupportsHtmlFormatting.Value)
        return document.CreateElement("span");

    return document.CreateElement(name);
}
/// <summary>
/// Appends <paramref name="chars"/> as a text-only <c>span</c> to <paramref name="node"/>, but only when
/// HTML formatting is not supported; otherwise does nothing.
/// </summary>
/// <param name="document">Document used to create the span and its text node.</param>
/// <param name="node">Element that receives the markup span.</param>
/// <param name="chars">Literal markup characters to emit.</param>
private void AddHtmlMarkup(IDocument document, IElement node, string chars)
{
    if (!flags.SupportsHtmlFormatting.Value)
    {
        var marker = document.CreateElement("span");
        var text   = document.CreateTextNode(chars);
        marker.AppendChild(text);
        node.AppendChild(marker);
    }
}
/// <summary>
/// Appends an empty element named <paramref name="tag"/> to <paramref name="node"/>, but only when
/// HTML formatting is not supported; otherwise does nothing.
/// </summary>
/// <param name="document">Document used to create the element.</param>
/// <param name="node">Element that receives the new child.</param>
/// <param name="tag">Tag name of the element to append.</param>
private void AddHtmlMarkupTag(IDocument document, IElement node, string tag)
{
    if (!flags.SupportsHtmlFormatting.Value)
    {
        node.AppendChild(document.CreateElement(tag));
    }
}
}