Iceshrimp.NET/Iceshrimp.Backend/Core/Helpers/LibMfm/Parsing/HtmlParser.cs
Kopper 89060599eb
[backend] Implement inline media
Inline media can be created by:

1. Attach media to note as usual
2. Copy media URL (public one, for remote instances)
3. Use the new $[media url ] MFM extension to place it wherever you
   wish. (The trailing space is necessary as the parser currently
   treats the closing ] as a part of the URL)

The Iceshrimp frontend may make this easier later on (by having a
"copy inline MFM" button on attachments, maybe?)

Federates as <img>, <video>, <audio>, or <a download> HTML tags
depending on the media type for interoperability. (<a download> is
not handled for incoming media yet).

The media will also be present in the attachments field, both as a
fallback for instance software that does not support inline media
and so that MFM federation can discover which media it is allowed to
embed (along with metadata like alt text and sensitivity). This is not
required for remote instances sending inline media, as it will be
extracted from the HTML.

The Iceshrimp frontend does not render inline media yet. That is
blocked on #67.
2024-12-13 22:19:30 +01:00

140 lines
No EOL
3 KiB
C#

using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Helpers.LibMfm.Conversion;
namespace Iceshrimp.Backend.Core.Helpers.LibMfm.Parsing;
/// <summary>
/// Converts an HTML DOM tree into MFM text by walking the nodes recursively.
/// </summary>
/// <param name="mentions">
/// Users mentioned by the note; mention links are resolved by comparing the link target
/// against each user's Uri and Url.
/// </param>
/// <param name="media">
/// Collection that receives one entry for every IMG, VIDEO or AUDIO element with a
/// well-formed absolute src encountered while parsing.
/// </param>
internal class HtmlParser(IEnumerable<Note.MentionedUser> mentions, ICollection<MfmInlineMedia> media)
{
    /// <summary>
    /// Converts a single node (and, for most elements, its descendants) to MFM text.
    /// </summary>
    /// <param name="node">The node to convert.</param>
    /// <returns>
    /// The MFM text for the node, or <c>null</c> when the node produces no output
    /// (comment nodes, document nodes and empty block quotes).
    /// </returns>
    internal string? ParseNode(INode node)
    {
        if (node.NodeType is NodeType.Text)
            return node.TextContent;
        if (node.NodeType is NodeType.Comment or NodeType.Document)
            return null;

        switch (node.NodeName)
        {
            case "BR":
            {
                return "\n";
            }
            case "A":
            {
                if (node is not HtmlElement el) return node.TextContent;

                var href = el.GetAttribute("href");
                // An anchor without a target is emitted as plain text.
                if (href is null) return $"<plain>{el.TextContent}</plain>";

                if (el.ClassList.Contains("u-url") && el.ClassList.Contains("mention"))
                {
                    var mention = mentions.FirstOrDefault(p => p.Uri == href || p.Url == href);
                    // Mention links that match no known user fall back to plain text.
                    return mention is not null
                        ? $"@{mention.Username}@{mention.Host}"
                        : $"<plain>{el.TextContent}</plain>";
                }

                // A link whose text equals its target is emitted as a bare URL.
                // The scheme check is ordinal: the culture-sensitive overload can skip
                // ignorable characters and accept a prefix that is not literally http(s).
                var isHttpLink = href.StartsWith("http://", StringComparison.Ordinal) ||
                                 href.StartsWith("https://", StringComparison.Ordinal);
                if (el.TextContent == href && isHttpLink)
                    return href;

                return $"[{el.TextContent}]({href})";
            }
            case "H1":
            {
                return $"【{ParseChildren(node)}】\n";
            }
            case "B":
            case "STRONG":
            {
                return $"**{ParseChildren(node)}**";
            }
            case "SMALL":
            {
                return $"<small>{ParseChildren(node)}</small>";
            }
            case "S":
            case "DEL":
            {
                return $"~~{ParseChildren(node)}~~";
            }
            case "I":
            case "EM":
            {
                return $"*{ParseChildren(node)}*";
            }
            case "PRE":
            {
                // A PRE whose only child is a CODE element becomes a fenced code block
                // built from the raw text content; any other PRE is parsed like a container.
                return node.ChildNodes is [{ NodeName: "CODE" }]
                    ? $"\n```\n{node.ChildNodes[0].TextContent}\n```\n"
                    : ParseChildren(node);
            }
            case "CODE":
            {
                return $"`{ParseChildren(node)}`";
            }
            case "BLOCKQUOTE":
            {
                // Uses the raw text content (not ParseChildren) and prefixes every line with "> ".
                return node.TextContent.Length > 0
                    ? $"\n> {string.Join("\n> ", node.TextContent.Split("\n"))}"
                    : null;
            }
            case "VIDEO":
            case "AUDIO":
            case "IMG":
            {
                if (node is not HtmlElement el) return node.TextContent;

                var src = el.GetAttribute("src");
                // A missing or malformed src yields the element's text content instead of inline media.
                if (!Uri.IsWellFormedUriString(src, UriKind.Absolute))
                    return node.TextContent;

                var alt = el.GetAttribute("alt") ?? el.GetAttribute("title");

                var type = node.NodeName switch
                {
                    "VIDEO" => MfmInlineMedia.MediaType.Video,
                    "AUDIO" => MfmInlineMedia.MediaType.Audio,
                    "IMG" => MfmInlineMedia.MediaType.Image,
                    _ => MfmInlineMedia.MediaType.Other,
                };

                // Record the media in the caller-supplied collection as a side effect of parsing.
                media.Add(new MfmInlineMedia(type, src, alt));

                return $"$[media {src} ]";
            }
            case "P":
            case "H2":
            case "H3":
            case "H4":
            case "H5":
            case "H6":
            {
                return $"\n\n{ParseChildren(node)}";
            }
            case "DIV":
            case "HEADER":
            case "FOOTER":
            case "ARTICLE":
            case "LI":
            case "DT":
            case "DD":
            {
                return $"\n{ParseChildren(node)}";
            }
            default:
            {
                return ParseChildren(node);
            }
        }
    }

    /// <summary>
    /// Converts every child of <paramref name="node"/> and concatenates the results in order.
    /// </summary>
    /// <param name="node">The node whose children are converted.</param>
    /// <returns>The concatenated MFM text; children that produce <c>null</c> contribute nothing.</returns>
    private string ParseChildren(INode node)
    {
        return string.Join(null, node.ChildNodes.Select(ParseNode));
    }
}