[backend] Implement mfm-to-html

This commit is contained in:
Laura Hausmann 2024-02-02 00:16:28 +01:00
parent 16316b81ff
commit a3c3f6b848
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
14 changed files with 809 additions and 62 deletions

View file

@ -29,7 +29,7 @@ public class ActivityPubController : Controller {
[FromServices] NoteRenderer noteRenderer) {
var note = await db.Notes.FirstOrDefaultAsync(p => p.Id == id);
if (note == null) return NotFound();
var rendered = noteRenderer.Render(note);
// RenderAsync returns a Task<ASNote>; await it so the rendered note (not the Task wrapper) gets compacted.
var rendered = await noteRenderer.RenderAsync(note);
var compacted = LdHelpers.Compact(rendered);
return Ok(compacted);
}

View file

@ -117,6 +117,7 @@ public class MastodonAuthController(DatabaseContext db) : Controller {
return Ok(res);
}
//TODO: implement /oauth/revoke
/*
[HttpPost("/oauth/revoke")]
[ConsumesHybrid]

View file

@ -1,18 +1,19 @@
using Iceshrimp.Backend.Core.Configuration;
using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Federation.ActivityStreams.Types;
using Iceshrimp.MfmSharp.Conversion;
using Microsoft.Extensions.Options;
namespace Iceshrimp.Backend.Core.Federation.ActivityPub;
public class NoteRenderer(IOptions<Config.InstanceSection> config) {
public ASNote Render(Note note) {
public async Task<ASNote> RenderAsync(Note note) {
var id = $"https://{config.Value.WebDomain}/notes/{note.Id}";
var userId = $"https://{config.Value.WebDomain}/users/{note.User.Id}";
return new ASNote {
Id = id,
Content = note.Text, //FIXME: render to html
Content = note.Text != null ? await MfmConverter.ToHtmlAsync(note.Text) : null,
AttributedTo = [new LDIdObject(userId)],
Type = "https://www.w3.org/ns/activitystreams#Note",
MkContent = note.Text,

View file

@ -4,6 +4,7 @@ using Iceshrimp.Backend.Core.Database;
using Iceshrimp.Backend.Core.Database.Tables;
using Iceshrimp.Backend.Core.Federation.ActivityStreams.Types;
using Iceshrimp.Backend.Core.Middleware;
using Iceshrimp.MfmSharp.Conversion;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
@ -27,21 +28,21 @@ public class UserRenderer(IOptions<Config.InstanceSection> config, DatabaseConte
: "Person";
return new ASActor {
Id = id,
Type = type,
Inbox = new ASLink($"{id}/inbox"),
Outbox = new ASCollection<ASObject>($"{id}/outbox"),
Followers = new ASCollection<ASObject>($"{id}/followers"),
Following = new ASCollection<ASObject>($"{id}/following"),
SharedInbox = new ASLink($"https://{config.Value.WebDomain}/inbox"),
Url = new ASLink($"https://{config.Value.WebDomain}/@{user.Username}"),
Username = user.Username,
DisplayName = user.Name ?? user.Username,
Summary = profile?.Description != null ? "Not implemented" : null, //TODO: convert to html
MkSummary = profile?.Description,
IsCat = user.IsCat,
Id = id,
Type = type,
Inbox = new ASLink($"{id}/inbox"),
Outbox = new ASCollection<ASObject>($"{id}/outbox"),
Followers = new ASCollection<ASObject>($"{id}/followers"),
Following = new ASCollection<ASObject>($"{id}/following"),
SharedInbox = new ASLink($"https://{config.Value.WebDomain}/inbox"),
Url = new ASLink($"https://{config.Value.WebDomain}/@{user.Username}"),
Username = user.Username,
DisplayName = user.Name ?? user.Username,
// Summary is the HTML rendering of the MFM bio (MkSummary carries the raw MFM), so convert MFM -> HTML here.
Summary = profile?.Description != null ? await MfmConverter.ToHtmlAsync(profile.Description) : null,
MkSummary = profile?.Description,
IsCat = user.IsCat,
IsDiscoverable = user.IsExplorable,
IsLocked = user.IsLocked,
IsLocked = user.IsLocked,
Endpoints = new ASEndpoints {
SharedInbox = new LDIdObject($"https://{config.Value.WebDomain}/inbox")
},

View file

@ -5,6 +5,7 @@ using Iceshrimp.Backend.Core.Federation.ActivityPub;
using Iceshrimp.Backend.Core.Federation.ActivityStreams.Types;
using Iceshrimp.Backend.Core.Helpers;
using Iceshrimp.Backend.Core.Middleware;
using Iceshrimp.MfmSharp.Conversion;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Options;
@ -42,7 +43,7 @@ public class NoteService(
await db.AddAsync(note);
await db.SaveChangesAsync();
var obj = noteRenderer.Render(note);
var obj = await noteRenderer.RenderAsync(note);
var activity = ActivityRenderer.RenderCreate(obj, actor);
await deliverSvc.DeliverToFollowersAsync(activity, user);
@ -86,7 +87,7 @@ public class NoteService(
Id = IdHelpers.GenerateSlowflakeId(),
Uri = note.Id,
Url = note.Url?.Id, //FIXME: this doesn't seem to work yet
Text = note.MkContent ?? await MfmHelpers.FromHtmlAsync(note.Content),
Text = note.MkContent ?? await MfmConverter.FromHtmlAsync(note.Content),
UserId = user.Id,
CreatedAt = note.PublishedAt?.ToUniversalTime() ??
throw GracefulException.UnprocessableEntity("Missing or invalid PublishedAt field"),

View file

@ -14,41 +14,45 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="1.1.0" />
<PackageReference Include="Asp.Versioning.Http" Version="8.0.0" />
<PackageReference Include="cuid.net" Version="5.0.2" />
<PackageReference Include="dotNetRdf.Core" Version="3.2.1-dev" />
<PackageReference Include="Isopoh.Cryptography.Argon2" Version="2.0.0" />
<PackageReference Include="Microsoft.AspNetCore.DataProtection.EntityFrameworkCore" Version="8.0.1" />
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="8.0.0" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.0" />
<PackageReference Include="AngleSharp" Version="1.1.0"/>
<PackageReference Include="Asp.Versioning.Http" Version="8.0.0"/>
<PackageReference Include="cuid.net" Version="5.0.2"/>
<PackageReference Include="dotNetRdf.Core" Version="3.2.1-dev"/>
<PackageReference Include="Isopoh.Cryptography.Argon2" Version="2.0.0"/>
<PackageReference Include="Microsoft.AspNetCore.DataProtection.EntityFrameworkCore" Version="8.0.1"/>
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="8.0.0"/>
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.0"/>
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="8.0.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="8.0.1" />
<PackageReference Include="Microsoft.Extensions.Configuration.Ini" Version="8.0.0" />
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="8.0.0" />
<PackageReference Include="protobuf-net" Version="3.2.30" />
<PackageReference Include="StackExchange.Redis" Version="2.7.17" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
<PackageReference Include="Vite.AspNetCore" Version="1.11.0" />
<PackageReference Include="YamlDotNet" Version="13.7.1" />
<PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="8.0.1"/>
<PackageReference Include="Microsoft.Extensions.Configuration.Ini" Version="8.0.0"/>
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="8.0.0"/>
<PackageReference Include="protobuf-net" Version="3.2.30"/>
<PackageReference Include="StackExchange.Redis" Version="2.7.17"/>
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0"/>
<PackageReference Include="Vite.AspNetCore" Version="1.11.0"/>
<PackageReference Include="YamlDotNet" Version="13.7.1"/>
</ItemGroup>
<ItemGroup>
<AdditionalFiles Include="Pages\Error.cshtml" />
<AdditionalFiles Include="Pages\Shared\_Layout.cshtml" />
<AdditionalFiles Include="Pages\_ViewImports.cshtml" />
<AdditionalFiles Include="Pages\_ViewStart.cshtml" />
<AdditionalFiles Include="Pages\Error.cshtml"/>
<AdditionalFiles Include="Pages\Shared\_Layout.cshtml"/>
<AdditionalFiles Include="Pages\_ViewImports.cshtml"/>
<AdditionalFiles Include="Pages\_ViewStart.cshtml"/>
</ItemGroup>
<ItemGroup>
<Folder Include="Core\Database\Tables\" />
<Folder Include="Core\Database\Tables\"/>
</ItemGroup>
<ItemGroup>
<Content Include="wwwroot\.vite\manifest.json" CopyToPublishDirectory="PreserveNewest" />
<Content Include="wwwroot\.vite\manifest.json" CopyToPublishDirectory="PreserveNewest"/>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Iceshrimp.MfmSharp\Iceshrimp.MfmSharp.csproj"/>
</ItemGroup>
</Project>

View file

@ -0,0 +1,170 @@
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Iceshrimp.MfmSharp.Parsing;
using Iceshrimp.MfmSharp.Types;
using static Iceshrimp.MfmSharp.Parsing.HtmlParser;
using HtmlParser = AngleSharp.Html.Parser.HtmlParser;
namespace Iceshrimp.MfmSharp.Conversion;
/// <summary>
/// Converts between HTML and MFM text: HTML -> MFM via <see cref="FromHtmlAsync"/>,
/// MFM -> HTML via <see cref="ToHtmlAsync"/>.
/// </summary>
public static class MfmConverter {
    // Matches a <br> tag that is directly followed by a line break. Built once instead of on every call.
    private static readonly Regex BrFollowedByNewline = new(@"<br\s?\/?>\r?\n", RegexOptions.IgnoreCase);

    /// <summary>Converts an HTML fragment to its MFM text representation.</summary>
    /// <param name="html">The HTML to convert; <c>null</c> yields an empty string.</param>
    /// <returns>The trimmed concatenation of the converted body child nodes, or "" if there is no body.</returns>
    public static async Task<string> FromHtmlAsync(string? html) {
        if (html == null) return "";

        // Ensure compatibility with AP servers that send both <br> as well as newlines
        html = BrFollowedByNewline.Replace(html, "\n");

        var dom = await new HtmlParser().ParseDocumentAsync(html);
        if (dom.Body == null) return "";

        var sb = new StringBuilder();
        foreach (var child in dom.Body.ChildNodes) sb.Append(ParseNode(child));
        return sb.ToString().Trim();
    }

    /// <summary>Parses MFM text and renders the resulting nodes as HTML wrapped in a single &lt;p&gt; element.</summary>
    /// <param name="mfm">The MFM source text.</param>
    /// <returns>The serialized HTML of the &lt;p&gt; element.</returns>
    /// <exception cref="NotImplementedException">A node type without an HTML mapping was encountered.</exception>
    public static async Task<string> ToHtmlAsync(string mfm) {
        var nodes = MfmParser.Parse(mfm);

        var context = BrowsingContext.New();
        var document = await context.OpenNewAsync();
        var element = document.CreateElement("p");

        foreach (var node in nodes) element.AppendNodes(document.FromMfmNode(node));

        await using var sw = new StringWriter();
        await element.ToHtmlAsync(sw);
        return sw.ToString();
    }

    /// <summary>Maps a single MFM node (and, where applicable, its children) to a DOM node owned by <paramref name="document"/>.</summary>
    private static INode FromMfmNode(this IDocument document, MfmNode node) {
        switch (node) {
            case MfmBoldNode: {
                var el = document.CreateElement("b");
                el.AppendChildren(document, node);
                return el;
            }
            case MfmSmallNode: {
                var el = document.CreateElement("small");
                el.AppendChildren(document, node);
                return el;
            }
            case MfmStrikeNode: {
                var el = document.CreateElement("del");
                el.AppendChildren(document, node);
                return el;
            }
            case MfmItalicNode:
            case MfmFnNode: {
                var el = document.CreateElement("i");
                el.AppendChildren(document, node);
                return el;
            }
            case MfmCodeBlockNode codeBlockNode: {
                var el = document.CreateElement("pre");
                var inner = document.CreateElement("code");
                inner.TextContent = codeBlockNode.Code;
                el.AppendNodes(inner);
                return el;
            }
            case MfmCenterNode: {
                var el = document.CreateElement("div");
                el.AppendChildren(document, node);
                return el;
            }
            case MfmEmojiCodeNode emojiCodeNode: {
                // The shortcode is surrounded by zero-width spaces (U+200B)
                return document.CreateTextNode($"\u200B:{emojiCodeNode.Name}:\u200B");
            }
            case MfmUnicodeEmojiNode unicodeEmojiNode: {
                return document.CreateTextNode(unicodeEmojiNode.Emoji);
            }
            case MfmHashtagNode hashtagNode: {
                var el = document.CreateElement("a");
                //TODO: get url from config
                // The tag is a URL path segment, so it must be percent-encoded (the parser only stops at whitespace)
                el.SetAttribute("href", $"https://example.org/tags/{Uri.EscapeDataString(hashtagNode.Hashtag)}");
                el.TextContent = $"#{hashtagNode.Hashtag}";
                el.SetAttribute("rel", "tag");
                return el;
            }
            case MfmInlineCodeNode inlineCodeNode: {
                var el = document.CreateElement("code");
                el.TextContent = inlineCodeNode.Code;
                return el;
            }
            case MfmMathInlineNode mathInlineNode: {
                var el = document.CreateElement("code");
                el.TextContent = mathInlineNode.Formula;
                return el;
            }
            case MfmMathBlockNode mathBlockNode: {
                var el = document.CreateElement("code");
                el.TextContent = mathBlockNode.Formula;
                return el;
            }
            case MfmLinkNode linkNode: {
                var el = document.CreateElement("a");
                el.SetAttribute("href", linkNode.Url);
                el.AppendChildren(document, node);
                return el;
            }
            case MfmMentionNode mentionNode: {
                var el = document.CreateElement("span");
                el.TextContent = mentionNode.Acct;
                //TODO: Resolve mentions and only fall back to the above
                return el;
            }
            case MfmQuoteNode: {
                var el = document.CreateElement("blockquote");
                el.AppendChildren(document, node);
                return el;
            }
            case MfmTextNode textNode: {
                var el = document.CreateElement("span");
                // Split on every line-break flavour (\r\n, \r, \n) and join the pieces with <br> elements
                var nodes = textNode.Text.Split("\r\n")
                                    .SelectMany(p => p.Split('\r'))
                                    .SelectMany(p => p.Split('\n'))
                                    .Select(document.CreateTextNode);

                foreach (var htmlNode in nodes) {
                    el.AppendNodes(htmlNode);
                    el.AppendNodes(document.CreateElement("br"));
                }

                // Every piece was followed by a <br>; drop the trailing one
                if (el.LastChild != null)
                    el.RemoveChild(el.LastChild);
                return el;
            }
            case MfmUrlNode urlNode: {
                var el = document.CreateElement("a");
                el.SetAttribute("href", urlNode.Url);

                // Show the URL without its scheme. Compare ordinally, and only strip a prefix that is
                // actually present so that an unexpected URL is shown in full instead of being mangled.
                var text = urlNode.Url;
                if (text.StartsWith("https://", StringComparison.Ordinal))
                    text = text["https://".Length..];
                else if (text.StartsWith("http://", StringComparison.Ordinal))
                    text = text["http://".Length..];

                el.TextContent = text;
                return el;
            }
            case MfmSearchNode searchNode: {
                //TODO: get search engine from config
                var el = document.CreateElement("a");
                el.SetAttribute("href", $"https://duckduckgo.com?q={HttpUtility.UrlEncode(searchNode.Query)}");
                el.TextContent = searchNode.Content;
                return el;
            }
            case MfmPlainNode: {
                var el = document.CreateElement("span");
                el.AppendChildren(document, node);
                return el;
            }
            default: {
                throw new NotImplementedException("Unsupported MfmNode type");
            }
        }
    }

    /// <summary>Converts every child of <paramref name="parent"/> and appends the result to <paramref name="element"/>.</summary>
    private static void AppendChildren(this INode element, IDocument document, MfmNode parent) {
        foreach (var node in parent.Children) element.AppendNodes(document.FromMfmNode(node));
    }
}

View file

@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>Iceshrimp.MfmSharp</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="1.1.0"/>
</ItemGroup>
</Project>

View file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2024 The Iceshrimp contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -1,27 +1,9 @@
using System.Text;
using System.Text.RegularExpressions;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
namespace Iceshrimp.Backend.Core.Helpers;
namespace Iceshrimp.MfmSharp.Parsing;
public static class MfmHelpers {
public static async Task<string> FromHtmlAsync(string? html) {
if (html == null) return "";
// Ensure compatibility with AP servers that send both <br> as well as newlines
var regex = new Regex(@"<br\s?\/?>\r?\n", RegexOptions.IgnoreCase);
html = regex.Replace(html, "\n");
var dom = await new HtmlParser().ParseDocumentAsync(html);
if (dom.Body == null) return "";
var sb = new StringBuilder();
dom.Body.ChildNodes.Select(ParseNode).ToList().ForEach(s => sb.Append(s));
return sb.ToString().Trim();
}
private static string? ParseNode(INode node) {
internal static class HtmlParser {
internal static string? ParseNode(INode node) {
if (node.NodeType is NodeType.Text)
return node.TextContent;
if (node.NodeType is NodeType.Comment or NodeType.Document)

View file

@ -0,0 +1,400 @@
using System.Text.RegularExpressions;
using Iceshrimp.MfmSharp.Types;
namespace Iceshrimp.MfmSharp.Parsing;
/// <summary>
/// Entry point of the MFM parser: turns MFM source text into a flat list of top-level nodes,
/// each of which may carry nested children.
/// </summary>
public static class MfmParser {
    // Order matters: at each position the first parser whose IsValid returns true is used.
    private static readonly List<INodeParser> Parsers = [
        new PlainNodeParser(),
        new ItalicNodeParser(),
        new BoldNodeParser(),
        new SmallNodeParser(),
        new StrikeNodeParser(),
        new CenterNodeParser(),
        new HashtagNodeParser(),
        new MentionNodeParser(),
        new UrlNodeParser(),
        new AltUrlNodeParser(),
        new LinkNodeParser(),
        new SilentLinkNodeParser(),
        new InlineCodeNodeParser()
    ];

    /// <summary>Parses <paramref name="buffer"/> starting at <paramref name="position"/>.</summary>
    /// <param name="buffer">The MFM source text.</param>
    /// <param name="position">Index of the first character to parse.</param>
    /// <param name="nestLimit">
    /// Remaining nesting budget. The node parsers decrement it for every level they recurse into;
    /// once it reaches zero the rest of the buffer is returned as plain text instead of being parsed,
    /// which bounds the recursion depth on deeply nested input.
    /// </param>
    /// <returns>The parsed nodes; characters no parser claims are collected into <see cref="MfmTextNode"/>s.</returns>
    public static IEnumerable<MfmNode> Parse(string buffer, int position = 0, int nestLimit = 20) {
        var nodes = new List<MfmNode>();
        if (position >= buffer.Length) return nodes;

        if (nestLimit <= 0) {
            nodes.Add(new MfmTextNode { Text = buffer[position..] });
            return nodes;
        }

        // Start index of the pending run of unclaimed characters, or -1 if there is none.
        // Slicing the run once avoids the quadratic cost of appending one character at a time.
        var textStart = -1;

        while (position < buffer.Length) {
            var parser = Parsers.FirstOrDefault(p => p.IsValid(buffer, position));
            if (parser == null) {
                if (textStart == -1) textStart = position;
                position++;
                continue;
            }

            FlushText(position);

            var result = parser.Parse(buffer, position, nestLimit);
            position += result.chars;
            nodes.Add(result.node);
        }

        FlushText(buffer.Length);
        return nodes;

        // Emits the pending text run (if any) as a single text node ending before index `end`
        void FlushText(int end) {
            if (textStart == -1) return;
            nodes.Add(new MfmTextNode { Text = buffer[textStart..end] });
            textStart = -1;
        }
    }
}
/// <summary>
/// Shared index arithmetic for the node parsers. All overloads return
/// <c>start</c> (first index after the opening token), <c>end</c> (exclusive end of the content)
/// and <c>chars</c> (number of characters to consume, counted from <c>position</c>).
/// </summary>
internal static class NodeParserAbstractions {
    /// <summary>Locates content delimited by the literal tokens <paramref name="pre"/> and <paramref name="post"/>.</summary>
    /// <remarks>
    /// If <paramref name="post"/> is not found the content extends to the end of the buffer and
    /// <c>chars</c> covers everything from <paramref name="position"/> onwards.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="position"/> lies beyond the end of the buffer.</exception>
    public static (int start, int end, int chars) HandlePosition(string pre, string post, string buffer, int position) {
        var start = ContentStart(pre, buffer, position);
        var end = buffer.IndexOf(post, start, StringComparison.Ordinal);

        int chars;
        if (end == -1) {
            end = buffer.Length;
            chars = end - position;
        }
        else {
            chars = end - position + post.Length;
        }

        return (start, end, chars);
    }

    /// <summary>Same as the pre/post overload, with <paramref name="character"/> used as both opening and closing token.</summary>
    public static (int start, int end, int chars) HandlePosition(string character, string buffer, int position) {
        return HandlePosition(character, character, buffer, position);
    }

    /// <summary>
    /// Locates content that begins after <paramref name="pre"/> and ends where <paramref name="regex"/> first
    /// matches in the remainder of the buffer. The matched terminator itself is not consumed.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="position"/> lies beyond the end of the buffer.</exception>
    public static (int start, int end, int chars) HandlePosition(string pre, Regex regex, string buffer, int position) {
        var start = ContentStart(pre, buffer, position);
        var match = regex.Match(buffer[start..]);

        // A failed match reports Index 0, which would silently yield an empty span; treat it as "runs to the end"
        var end = match.Success ? match.Index + start : buffer.Length;
        var chars = end - position;
        return (start, end, chars);
    }

    // Index of the first content character, clamped to the buffer length so that an empty or
    // too-short buffer produces an empty span instead of an out-of-range exception further down.
    private static int ContentStart(string pre, string buffer, int position) {
        if (position > buffer.Length) throw new ArgumentOutOfRangeException(nameof(position));
        return Math.Min(position + pre.Length, buffer.Length);
    }
}
// Contract implemented by every node parser registered in MfmParser.
internal interface INodeParser {
// Returns true if this parser can handle the text that starts at `position` in `buffer`.
public bool IsValid(string buffer, int position);
// Parses the node starting at `position`. Returns the node together with the number of characters
// consumed, which MfmParser.Parse adds to its position. `nestLimit` is the remaining nesting budget
// that implementations pass on (decremented) when they parse nested content.
public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit);
}
/// <summary>Parses <c>*italic*</c> spans; a leading <c>**</c> is left to the bold parser.</summary>
internal class ItalicNodeParser : INodeParser {
    private const string Char = "*";

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        var rest = buffer.AsSpan(position);
        return rest.StartsWith(Char, StringComparison.Ordinal) && !rest.StartsWith("**", StringComparison.Ordinal);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Char, buffer, position);

        // The content between the markers is parsed recursively with a reduced nesting budget
        var node = new MfmItalicNode {
            Children = MfmParser.Parse(buffer[start..end], 0, --nestLimit).OfType<MfmInlineNode>()
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>`inline code`</c>; a leading triple backtick is not treated as inline code.</summary>
internal class InlineCodeNodeParser : INodeParser {
    private const string Char = "`";

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        var rest = buffer.AsSpan(position);
        return rest.StartsWith(Char, StringComparison.Ordinal) && !rest.StartsWith("```", StringComparison.Ordinal);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Char, buffer, position);

        // Code is taken verbatim; it is not parsed for nested MFM
        var node = new MfmInlineCodeNode {
            Code = buffer[start..end]
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>**bold**</c> spans.</summary>
internal class BoldNodeParser : INodeParser {
    private const string Char = "**";

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        return buffer.AsSpan(position).StartsWith(Char, StringComparison.Ordinal);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Char, buffer, position);

        // The content between the markers is parsed recursively with a reduced nesting budget
        var node = new MfmBoldNode {
            Children = MfmParser.Parse(buffer[start..end], 0, --nestLimit).OfType<MfmInlineNode>()
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>&lt;plain&gt;…&lt;/plain&gt;</c>, whose content is kept as literal text.</summary>
internal class PlainNodeParser : INodeParser {
    private const string Pre = "<plain>";
    private const string Post = "</plain>";

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        return buffer.AsSpan(position).StartsWith(Pre, StringComparison.Ordinal);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);

        // No recursion here: everything between the tags becomes a single text node
        var node = new MfmPlainNode {
            Children = [
                new MfmTextNode {
                    Text = buffer[start..end]
                }
            ]
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>&lt;small&gt;…&lt;/small&gt;</c> spans.</summary>
internal class SmallNodeParser : INodeParser {
    private const string Pre = "<small>";
    private const string Post = "</small>";

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        return buffer.AsSpan(position).StartsWith(Pre, StringComparison.Ordinal);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);

        // The content between the tags is parsed recursively with a reduced nesting budget
        var node = new MfmSmallNode {
            Children = MfmParser.Parse(buffer[start..end], 0, --nestLimit).OfType<MfmInlineNode>()
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>&lt;center&gt;…&lt;/center&gt;</c> blocks.</summary>
internal class CenterNodeParser : INodeParser {
    private const string Pre = "<center>";
    private const string Post = "</center>";

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        return buffer.AsSpan(position).StartsWith(Pre, StringComparison.Ordinal);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);

        // The content between the tags is parsed recursively with a reduced nesting budget;
        // only inline nodes are kept as children
        var node = new MfmCenterNode {
            Children = MfmParser.Parse(buffer[start..end], 0, --nestLimit).OfType<MfmInlineNode>()
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>~~strikethrough~~</c> spans.</summary>
internal class StrikeNodeParser : INodeParser {
    private const string Char = "~~";

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        return buffer.AsSpan(position).StartsWith(Char, StringComparison.Ordinal);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Char, buffer, position);

        // The content between the markers is parsed recursively with a reduced nesting budget
        var node = new MfmStrikeNode {
            Children = MfmParser.Parse(buffer[start..end], 0, --nestLimit).OfType<MfmInlineNode>()
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>#hashtag</c>: a '#' followed by at least one character, up to the next whitespace or the end of the text.</summary>
internal class HashtagNodeParser : INodeParser {
    private const string Pre = "#";
    private static readonly Regex Post = new(@"\s|$");

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        if (!buffer.AsSpan(position).StartsWith(Pre, StringComparison.Ordinal)) return false;

        // A '#' directly followed by whitespace or the end of the text is not a hashtag;
        // rejecting it here keeps empty hashtag nodes out of the tree and leaves the '#' as plain text.
        var (start, end, _) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);
        return end > start;
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);

        // The stored tag does not include the leading '#'
        var node = new MfmHashtagNode {
            Hashtag = buffer[start..end]
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>@user</c> and <c>@user@host.tld</c> mentions that start the text or follow whitespace.</summary>
internal class MentionNodeParser : INodeParser {
    private const string Pre = "@";
    private static readonly Regex Post = new(@"\s|$");
    private static readonly Regex Full = new(@"^[a-zA-Z0-9._\-]+(?:@[a-zA-Z0-9._\-]+\.[a-zA-Z0-9._\-]+)?$");
    private static readonly Regex Lookbehind = new(@"\s");

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        if (!buffer.AsSpan(position).StartsWith(Pre, StringComparison.Ordinal)) return false;

        // A mention must be at the very start or be preceded by whitespace
        if (position != 0 && !Lookbehind.IsMatch(buffer[position - 1].ToString())) return false;

        var (start, end, _) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);
        var candidate = buffer[start..end];

        // At most one '@' separator (user@host), and the whole candidate has to match the acct pattern
        return candidate.Split("@").Length <= 2 && Full.IsMatch(candidate);
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        //TODO: make sure this handles non-ascii/puny domains
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);
        var acct = buffer[start..end];
        var split = acct.Split("@");

        var node = new MfmMentionNode {
            Username = split[0],
            Host = split.Length == 2 ? split[1] : null,
            Acct = $"@{acct}"
        };

        return (node, chars);
    }
}
/// <summary>Parses bare <c>https://…</c> and <c>http://…</c> URLs, which run up to the next whitespace or the end of the text.</summary>
internal class UrlNodeParser : INodeParser {
    private const string Pre = "https://";
    private const string PreAlt = "http://";
    private static readonly Regex Post = new(@"\s|$");

    public bool IsValid(string buffer, int position) {
        var prefix = PrefixAt(buffer, position);
        if (prefix == null)
            return false;

        var (start, end, _) = NodeParserAbstractions.HandlePosition(prefix, Post, buffer, position);
        var result = Uri.TryCreate(prefix + buffer[start..end], UriKind.Absolute, out var uri);
        return result && uri?.Scheme is "http" or "https";
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        // IsValid has already established that one of the two prefixes is present
        var prefix = PrefixAt(buffer, position) ?? PreAlt;
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(prefix, Post, buffer, position);

        var node = new MfmUrlNode {
            Url = prefix + buffer[start..end],
            Brackets = false
        };

        return (node, chars);
    }

    // Returns the scheme prefix found at `position`, or null if there is none.
    // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
    // (which would shift the computed offsets), and slicing a span does not allocate.
    private static string? PrefixAt(string buffer, int position) {
        var rest = buffer.AsSpan(position);
        if (rest.StartsWith(Pre, StringComparison.Ordinal)) return Pre;
        if (rest.StartsWith(PreAlt, StringComparison.Ordinal)) return PreAlt;
        return null;
    }
}
/// <summary>Parses URLs written in angle brackets: <c>&lt;https://…&gt;</c> or <c>&lt;http://…&gt;</c>.</summary>
internal class AltUrlNodeParser : INodeParser {
    private const string Pre = "<https://";
    private const string PreAlt = "<http://";
    private const string Post = ">";

    public bool IsValid(string buffer, int position) {
        var prefix = PrefixAt(buffer, position);
        if (prefix == null)
            return false;

        var (start, end, _) = NodeParserAbstractions.HandlePosition(prefix, Post, buffer, position);

        // prefix[1..] drops the opening '<' so that only the URL itself is validated
        var result = Uri.TryCreate(prefix[1..] + buffer[start..end], UriKind.Absolute, out var uri);
        return result && uri?.Scheme is "http" or "https";
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        // IsValid has already established that one of the two prefixes is present
        var prefix = PrefixAt(buffer, position) ?? PreAlt;
        var (start, end, chars) = NodeParserAbstractions.HandlePosition(prefix, Post, buffer, position);

        var node = new MfmUrlNode {
            Url = prefix[1..] + buffer[start..end],
            Brackets = true
        };

        return (node, chars);
    }

    // Returns the bracketed scheme prefix found at `position`, or null if there is none.
    // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
    // (which would shift the computed offsets), and slicing a span does not allocate.
    private static string? PrefixAt(string buffer, int position) {
        var rest = buffer.AsSpan(position);
        if (rest.StartsWith(Pre, StringComparison.Ordinal)) return Pre;
        if (rest.StartsWith(PreAlt, StringComparison.Ordinal)) return PreAlt;
        return null;
    }
}
/// <summary>Parses <c>[label](url)</c> links whose target is an absolute http(s) URL.</summary>
internal class LinkNodeParser : INodeParser {
    private const string Pre = "[";
    private const string Post = ")";
    private static readonly Regex Full = new(@"^\[(.+?)\]\((.+?)\)$");

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        if (!buffer.AsSpan(position).StartsWith(Pre, StringComparison.Ordinal))
            return false;

        var (_, end, _) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);

        // end == buffer.Length means no closing ')' was found
        if (end == buffer.Length)
            return false;

        // Groups.Count is fixed by the pattern and is the same for failed matches, so test Success instead
        var match = Full.Match(buffer[position..(end + 1)]);
        if (!match.Success)
            return false;

        var result = Uri.TryCreate(match.Groups[2].Value, UriKind.Absolute, out var uri);
        return result && uri?.Scheme is "http" or "https";
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (_, end, chars) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);
        var match = Full.Match(buffer[position..(end + 1)]);

        // Take the label from the same match that produced the URL. Cutting at the first ']' instead
        // disagrees with the pattern (and truncates the label) whenever the label itself contains a ']'.
        var node = new MfmLinkNode {
            Url = match.Groups[2].Value,
            Children = MfmParser.Parse(match.Groups[1].Value, 0, --nestLimit).OfType<MfmInlineNode>(),
            Silent = false
        };

        return (node, chars);
    }
}
/// <summary>Parses <c>?[label](url)</c> links (flagged as silent) whose target is an absolute http(s) URL.</summary>
internal class SilentLinkNodeParser : INodeParser {
    private const string Pre = "?[";
    private const string Post = ")";
    private static readonly Regex Full = new(@"^\?\[(.+?)\]\((.+?)\)$");

    public bool IsValid(string buffer, int position) {
        // Ordinal comparison on a span: culture-sensitive StartsWith can skip ignorable characters
        // (which would shift the computed offsets), and slicing a span does not allocate.
        if (!buffer.AsSpan(position).StartsWith(Pre, StringComparison.Ordinal))
            return false;

        var (_, end, _) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);

        // end == buffer.Length means no closing ')' was found
        if (end == buffer.Length)
            return false;

        // Groups.Count is fixed by the pattern and is the same for failed matches, so test Success instead
        var match = Full.Match(buffer[position..(end + 1)]);
        if (!match.Success)
            return false;

        var result = Uri.TryCreate(match.Groups[2].Value, UriKind.Absolute, out var uri);
        return result && uri?.Scheme is "http" or "https";
    }

    public (MfmNode node, int chars) Parse(string buffer, int position, int nestLimit) {
        var (_, end, chars) = NodeParserAbstractions.HandlePosition(Pre, Post, buffer, position);
        var match = Full.Match(buffer[position..(end + 1)]);

        // Take the label from the same match that produced the URL. Cutting at the first ']' instead
        // disagrees with the pattern (and truncates the label) whenever the label itself contains a ']'.
        var node = new MfmLinkNode {
            Url = match.Groups[2].Value,
            Children = MfmParser.Parse(match.Groups[1].Value, 0, --nestLimit).OfType<MfmInlineNode>(),
            Silent = true
        };

        return (node, chars);
    }
}
//TODO: still missing: FnNode, MathInlineNode, EmojiCodeNode, UnicodeEmojiNode, MfmMathBlockNode, MfmCodeBlockNode, MfmSearchNode, MfmQuoteNode
//TODO: "*italic **bold** *" doesn't work yet

View file

@ -0,0 +1,108 @@
namespace Iceshrimp.MfmSharp.Types;
// Base class of every MFM syntax tree node.
public abstract class MfmNode {
// Child nodes; empty unless set by a derived type's Children setter.
public IEnumerable<MfmNode> Children = [];
}
// Base class for inline nodes.
public abstract class MfmInlineNode : MfmNode {
// Set-only property that hides the base field: restricts assigned children to inline nodes
// and stores them in the base Children field (reads go through MfmNode.Children).
public new IEnumerable<MfmInlineNode> Children {
set => base.Children = value;
}
}
// Base class for block nodes; like MfmInlineNode, assigned children are restricted to inline nodes.
public abstract class MfmBlockNode : MfmNode {
public new IEnumerable<MfmInlineNode> Children {
set => base.Children = value;
}
}
// Inline node for which Children must be supplied in the object initializer (`required`).
public abstract class MfmPureInlineNode : MfmInlineNode {
public new required IEnumerable<MfmInlineNode> Children {
set => base.Children = value;
}
}
// Block-level counterpart of MfmPureInlineNode. Note that it derives from MfmNode directly,
// not from MfmBlockNode.
public abstract class MfmPureBlockNode : MfmNode {
public new required IEnumerable<MfmInlineNode> Children {
set => base.Children = value;
}
}
// Quote block; carries only children.
public sealed class MfmQuoteNode : MfmPureBlockNode;
// Search block. NOTE(review): Query appears to be the search term and Content the original text
// (the HTML converter builds the search URL from Query and uses Content as link text) - confirm.
public sealed class MfmSearchNode : MfmBlockNode {
public required string Content;
public required string Query;
}
// Code block with an optional language.
public sealed class MfmCodeBlockNode : MfmBlockNode {
public required string Code;
public required string? Language;
}
// Block-level math formula.
public sealed class MfmMathBlockNode : MfmBlockNode {
public required string Formula;
}
// Centered block; carries only children.
public sealed class MfmCenterNode : MfmPureBlockNode;
// Unicode emoji.
public sealed class MfmUnicodeEmojiNode : MfmInlineNode {
public required string Emoji;
}
// Emoji referenced by name (the HTML converter renders it as ":Name:").
public sealed class MfmEmojiCodeNode : MfmInlineNode {
public required string Name;
}
// Formatting spans; each carries only children.
public sealed class MfmBoldNode : MfmPureInlineNode;
public sealed class MfmSmallNode : MfmPureInlineNode;
public sealed class MfmItalicNode : MfmPureInlineNode;
public sealed class MfmStrikeNode : MfmPureInlineNode;
// Inline code; Code is the text between the backticks.
public sealed class MfmInlineCodeNode : MfmInlineNode {
public required string Code;
}
// Inline math formula.
public sealed class MfmMathInlineNode : MfmInlineNode {
public required string Formula;
}
// Mention. As produced by MentionNodeParser: Acct is the full "@user" / "@user@host" text,
// Host is null when no host part was given.
public sealed class MfmMentionNode : MfmInlineNode {
public required string Acct;
public required string? Host;
public required string Username;
}
// Hashtag; as produced by HashtagNodeParser, Hashtag excludes the leading '#'.
public sealed class MfmHashtagNode : MfmInlineNode {
public required string Hashtag;
}
// Bare URL. Brackets is true when the URL was written as <https://...> (set by AltUrlNodeParser).
public sealed class MfmUrlNode : MfmInlineNode {
public required bool Brackets;
public required string Url;
}
// Link with a label (the children). Silent is true for the ?[label](url) form (set by SilentLinkNodeParser).
public sealed class MfmLinkNode : MfmPureInlineNode {
public required bool Silent;
public required string Url;
}
// Named function node with string arguments; no parser in this file produces it yet.
public sealed class MfmFnNode : MfmPureInlineNode {
public required Dictionary<string, string> Args;
public required string Name;
//TODO: implement (string, bool) args
}
// <plain> node: its children are restricted to text nodes and must be supplied.
public sealed class MfmPlainNode : MfmInlineNode {
public new required IEnumerable<MfmTextNode> Children {
set => base.Children = value;
}
}
// Plain text run.
public sealed class MfmTextNode : MfmInlineNode {
public required string Text;
}

View file

@ -4,6 +4,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.Backend", "Iceshr
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.Tests", "Iceshrimp.Tests\Iceshrimp.Tests.csproj", "{0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.MfmSharp", "Iceshrimp.MfmSharp\Iceshrimp.MfmSharp.csproj", "{3003009B-BE1D-4606-9BE3-8BC8167B628F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -18,5 +20,9 @@ Global
{0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}.Release|Any CPU.Build.0 = Release|Any CPU
{3003009B-BE1D-4606-9BE3-8BC8167B628F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3003009B-BE1D-4606-9BE3-8BC8167B628F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3003009B-BE1D-4606-9BE3-8BC8167B628F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3003009B-BE1D-4606-9BE3-8BC8167B628F}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

View file

@ -0,0 +1,38 @@
using Iceshrimp.MfmSharp.Conversion;
using Iceshrimp.MfmSharp.Parsing;
namespace Iceshrimp.Tests.Parsing;
/// <summary>Smoke tests for the MFM parser and the MFM-to-HTML converter.</summary>
[TestClass]
public class MfmTests {
    // Sample covering every syntax the parser currently supports, plus a few inputs that must not match
    private const string Mfm =
        "<plain>*blabla*</plain> *test* #example @example @example@invalid @example@example.com @invalid:matrix.org https://hello.com http://test.de <https://大石泉すき.example.com> javascript://sdfgsdf [test](https://asdfg) ?[test](https://asdfg) `asd`";

    [TestMethod]
    public void TestParse() {
        //TODO: actually validate the output (this currently only checks that no exception is thrown)
        MfmParser.Parse(Mfm);
    }

    /// <summary>
    /// Converts the sample four times and asserts that the last (warmed-up) run takes less than 2 ms.
    /// </summary>
    [TestMethod]
    public async Task TestToHtml() {
        double duration = 100;
        for (var i = 0; i < 4; i++) duration = await Benchmark();

        duration.Should().BeLessThan(2);

        return;

        async Task<double> Benchmark() {
            // Stopwatch is monotonic and high-resolution; DateTime.Now is a coarse wall clock that can
            // jump (clock adjustments, DST) and is not suited for measuring a few milliseconds.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            await MfmConverter.ToHtmlAsync(Mfm);
            stopwatch.Stop();

            var ms = stopwatch.Elapsed.TotalMilliseconds;
            Console.WriteLine($"Took {ms} ms");
            return ms;
        }
    }

    //TODO: TestFromHtml
    //TODO: RoundtripTest
}