diff --git a/Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj b/Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj
index b6bd4a15..0860afdc 100644
--- a/Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj
+++ b/Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj
@@ -7,6 +7,7 @@
+
diff --git a/Iceshrimp.Parsing/Mfm.fs b/Iceshrimp.Parsing/Mfm.fs
new file mode 100644
index 00000000..97e3737c
--- /dev/null
+++ b/Iceshrimp.Parsing/Mfm.fs
@@ -0,0 +1,337 @@
+namespace Iceshrimp.Parsing
+
+open System
+open System.Collections.Generic
+open System.Runtime.InteropServices.JavaScript
+open FParsec
+
+module MfmNodeTypes =
+ []
+ type MfmNode() =
+ member val Children: MfmNode list = [] with get, set
+
+ []
+ type MfmInlineNode(c: MfmInlineNode list) =
+ inherit MfmNode()
+ do base.Children <- c |> List.map (fun x -> x :> MfmNode)
+
+ []
+ type MfmBlockNode(c: MfmInlineNode list) =
+ inherit MfmNode()
+ do base.Children <- c |> List.map (fun x -> x :> MfmNode)
+
+ type MfmTextNode(v: string) =
+ inherit MfmInlineNode([])
+ member val Text = v
+
+ type MfmItalicNode(c) =
+ inherit MfmInlineNode(c)
+
+ type MfmBoldNode(c) =
+ inherit MfmInlineNode(c)
+
+ type MfmStrikeNode(c) =
+ inherit MfmInlineNode(c)
+
+ type MfmInlineCodeNode(v: string) =
+ inherit MfmInlineNode([])
+ member val Code = v
+
+ type MfmPlainNode(v: string) =
+ inherit MfmInlineNode([ MfmTextNode(v) ])
+
+ type MfmSmallNode(c) =
+ inherit MfmInlineNode(c)
+
+ type MfmQuoteNode(c) =
+ inherit MfmBlockNode(c)
+
+ type MfmSearchNode(content: string, query: string) =
+ inherit MfmBlockNode([])
+ member val Content = content
+ member val Query = query
+
+ type MfmCodeBlockNode(code, lang: string option) =
+ inherit MfmBlockNode([])
+ member val Code = code
+ member val Language = lang
+
+ type MfmMathBlockNode(f) =
+ inherit MfmBlockNode([])
+ member val Formula = f
+
+ type MfmCenterNode(c) =
+ inherit MfmBlockNode(c)
+
+ type MfmUnicodeEmojiNode(e: string) =
+ inherit MfmInlineNode([])
+ member val Emoji = e
+
+ type MfmEmojiCodeNode(n) =
+ inherit MfmInlineNode([])
+ member val Name = n
+
+ type MfmMathInlineNode(f) =
+ inherit MfmInlineNode([])
+ member val Formula = f
+
+ type MfmMentionNode(acct, user, host: string option) =
+ inherit MfmInlineNode([])
+ member val Acct = acct
+ member val Username = user
+ member val Host = host
+
+ type MfmHashtagNode(h) =
+ inherit MfmInlineNode([])
+ member val Hashtag = h
+
+ type MfmUrlNode(url, brackets) =
+ inherit MfmInlineNode([])
+ member val Url = url
+ member val Brackets = brackets
+
+ type MfmLinkNode(url, text, silent) =
+ inherit MfmInlineNode([ MfmTextNode(text) ])
+ member val Url = url
+ member val Silent = silent
+
+ type MfmFnNode(args: Dictionary, name, children) =
+ inherit MfmInlineNode(children)
+ // (string, bool) args = (string, null as string?)
+ member val Args = args
+ member val Name = name
+
+ type internal MfmCharNode(v: char) =
+ inherit MfmInlineNode([])
+ member val Char = v
+
+open MfmNodeTypes
+
+module private MfmParser =
+ // Abstractions
+ let str s = pstring s
+ let seqAttempt s = s |> Seq.map attempt
+ let isWhitespace c = Char.IsWhiteSpace c
+ let isNotWhitespace c = Char.IsWhiteSpace c = false
+
+ let isAsciiLetterOrNumber c = Char.IsAsciiLetter c || Char.IsDigit c
+
+ let (|CharNode|MfmNode|) (x: MfmNode) =
+ if x :? MfmCharNode then
+ CharNode(x :?> MfmCharNode)
+ else
+ MfmNode x
+
+ let folder (current, result) node =
+ match (node: MfmNode), (current: char list) with
+ | CharNode node, _ -> node.Char :: current, result
+ | MfmNode node, [] -> current, node :: result
+ | MfmNode node, _ -> [], node :: (MfmTextNode(current |> List.toArray |> String) :: result)
+
+ let aggregateText nodes =
+ nodes
+ |> List.rev
+ |> List.fold folder ([], [])
+ |> function
+ | [], result -> result
+ | current, result -> MfmTextNode(current |> List.toArray |> String) :: result
+
+ let aggregateTextInline nodes =
+ nodes |> aggregateText |> List.map (fun x -> x :?> MfmInlineNode)
+
+ let domainFirstComponent =
+ many1Chars (satisfy isAsciiLetter <|> satisfy isDigit <|> anyOf "_-")
+
+ let domainComponent =
+ many1Chars (satisfy isAsciiLetter <|> satisfy isDigit <|> anyOf "._-")
+
+ let domainStart = (satisfy isAsciiLetter <|> satisfy isDigit)
+
+ let domainFull =
+ domainStart .>>. domainFirstComponent .>>. pchar '.' .>>. many1 domainComponent
+
+ let domainAggregate1 (a: char, b: string) = string a + b
+ let domainAggregate2 (a: char * string, b: char) = (domainAggregate1 a) + string b
+
+ let domainAggregate (x: (char * string) * char, y: string list) =
+ domainAggregate2 x + (String.concat "" y)
+
+ let domain = domainFull |>> domainAggregate
+
+ let acct (user: string, host: string option) =
+ match host with
+ | None -> user
+ | Some v -> user + "@" + v
+
+ // References
+ let node, nodeRef = createParserForwardedToRef ()
+ let inlineNode, inlineNodeRef = createParserForwardedToRef ()
+
+ let seqFlatten items =
+ seq {
+ for item in items do
+ yield! item
+ }
+
+ // Patterns
+ let italicPattern = (notFollowedBy <| str "**") >>. skipChar '*'
+ let codePattern = (notFollowedBy <| str "```") >>. skipChar '`'
+
+ // Node parsers
+
+ let italicNode =
+ italicPattern >>. manyTill inlineNode italicPattern
+ |>> fun c -> MfmItalicNode(aggregateTextInline c) :> MfmNode
+
+ //TODO: https://github.com/pzp1997/harkdown/blob/master/src/InlineParser.hs#L173-L201
+
+ let boldNode =
+ skipString "**" >>. manyTill inlineNode (skipString "**")
+ |>> fun c -> MfmBoldNode(aggregateTextInline c) :> MfmNode
+
+ let strikeNode =
+ skipString "~~" >>. manyTill inlineNode (skipString "~~")
+ |>> fun c -> MfmStrikeNode(aggregateTextInline c) :> MfmNode
+
+ let codeNode =
+ codePattern >>. manyCharsTill anyChar codePattern
+ |>> fun v -> MfmInlineCodeNode(v) :> MfmNode
+
+ let codeBlockNode =
+ previousCharSatisfiesNot isNotWhitespace
+ >>. skipString "```"
+ >>. opt (many1CharsTill asciiLetter (lookAhead newline))
+ .>>. (skipNewline >>. manyCharsTill anyChar (skipNewline >>. skipString "```"))
+ |>> fun (lang: string option, code: string) -> MfmCodeBlockNode(code, lang) :> MfmNode
+
+ let mathNode =
+ skipString "\(" >>. manyCharsTill anyChar (skipString "\)")
+ |>> fun f -> MfmMathInlineNode(f) :> MfmNode
+
+ let mathBlockNode =
+ previousCharSatisfiesNot isNotWhitespace
+ >>. skipString "\["
+ >>. many1CharsTill anyChar (skipString "\]")
+ |>> fun f -> MfmMathBlockNode(f) :> MfmNode
+
+ let emojiCodeNode =
+ skipChar ':' >>. manyCharsTill letter (skipChar ':')
+ |>> fun e -> MfmEmojiCodeNode(e) :> MfmNode
+
+ let plainNode =
+ skipString "" >>. manyCharsTill anyChar (skipString "")
+ |>> fun v -> MfmPlainNode(v) :> MfmNode
+
+ let smallNode =
+ skipString "" >>. manyTill inlineNode (skipString "")
+ |>> fun c -> MfmSmallNode(aggregateTextInline c) :> MfmNode
+
+ let centerNode =
+ skipString "" >>. manyTill inlineNode (skipString "")
+ |>> fun c -> MfmSmallNode(aggregateTextInline c) :> MfmNode
+
+ let mentionNode =
+ previousCharSatisfiesNot isNotWhitespace
+ >>. skipString "@"
+ >>. manyChars (satisfy isAsciiLetter <|> satisfy isDigit <|> anyOf "._-")
+ .>>. opt (skipChar '@' >>. domain)
+ .>> (lookAhead
+ <| choice
+ [ spaces1
+ eof
+ skipChar ')'
+ skipChar ','
+ skipChar '\''
+ skipChar ':' .>> nextCharSatisfiesNot isAsciiLetterOrNumber ])
+ |>> fun (user: string, host: string option) -> MfmMentionNode(acct (user, host), user, host) :> MfmNode
+
+ let hashtagNode =
+ previousCharSatisfiesNot isNotWhitespace
+ >>. skipChar '#'
+ >>. many1CharsTill letter (nextCharSatisfies isWhitespace <|> eof)
+ |>> fun h -> MfmHashtagNode(h) :> MfmNode
+
+ let urlNodePlain =
+ lookAhead (skipString "https://" <|> skipString "http://")
+ >>. manyCharsTill anyChar (nextCharSatisfies isWhitespace <|> eof) //FIXME: this needs significant improvements
+ >>= fun uri ->
+ match Uri.TryCreate(uri, UriKind.Absolute) with
+ | true, finalUri ->
+ match finalUri.Scheme with
+ | "http" -> preturn (MfmUrlNode(uri, false) :> MfmNode)
+ | "https" -> preturn (MfmUrlNode(uri, false) :> MfmNode)
+ | _ -> fail "invalid scheme"
+ | _ -> fail "invalid url"
+
+ let urlNodeBrackets =
+ skipChar '<'
+ >>. lookAhead (skipString "https://" <|> skipString "http://")
+ >>. manyCharsTill anyChar (skipChar '>')
+ >>= fun uri ->
+ match Uri.TryCreate(uri, UriKind.Absolute) with
+ | true, finalUri ->
+ match finalUri.Scheme with
+ | "http" -> preturn (MfmUrlNode(uri, true) :> MfmNode)
+ | "https" -> preturn (MfmUrlNode(uri, true) :> MfmNode)
+ | _ -> fail "invalid scheme"
+ | _ -> fail "invalid url"
+
+ let urlNode = urlNodePlain <|> urlNodeBrackets
+
+ let linkNode =
+ (opt (pchar '?'))
+ .>>. (pchar '[' >>. manyCharsTill anyChar (pchar ']'))
+ .>>. (pchar '('
+ >>. lookAhead (skipString "https://" <|> skipString "http://")
+ >>. manyCharsTill anyChar (pchar ')'))
+ >>= fun ((silent, text), uri) ->
+ match Uri.TryCreate(uri, UriKind.Absolute) with
+ | true, finalUri ->
+ match finalUri.Scheme with
+ | "http" -> preturn (MfmLinkNode(uri, text, silent.IsSome) :> MfmNode)
+ | "https" -> preturn (MfmLinkNode(uri, text, silent.IsSome) :> MfmNode)
+ | _ -> fail "invalid scheme"
+ | _ -> fail "invalid url"
+
+
+ let charNode = anyChar |>> fun v -> MfmCharNode(v) :> MfmNode
+
+ // Node collection
+ let inlineNodeSeq =
+ [ italicNode
+ boldNode
+ strikeNode
+ hashtagNode
+ mentionNode
+ codeNode
+ urlNode
+ linkNode
+ mathNode
+ emojiCodeNode
+ charNode ]
+
+ //TODO: still missing: FnNode, MfmSearchNode, MfmQuoteNode
+
+ // This intentionally doesn't implement the node type UnicodeEmojiNode, both for performance,
+ // and because it's not needed for backend processing
+
+ let blockNodeSeq =
+ [ plainNode; centerNode; smallNode; codeBlockNode; mathBlockNode ]
+
+ let nodeSeq = [ blockNodeSeq; inlineNodeSeq ]
+
+ // Populate references
+ do nodeRef.Value <- choice <| seqAttempt (seqFlatten <| nodeSeq)
+ do inlineNodeRef.Value <- choice <| (seqAttempt inlineNodeSeq) |>> fun v -> v :?> MfmInlineNode
+
+ // Final parse command
+ let parse = spaces >>. manyTill node eof .>> spaces
+
+open MfmParser
+
+module Mfm =
+ []
+ let parse str =
+ match run parse str with
+ | Success(result, _, _) -> aggregateText result
+ | Failure(s, _, _) -> failwith $"Failed to parse MFM: {s}"
diff --git a/Iceshrimp.Tests/Parsing/MfmTestsFParsec.cs b/Iceshrimp.Tests/Parsing/MfmTestsFParsec.cs
new file mode 100644
index 00000000..30a33ff0
--- /dev/null
+++ b/Iceshrimp.Tests/Parsing/MfmTestsFParsec.cs
@@ -0,0 +1,195 @@
+using Iceshrimp.Parsing;
+using Microsoft.FSharp.Collections;
+using static Iceshrimp.Parsing.MfmNodeTypes;
+
+namespace Iceshrimp.Tests.Parsing;
+
+[TestClass]
+public class MfmTestsFParsec
+{
+ [TestMethod]
+ public void TestParseBoldItalic()
+ {
+ List expected =
+ [
+ new MfmItalicNode(ListModule.OfSeq([
+ new MfmTextNode("italic "),
+ new MfmBoldNode(ListModule.OfSeq([new MfmTextNode("bold")])),
+ new MfmTextNode(" italic")
+ ]))
+ ];
+
+ var res = Mfm.parse("*italic **bold** italic*").ToList();
+ AssertionOptions.FormattingOptions.MaxDepth = 100;
+ res.Should().Equal(expected, MfmNodeEqual);
+ }
+
+ [TestMethod]
+ public void TestParseCode()
+ {
+ List expected =
+ [
+ new MfmInlineCodeNode("test"),
+ new MfmTextNode("\n"),
+ new MfmCodeBlockNode("test", null),
+ new MfmTextNode("\n"),
+ new MfmCodeBlockNode("test", "lang")
+ ];
+
+ var res = Mfm.parse("""
+ `test`
+ ```
+ test
+ ```
+ ```lang
+ test
+ ```
+ """);
+
+ AssertionOptions.FormattingOptions.MaxDepth = 100;
+ res.ToList().Should().Equal(expected, MfmNodeEqual);
+ }
+
+ [TestMethod]
+ public void Benchmark()
+ {
+ const string mfm =
+ "*blabla* *test* #example @example @example@invalid @example@example.com @invalid:matrix.org https://hello.com http://test.de javascript://sdfgsdf [test](https://asdfg) ?[test](https://asdfg) `asd`";
+
+ double duration = 100;
+ for (var i = 0; i < 4; i++) duration = RunBenchmark();
+
+ duration.Should().BeLessThan(2);
+
+ return;
+
+ double RunBenchmark()
+ {
+ var pre = DateTime.Now;
+ Mfm.parse(mfm);
+ var post = DateTime.Now;
+ var ms = (post - pre).TotalMilliseconds;
+ Console.WriteLine($"Took {ms} ms");
+ return ms;
+ }
+ }
+
+ private class MfmNodeEquality : IEqualityComparer
+ {
+ public bool Equals(MfmNode? x, MfmNode? y)
+ {
+ if (x == null && y == null) return true;
+ if (x == null && y != null) return false;
+ if (x != null && y == null) return false;
+
+ return MfmNodeEqual(x!, y!);
+ }
+
+ public int GetHashCode(MfmNode obj)
+ {
+ return obj.GetHashCode();
+ }
+ }
+
+ private static bool MfmNodeEqual(MfmNode a, MfmNode b)
+ {
+ if (a.GetType() != b.GetType()) return false;
+
+ if (!a.Children.IsEmpty || !b.Children.IsEmpty)
+ {
+ if (!a.Children.IsEmpty && b.Children.IsEmpty || a.Children.IsEmpty && !b.Children.IsEmpty)
+ return false;
+ if (!a.Children.SequenceEqual(b.Children, new MfmNodeEquality()))
+ return false;
+ }
+
+ switch (a)
+ {
+ case MfmTextNode textNode when ((MfmTextNode)b).Text != textNode.Text:
+ return false;
+ case MfmMentionNode ax:
+ {
+ var bx = (MfmMentionNode)b;
+ if (bx.Acct != ax.Acct) return false;
+ if (bx.Username != ax.Username) return false;
+ if (bx.Host?.Value != ax.Host?.Value) return false;
+ break;
+ }
+ case MfmCodeBlockNode ax:
+ {
+ var bx = (MfmCodeBlockNode)b;
+ if (ax.Code != bx.Code) return false;
+ if (ax.Language?.Value != bx.Language?.Value) return false;
+ break;
+ }
+ case MfmInlineCodeNode ax:
+ {
+ var bx = (MfmInlineCodeNode)b;
+ if (ax.Code != bx.Code) return false;
+ break;
+ }
+ case MfmMathBlockNode ax:
+ {
+ var bx = (MfmMathBlockNode)b;
+ if (ax.Formula != bx.Formula) return false;
+ if (ax.Formula != bx.Formula) return false;
+ break;
+ }
+ case MfmMathInlineNode ax:
+ {
+ var bx = (MfmMathInlineNode)b;
+ if (ax.Formula != bx.Formula) return false;
+ if (ax.Formula != bx.Formula) return false;
+ break;
+ }
+ case MfmSearchNode searchNode:
+ {
+ var bx = (MfmSearchNode)b;
+ if (searchNode.Query != bx.Query) return false;
+ if (searchNode.Content != bx.Content) return false;
+ break;
+ }
+ case MfmUnicodeEmojiNode ax:
+ {
+ var bx = (MfmUnicodeEmojiNode)b;
+ if (ax.Emoji != bx.Emoji) return false;
+ break;
+ }
+ case MfmEmojiCodeNode ax:
+ {
+ var bx = (MfmEmojiCodeNode)b;
+ if (ax.Name != bx.Name) return false;
+ break;
+ }
+ case MfmHashtagNode ax:
+ {
+ var bx = (MfmHashtagNode)b;
+ if (ax.Hashtag != bx.Hashtag) return false;
+ break;
+ }
+ case MfmUrlNode ax:
+ {
+ var bx = (MfmUrlNode)b;
+ if (ax.Url != bx.Url) return false;
+ if (ax.Brackets != bx.Brackets) return false;
+ break;
+ }
+ case MfmLinkNode ax:
+ {
+ var bx = (MfmLinkNode)b;
+ if (ax.Url != bx.Url) return false;
+ if (ax.Silent != bx.Silent) return false;
+ break;
+ }
+ case MfmFnNode ax:
+ {
+ var bx = (MfmFnNode)b;
+ if (ax.Args != bx.Args) return false;
+ if (ax.Name != bx.Name) return false;
+ break;
+ }
+ }
+
+ return true;
+ }
+}
\ No newline at end of file