[parsing/mfm] Check if the rest of the stream contains a closing tag before parsing asymmetric nodes

This commit is contained in:
Laura Hausmann 2024-11-26 20:12:00 +01:00
parent a342e3df25
commit 753fbdefe4
No known key found for this signature in database
GPG key ID: D044E84C5BE01605

View file

@ -244,6 +244,50 @@ module private MfmParser =
let (|GreaterEqualThan|_|) k value = if value >= k then Some() else None let (|GreaterEqualThan|_|) k value = if value >= k then Some() else None
let restOfLineContains (s: string) : Parser<unit, UserState> =
fun (stream: CharStream<_>) ->
let pos = stream.Position
let rest = stream.ReadRestOfLine false
do stream.Seek pos.Index
match rest with
| c when c.Contains(s) -> Reply(())
| _ -> Reply(Error, messageError "No match found")
let restOfSegmentContains (s: string, segment: char -> bool) : Parser<unit, UserState> =
fun (stream: CharStream<_>) ->
let pos = stream.Position
let rest = stream.ReadCharsOrNewlinesWhile(segment, false)
do stream.Seek pos.Index
match rest with
| c when c.Contains s -> Reply(())
| _ -> Reply(Error, messageError "No match found")
let restOfStreamContains (s: string) : Parser<unit, UserState> =
restOfSegmentContains (s, (fun _ -> true))
let streamMatches (s: string) : Parser<unit, UserState> =
fun stream ->
match stream.Match s with
| true -> Reply(())
| false -> Reply(Error, NoErrorMessages)
let streamMatchesOrEof (s: string) : Parser<unit, UserState> =
fun stream ->
match (stream.Match s, stream.IsEndOfStream) with
| false, false -> Reply(Error, NoErrorMessages)
| _ -> Reply(())
let anyCharExceptNewline: Parser<char, 'u> =
fun stream ->
let c = stream.ReadCharOrNewline()
if c <> EOS && isNotNewline c then
Reply(c)
else
Reply(Error, messageError "anyCharExceptNewline")
// References // References
let inlineNode, inlineNodeRef = createParserForwardedToRef () let inlineNode, inlineNodeRef = createParserForwardedToRef ()
@ -266,6 +310,7 @@ module private MfmParser =
let italicAsteriskNode = let italicAsteriskNode =
previousCharSatisfiesNot isNotWhitespace previousCharSatisfiesNot isNotWhitespace
>>. italicPatternAsterisk >>. italicPatternAsterisk
>>. restOfLineContains "*"
>>. pushLine >>. pushLine
>>. manyTill inlineNode italicPatternAsterisk >>. manyTill inlineNode italicPatternAsterisk
.>> assertLine .>> assertLine
@ -274,18 +319,22 @@ module private MfmParser =
let italicUnderscoreNode = let italicUnderscoreNode =
previousCharSatisfiesNot isNotWhitespace previousCharSatisfiesNot isNotWhitespace
>>. italicPatternUnderscore >>. italicPatternUnderscore
>>. restOfLineContains "_"
>>. pushLine >>. pushLine
>>. manyTill inlineNode italicPatternUnderscore >>. manyTill inlineNode italicPatternUnderscore
.>> assertLine .>> assertLine
|>> fun c -> MfmItalicNode(aggregateTextInline c, Symbol) :> MfmNode |>> fun c -> MfmItalicNode(aggregateTextInline c, Symbol) :> MfmNode
let italicTagNode = let italicTagNode =
skipString "<i>" >>. manyTill inlineNode (skipString "</i>") skipString "<i>"
>>. restOfStreamContains "</i>"
>>. manyTill inlineNode (skipString "</i>")
|>> fun c -> MfmItalicNode(aggregateTextInline c, HtmlTag) :> MfmNode |>> fun c -> MfmItalicNode(aggregateTextInline c, HtmlTag) :> MfmNode
let boldAsteriskNode = let boldAsteriskNode =
previousCharSatisfiesNot isNotWhitespace previousCharSatisfiesNot isNotWhitespace
>>. skipString "**" >>. skipString "**"
>>. restOfLineContains "**"
>>. pushLine >>. pushLine
>>. manyTill inlineNode (skipString "**") >>. manyTill inlineNode (skipString "**")
.>> assertLine .>> assertLine
@ -294,32 +343,45 @@ module private MfmParser =
let boldUnderscoreNode = let boldUnderscoreNode =
previousCharSatisfiesNot isNotWhitespace previousCharSatisfiesNot isNotWhitespace
>>. skipString "__" >>. skipString "__"
>>. restOfLineContains "__"
>>. pushLine >>. pushLine
>>. manyTill inlineNode (skipString "__") >>. manyTill inlineNode (skipString "__")
.>> assertLine .>> assertLine
|>> fun c -> MfmBoldNode(aggregateTextInline c, Symbol) :> MfmNode |>> fun c -> MfmBoldNode(aggregateTextInline c, Symbol) :> MfmNode
let boldTagNode = let boldTagNode =
skipString "<b>" >>. manyTill inlineNode (skipString "</b>") skipString "<b>"
>>. restOfStreamContains "</b>"
>>. manyTill inlineNode (skipString "</b>")
|>> fun c -> MfmBoldNode(aggregateTextInline c, HtmlTag) :> MfmNode |>> fun c -> MfmBoldNode(aggregateTextInline c, HtmlTag) :> MfmNode
let strikeNode = let strikeNode =
skipString "~~" >>. pushLine >>. manyTill inlineNode (skipString "~~") skipString "~~"
>>. restOfLineContains "~~"
>>. pushLine
>>. manyTill inlineNode (skipString "~~")
.>> assertLine .>> assertLine
|>> fun c -> MfmStrikeNode(aggregateTextInline c, Symbol) :> MfmNode |>> fun c -> MfmStrikeNode(aggregateTextInline c, Symbol) :> MfmNode
let strikeTagNode = let strikeTagNode =
skipString "<s>" >>. manyTill inlineNode (skipString "</s>") skipString "<s>"
>>. restOfStreamContains "</s>"
>>. manyTill inlineNode (skipString "</s>")
|>> fun c -> MfmStrikeNode(aggregateTextInline c, HtmlTag) :> MfmNode |>> fun c -> MfmStrikeNode(aggregateTextInline c, HtmlTag) :> MfmNode
let codeNode = let codeNode =
codePattern >>. pushLine >>. manyCharsTill anyChar codePattern .>> assertLine codePattern
>>. restOfLineContains "`"
>>. pushLine
>>. manyCharsTill anyChar codePattern
.>> assertLine
|>> fun v -> MfmInlineCodeNode(v) :> MfmNode |>> fun v -> MfmInlineCodeNode(v) :> MfmNode
let codeBlockNode = let codeBlockNode =
opt skipNewline opt skipNewline
>>. opt skipNewline >>. opt skipNewline
>>. followedByString "```" >>. followedByString "```"
>>. restOfStreamContains "```"
>>. previousCharSatisfiesNot isNotNewline >>. previousCharSatisfiesNot isNotNewline
>>. skipString "```" >>. skipString "```"
>>. opt (many1CharsTill asciiLetter (lookAhead newline)) >>. opt (many1CharsTill asciiLetter (lookAhead newline))
@ -330,38 +392,51 @@ module private MfmParser =
|>> fun (lang: string option, code: string) -> MfmCodeBlockNode(code, lang) :> MfmNode |>> fun (lang: string option, code: string) -> MfmCodeBlockNode(code, lang) :> MfmNode
let mathNode = let mathNode =
skipString "\(" >>. pushLine >>. manyCharsTill anyChar (skipString "\)") skipString "\("
>>. restOfLineContains "\)"
>>. pushLine
>>. manyCharsTill anyChar (skipString "\)")
.>> assertLine .>> assertLine
|>> fun f -> MfmMathInlineNode(f) :> MfmNode |>> fun f -> MfmMathInlineNode(f) :> MfmNode
let mathBlockNode = let mathBlockNode =
previousCharSatisfiesNot isNotWhitespace previousCharSatisfiesNot isNotWhitespace
>>. skipString "\[" >>. skipString "\["
>>. restOfStreamContains "\]"
>>. many1CharsTill anyChar (skipString "\]") >>. many1CharsTill anyChar (skipString "\]")
|>> fun f -> MfmMathBlockNode(f) :> MfmNode |>> fun f -> MfmMathBlockNode(f) :> MfmNode
let emojiCodeNode = let emojiCodeNode =
skipChar ':' skipChar ':'
>>. restOfLineContains ":"
>>. manyCharsTill (satisfy isAsciiLetter <|> satisfy isDigit <|> anyOf "+-_") (skipChar ':') >>. manyCharsTill (satisfy isAsciiLetter <|> satisfy isDigit <|> anyOf "+-_") (skipChar ':')
|>> fun e -> MfmEmojiCodeNode(e) :> MfmNode |>> fun e -> MfmEmojiCodeNode(e) :> MfmNode
let fnNode = let fnNode =
skipString "$[" >>. many1Chars (asciiLower <|> digit) skipString "$["
>>. restOfStreamContains "]"
>>. many1Chars (asciiLower <|> digit)
.>>. opt (skipChar '.' >>. sepBy1 fnArg (skipChar ',')) .>>. opt (skipChar '.' >>. sepBy1 fnArg (skipChar ','))
.>> skipChar ' ' .>> skipChar ' '
.>>. many1Till inlineNode (skipChar ']') .>>. many1Till inlineNode (skipChar ']')
|>> fun ((n, o), c) -> MfmFnNode(n, fnDict o, aggregateTextInline c) :> MfmNode |>> fun ((n, o), c) -> MfmFnNode(n, fnDict o, aggregateTextInline c) :> MfmNode
let plainNode = let plainNode =
skipString "<plain>" >>. manyCharsTill anyChar (skipString "</plain>") skipString "<plain>"
>>. restOfStreamContains "</plain>"
>>. manyCharsTill anyChar (skipString "</plain>")
|>> fun v -> MfmPlainNode(v) :> MfmNode |>> fun v -> MfmPlainNode(v) :> MfmNode
let smallNode = let smallNode =
skipString "<small>" >>. manyTill inlineNode (skipString "</small>") skipString "<small>"
>>. restOfStreamContains "</small>"
>>. manyTill inlineNode (skipString "</small>")
|>> fun c -> MfmSmallNode(aggregateTextInline c) :> MfmNode |>> fun c -> MfmSmallNode(aggregateTextInline c) :> MfmNode
let centerNode = let centerNode =
skipString "<center>" >>. manyTill inlineNode (skipString "</center>") skipString "<center>"
>>. restOfStreamContains "</center>"
>>. manyTill inlineNode (skipString "</center>")
|>> fun c -> MfmCenterNode(aggregateTextInline c) :> MfmNode |>> fun c -> MfmCenterNode(aggregateTextInline c) :> MfmNode
let mentionNode = let mentionNode =
@ -407,6 +482,7 @@ module private MfmParser =
let urlNodeBrackets = let urlNodeBrackets =
skipChar '<' skipChar '<'
>>. lookAhead (skipString "https://" <|> skipString "http://") >>. lookAhead (skipString "https://" <|> skipString "http://")
>>. restOfLineContains ">" // This intentionally breaks compatibility with mfm-js, as there's no reason to allow newlines in urls
>>. manyCharsTill anyChar (skipChar '>') >>. manyCharsTill anyChar (skipChar '>')
>>= fun uri -> >>= fun uri ->
match Uri.TryCreate(uri, UriKind.Absolute) with match Uri.TryCreate(uri, UriKind.Absolute) with
@ -419,11 +495,12 @@ module private MfmParser =
let linkNode = let linkNode =
(opt (pchar '?')) (opt (pchar '?'))
.>>. (pchar '[' >>. manyCharsTill anyChar (pchar ']')) .>>. (pchar '[' >>. restOfLineContains "]" >>. manyCharsTill anyChar (pchar ']'))
.>>. (pchar '(' .>>. (pchar '('
>>. restOfLineContains ")"
>>. lookAhead (skipString "https://" <|> skipString "http://") >>. lookAhead (skipString "https://" <|> skipString "http://")
>>. manyCharsTill >>. manyCharsTill
((pchar '(' .>> pushParen) <|> (pchar ')' .>> popParen) <|> anyChar) ((pchar '(' .>> pushParen) <|> (pchar ')' .>> popParen) <|> anyCharExceptNewline)
(assertNoParen >>. skipChar ')')) (assertNoParen >>. skipChar ')'))
.>> clearParen .>> clearParen
>>= fun ((silent, text), uri) -> >>= fun ((silent, text), uri) ->
@ -528,12 +605,14 @@ module private MfmParser =
open MfmParser open MfmParser
module Mfm = module Mfm =
let parse str = let internal runParser p str =
match runParserOnString parse (UserState.Default()) "" str with let state = UserState.Default()
| Success(result, _, _) -> aggregateText result let result = runParserOnString p state "" str
| Failure _ -> [ MfmTextNode(str) ]
let parseSimple str = match (result, state.TimeoutReached) with
match runParserOnString parseSimple (UserState.Default()) "" str with | Success(result, _, _), _ -> aggregateText result
| Success(result, _, _) -> aggregateText result | Failure _, true -> [ MfmTextNode(str) ]
| Failure _ -> [ MfmTextNode(str) ] | Failure(s, _, _), false -> failwith $"Failed to parse MFM: {s}"
let parse str = runParser parse str
let parseSimple str = runParser parseSimple str