From 8d9856fd62dff025a5f7a4c4fcc11d4c2da00815 Mon Sep 17 00:00:00 2001 From: Laura Hausmann Date: Wed, 8 Jan 2025 17:33:31 +0100 Subject: [PATCH] [parsing] Migrate SearchQuery parser to C# --- Dockerfile | 4 +- .../Core/Extensions/QueryableFtsExtensions.cs | 133 +++++---- Iceshrimp.Backend/Iceshrimp.Backend.csproj | 2 +- Iceshrimp.NET.sln | 12 +- Iceshrimp.Parsing/.gitattributes | 1 - Iceshrimp.Parsing/Iceshrimp.Parsing.csproj | 9 + Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj | 12 - Iceshrimp.Parsing/SearchQuery.fs | 264 ------------------ Iceshrimp.Parsing/SearchQueryFilters.cs | 190 +++++++++++++ Iceshrimp.Parsing/SearchQueryParser.cs | 102 +++++++ Iceshrimp.Tests/Parsing/SearchQueryTests.cs | 126 +++++---- 11 files changed, 448 insertions(+), 407 deletions(-) delete mode 100644 Iceshrimp.Parsing/.gitattributes create mode 100644 Iceshrimp.Parsing/Iceshrimp.Parsing.csproj delete mode 100644 Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj delete mode 100644 Iceshrimp.Parsing/SearchQuery.fs create mode 100644 Iceshrimp.Parsing/SearchQueryFilters.cs create mode 100644 Iceshrimp.Parsing/SearchQueryParser.cs diff --git a/Dockerfile b/Dockerfile index 16b146dd..9366b078 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,10 +12,10 @@ WORKDIR /src ARG BUILDPLATFORM ARG AOT=false -# copy csproj/fsproj & nuget config, then restore as distinct layers +# copy csproj files & nuget config, then restore as distinct layers COPY NuGet.Config /src COPY Iceshrimp.Backend/*.csproj /src/Iceshrimp.Backend/ -COPY Iceshrimp.Parsing/*.fsproj /src/Iceshrimp.Parsing/ +COPY Iceshrimp.Parsing/*.csproj /src/Iceshrimp.Parsing/ COPY Iceshrimp.Frontend/*.csproj /src/Iceshrimp.Frontend/ COPY Iceshrimp.Shared/*.csproj /src/Iceshrimp.Shared/ COPY Iceshrimp.Build/*.csproj /src/Iceshrimp.Build/ diff --git a/Iceshrimp.Backend/Core/Extensions/QueryableFtsExtensions.cs b/Iceshrimp.Backend/Core/Extensions/QueryableFtsExtensions.cs index 29126efe..cc182af9 100644 --- a/Iceshrimp.Backend/Core/Extensions/QueryableFtsExtensions.cs +++ b/Iceshrimp.Backend/Core/Extensions/QueryableFtsExtensions.cs @@ -7,7 +7,6 @@ using Iceshrimp.Backend.Core.Database.Tables; using Iceshrimp.Backend.Core.Helpers; using Iceshrimp.Parsing; using Microsoft.EntityFrameworkCore; -using static Iceshrimp.Parsing.SearchQueryFilters; namespace Iceshrimp.Backend.Core.Extensions; @@ -17,7 +16,7 @@ public static class QueryableFtsExtensions this IQueryable query, string input, User user, Config.InstanceSection config, DatabaseContext db ) { - var parsed = SearchQuery.parse(input); + var parsed = SearchQueryParser.Parse(input); var caseSensitivity = parsed.OfType().LastOrDefault()?.Value ?? CaseFilterType.Insensitive; var matchType = parsed.OfType().LastOrDefault()?.Value ?? MatchFilterType.Substring; @@ -106,8 +105,8 @@ public static class QueryableFtsExtensions private static IQueryable ApplyReplyFilter( this IQueryable query, ReplyFilter filter, Config.InstanceSection config, DatabaseContext db - ) => query.Where(p => p.Reply != null && - p.Reply.User.UserSubqueryMatches(filter.Value, filter.Negated, config, db)); + ) => query.Where(p => p.Reply != null + && p.Reply.User.UserSubqueryMatches(filter.Value, filter.Negated, config, db)); private static IQueryable ApplyInFilter( this IQueryable query, InFilter filter, User user, DatabaseContext db @@ -115,11 +114,11 @@ public static class QueryableFtsExtensions { return filter.Value switch { - { IsLikes: true } => query.ApplyInLikesFilter(user, filter.Negated, db), - { IsBookmarks: true } => query.ApplyInBookmarksFilter(user, filter.Negated, db), - { IsReactions: true } => query.ApplyInReactionsFilter(user, filter.Negated, db), - { IsInteractions: true } => query.ApplyInInteractionsFilter(user, filter.Negated, db), - _ => throw new ArgumentOutOfRangeException(nameof(filter), filter.Value, null) + InFilterType.Likes => query.ApplyInLikesFilter(user, filter.Negated, db), + InFilterType.Bookmarks => query.ApplyInBookmarksFilter(user, filter.Negated, db), + InFilterType.Reactions => query.ApplyInReactionsFilter(user, filter.Negated, db), + InFilterType.Interactions => query.ApplyInInteractionsFilter(user, filter.Negated, db), + _ => throw new ArgumentOutOfRangeException(nameof(filter), filter.Value, null) }; } @@ -155,11 +154,11 @@ public static class QueryableFtsExtensions { return filter.Value switch { - { IsFollowers: true } => query.ApplyFollowersFilter(user, filter.Negated), - { IsFollowing: true } => query.ApplyFollowingFilter(user, filter.Negated), - { IsRenotes: true } => query.ApplyBoostsFilter(filter.Negated), - { IsReplies: true } => query.ApplyRepliesFilter(filter.Negated), - _ => throw new ArgumentOutOfRangeException(nameof(filter)) + MiscFilterType.Followers => query.ApplyFollowersFilter(user, filter.Negated), + MiscFilterType.Following => query.ApplyFollowingFilter(user, filter.Negated), + MiscFilterType.Renotes => query.ApplyBoostsFilter(filter.Negated), + MiscFilterType.Replies => query.ApplyRepliesFilter(filter.Negated), + _ => throw new ArgumentOutOfRangeException(nameof(filter)) }; } @@ -182,26 +181,26 @@ public static class QueryableFtsExtensions private static IQueryable ApplyRegularAttachmentFilter(this IQueryable query, AttachmentFilter filter) { - if (filter.Value.IsMedia) + if (filter.Value is AttachmentFilterType.Media) return query.Where(p => p.AttachedFileTypes.Count != 0); - if (filter.Value.IsPoll) + if (filter.Value is AttachmentFilterType.Poll) return query.Where(p => p.HasPoll); - if (filter.Value.IsImage || filter.Value.IsVideo || filter.Value.IsAudio) + if (filter.Value is AttachmentFilterType.Image or AttachmentFilterType.Video or AttachmentFilterType.Audio) { - return query.Where(p => p.AttachedFileTypes.Count != 0 && - EF.Functions.ILike(p.RawAttachments, GetAttachmentILikeQuery(filter.Value))); + return query.Where(p => p.AttachedFileTypes.Count != 0 + && EF.Functions.ILike(p.RawAttachments, GetAttachmentILikeQuery(filter.Value))); } - if (filter.Value.IsFile) + if (filter.Value is AttachmentFilterType.File) { - return query.Where(p => p.AttachedFileTypes.Count != 0 && - (!EF.Functions.ILike(p.RawAttachments, - GetAttachmentILikeQuery(AttachmentFilterType.Image)) || - !EF.Functions.ILike(p.RawAttachments, - GetAttachmentILikeQuery(AttachmentFilterType.Video)) || - !EF.Functions.ILike(p.RawAttachments, - GetAttachmentILikeQuery(AttachmentFilterType.Audio)))); + return query.Where(p => p.AttachedFileTypes.Count != 0 + && (!EF.Functions.ILike(p.RawAttachments, + GetAttachmentILikeQuery(AttachmentFilterType.Image)) + || !EF.Functions.ILike(p.RawAttachments, + GetAttachmentILikeQuery(AttachmentFilterType.Video)) + || !EF.Functions.ILike(p.RawAttachments, + GetAttachmentILikeQuery(AttachmentFilterType.Audio)))); } throw new ArgumentOutOfRangeException(nameof(filter), filter.Value, null); @@ -209,21 +208,21 @@ public static class QueryableFtsExtensions private static IQueryable ApplyNegatedAttachmentFilter(this IQueryable query, AttachmentFilter filter) { - if (filter.Value.IsMedia) + if (filter.Value is AttachmentFilterType.Media) return query.Where(p => p.AttachedFileTypes.Count == 0); - if (filter.Value.IsPoll) + if (filter.Value is AttachmentFilterType.Poll) return query.Where(p => !p.HasPoll); - if (filter.Value.IsImage || filter.Value.IsVideo || filter.Value.IsAudio) + if (filter.Value is AttachmentFilterType.Image or AttachmentFilterType.Video or AttachmentFilterType.Audio) return query.Where(p => !EF.Functions.ILike(p.RawAttachments, GetAttachmentILikeQuery(filter.Value))); - if (filter.Value.IsFile) + if (filter.Value is AttachmentFilterType.File) { return query.Where(p => EF.Functions - .ILike(p.RawAttachments, GetAttachmentILikeQuery(AttachmentFilterType.Image)) || - EF.Functions - .ILike(p.RawAttachments, GetAttachmentILikeQuery(AttachmentFilterType.Video)) || - EF.Functions - .ILike(p.RawAttachments, GetAttachmentILikeQuery(AttachmentFilterType.Audio))); + .ILike(p.RawAttachments, GetAttachmentILikeQuery(AttachmentFilterType.Image)) + || EF.Functions + .ILike(p.RawAttachments, GetAttachmentILikeQuery(AttachmentFilterType.Video)) + || EF.Functions + .ILike(p.RawAttachments, GetAttachmentILikeQuery(AttachmentFilterType.Audio))); } throw new ArgumentOutOfRangeException(nameof(filter), filter.Value, null); @@ -235,10 +234,10 @@ public static class QueryableFtsExtensions { return filter switch { - { IsImage: true } => "%image/%", - { IsVideo: true } => "%video/%", - { IsAudio: true } => "%audio/%", - _ => throw new ArgumentOutOfRangeException(nameof(filter), filter, null) + AttachmentFilterType.Image => "%image/%", + AttachmentFilterType.Video => "%video/%", + AttachmentFilterType.Audio => "%audio/%", + _ => throw new ArgumentOutOfRangeException(nameof(filter), filter, null) }; } @@ -264,8 +263,8 @@ public static class QueryableFtsExtensions [SuppressMessage("ReSharper", "MemberCanBePrivate.Global", Justification = "Projectable chain must have consistent visibility")] internal static IQueryable UserSubquery((string username, string? host) filter, DatabaseContext db) => - db.Users.Where(p => p.UsernameLower == filter.username && - p.Host == (filter.host != null ? filter.host.ToPunycodeLower() : null)); + db.Users.Where(p => p.UsernameLower == filter.username + && p.Host == (filter.host != null ? filter.host.ToPunycodeLower() : null)); [Projectable] [SuppressMessage("ReSharper", "MemberCanBePrivate.Global", @@ -275,34 +274,34 @@ public static class QueryableFtsExtensions ) => matchType.Equals(MatchFilterType.Substring) ? caseSensitivity.Equals(CaseFilterType.Sensitive) ? negated - ? !EF.Functions.Like(note.Text!, "%" + query + "%", @"\") && - !EF.Functions.Like(note.Cw!, "%" + query + "%", @"\") && - !EF.Functions.Like(note.CombinedAltText!, "%" + query + "%", @"\") - : EF.Functions.Like(note.Text!, "%" + query + "%", @"\") || - EF.Functions.Like(note.Cw!, "%" + query + "%", @"\") || - EF.Functions.Like(note.CombinedAltText!, "%" + query + "%", @"\") + ? !EF.Functions.Like(note.Text!, "%" + query + "%", @"\") + && !EF.Functions.Like(note.Cw!, "%" + query + "%", @"\") + && !EF.Functions.Like(note.CombinedAltText!, "%" + query + "%", @"\") + : EF.Functions.Like(note.Text!, "%" + query + "%", @"\") + || EF.Functions.Like(note.Cw!, "%" + query + "%", @"\") + || EF.Functions.Like(note.CombinedAltText!, "%" + query + "%", @"\") : negated - ? !EF.Functions.ILike(note.Text!, "%" + query + "%", @"\") && - !EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\") && - !EF.Functions.ILike(note.CombinedAltText!, "%" + query + "%", @"\") - : EF.Functions.ILike(note.Text!, "%" + query + "%", @"\") || - EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\") || - EF.Functions.ILike(note.CombinedAltText!, "%" + query + "%", @"\") + ? !EF.Functions.ILike(note.Text!, "%" + query + "%", @"\") + && !EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\") + && !EF.Functions.ILike(note.CombinedAltText!, "%" + query + "%", @"\") + : EF.Functions.ILike(note.Text!, "%" + query + "%", @"\") + || EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\") + || EF.Functions.ILike(note.CombinedAltText!, "%" + query + "%", @"\") : caseSensitivity.Equals(CaseFilterType.Sensitive) ? negated - ? !Regex.IsMatch(note.Text!, "\\y" + query + "\\y") && - !Regex.IsMatch(note.Cw!, "\\y" + query + "\\y") && - !Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y") - : Regex.IsMatch(note.Text!, "\\y" + query + "\\y") || - Regex.IsMatch(note.Cw!, "\\y" + query + "\\y") || - Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y") + ? !Regex.IsMatch(note.Text!, "\\y" + query + "\\y") + && !Regex.IsMatch(note.Cw!, "\\y" + query + "\\y") + && !Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y") + : Regex.IsMatch(note.Text!, "\\y" + query + "\\y") + || Regex.IsMatch(note.Cw!, "\\y" + query + "\\y") + || Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y") : negated - ? !Regex.IsMatch(note.Text!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) && - !Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) && - !Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) - : Regex.IsMatch(note.Text!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) || - Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) || - Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y", RegexOptions.IgnoreCase); + ? !Regex.IsMatch(note.Text!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) + && !Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) + && !Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) + : Regex.IsMatch(note.Text!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) + || Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) + || Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y", RegexOptions.IgnoreCase); [Projectable] [SuppressMessage("ReSharper", "MemberCanBePrivate.Global", @@ -338,4 +337,4 @@ public static class QueryableFtsExtensions this Note note, IEnumerable words, CaseFilterType caseSensitivity, MatchFilterType matchType ) => words.Select(p => PreEscapeFtsQuery(p, matchType)) .Any(p => note.FtsQueryPreEscaped(p, false, caseSensitivity, matchType)); -} \ No newline at end of file +} diff --git a/Iceshrimp.Backend/Iceshrimp.Backend.csproj b/Iceshrimp.Backend/Iceshrimp.Backend.csproj index 358badfe..9c894cdb 100644 --- a/Iceshrimp.Backend/Iceshrimp.Backend.csproj +++ b/Iceshrimp.Backend/Iceshrimp.Backend.csproj @@ -13,7 +13,7 @@ - + diff --git a/Iceshrimp.NET.sln b/Iceshrimp.NET.sln index d37cf735..974493c6 100644 --- a/Iceshrimp.NET.sln +++ b/Iceshrimp.NET.sln @@ -4,8 +4,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.Backend", "Iceshr EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.Tests", "Iceshrimp.Tests\Iceshrimp.Tests.csproj", "{0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}" EndProject -Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Iceshrimp.Parsing", "Iceshrimp.Parsing\Iceshrimp.Parsing.fsproj", "{665B7CCA-6B5B-44DC-9CDB-D070622476C2}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.Frontend", "Iceshrimp.Frontend\Iceshrimp.Frontend.csproj", "{8BAF3DEB-19A7-4044-A3F3-75C8B9B51863}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.Shared", "Iceshrimp.Shared\Iceshrimp.Shared.csproj", "{25E8E423-D2F7-437B-8E9B-5277BA5CE3CD}" @@ -61,6 +59,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docker", "docker", "{B14D01 .docker\ci-env-dotnet9.Dockerfile = .docker\ci-env-dotnet9.Dockerfile EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Iceshrimp.Parsing", "Iceshrimp.Parsing\Iceshrimp.Parsing.csproj", "{6BB21937-A781-4D2A-B64A-19E985870B38}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -75,10 +75,6 @@ Global {0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}.Debug|Any CPU.Build.0 = Debug|Any CPU {0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}.Release|Any CPU.ActiveCfg = Release|Any CPU {0C93C33B-3D68-41DE-8BD6-2C19EB1C95F7}.Release|Any CPU.Build.0 = Release|Any CPU - {665B7CCA-6B5B-44DC-9CDB-D070622476C2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {665B7CCA-6B5B-44DC-9CDB-D070622476C2}.Debug|Any CPU.Build.0 = Debug|Any CPU - {665B7CCA-6B5B-44DC-9CDB-D070622476C2}.Release|Any CPU.ActiveCfg = Release|Any CPU - {665B7CCA-6B5B-44DC-9CDB-D070622476C2}.Release|Any CPU.Build.0 = Release|Any CPU {8BAF3DEB-19A7-4044-A3F3-75C8B9B51863}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {8BAF3DEB-19A7-4044-A3F3-75C8B9B51863}.Debug|Any CPU.Build.0 = Debug|Any CPU {8BAF3DEB-19A7-4044-A3F3-75C8B9B51863}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -91,6 +87,10 @@ Global {B2598946-03CA-4C6B-8E3E-7F2AC77021E5}.Debug|Any CPU.Build.0 = Debug|Any CPU {B2598946-03CA-4C6B-8E3E-7F2AC77021E5}.Release|Any CPU.ActiveCfg = Release|Any CPU {B2598946-03CA-4C6B-8E3E-7F2AC77021E5}.Release|Any CPU.Build.0 = Release|Any CPU + {6BB21937-A781-4D2A-B64A-19E985870B38}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {6BB21937-A781-4D2A-B64A-19E985870B38}.Debug|Any CPU.Build.0 = Debug|Any CPU + {6BB21937-A781-4D2A-B64A-19E985870B38}.Release|Any CPU.ActiveCfg = Release|Any CPU + {6BB21937-A781-4D2A-B64A-19E985870B38}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {12AC2DB4-4817-4F73-B541-20568AC51685} = {2000A25C-AF38-47BC-9432-D1278C12010B} diff --git a/Iceshrimp.Parsing/.gitattributes b/Iceshrimp.Parsing/.gitattributes deleted file mode 100644 index c082e50e..00000000 --- a/Iceshrimp.Parsing/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -*.fs linguist-language=F# diff --git a/Iceshrimp.Parsing/Iceshrimp.Parsing.csproj b/Iceshrimp.Parsing/Iceshrimp.Parsing.csproj new file mode 100644 index 00000000..17b910f6 --- /dev/null +++ b/Iceshrimp.Parsing/Iceshrimp.Parsing.csproj @@ -0,0 +1,9 @@ + + + + net9.0 + enable + enable + + + diff --git a/Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj b/Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj deleted file mode 100644 index 180665ff..00000000 --- a/Iceshrimp.Parsing/Iceshrimp.Parsing.fsproj +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/Iceshrimp.Parsing/SearchQuery.fs b/Iceshrimp.Parsing/SearchQuery.fs deleted file mode 100644 index 2f1aa32a..00000000 --- a/Iceshrimp.Parsing/SearchQuery.fs +++ /dev/null @@ -1,264 +0,0 @@ -namespace Iceshrimp.Parsing - -open System -open FParsec - -module SearchQueryFilters = - type Filter() = class end - - type WordFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - member val Value = value - - type CwFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - member val Value = value - - type MultiWordFilter(values: string list) = - inherit Filter() - member val Values = values - - type FromFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - member val Value = value - - type MentionFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - member val Value = value - - type ReplyFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - member val Value = value - - type InstanceFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - member val Value = value - - type MiscFilterType = - | Followers - | Following - | Replies - | Renotes - - type MiscFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - - member val Value = - match value with - | "followers" -> Followers - | "following" -> Following - | "replies" -> Replies - | "reply" -> Replies - | "boosts" -> Renotes - | "boost" -> Renotes - | "renote" -> Renotes - | "renotes" -> Renotes - | _ -> failwith $"Invalid type: {value}" - - type InFilterType = - | Bookmarks - | Likes - | Reactions - | Interactions - - type InFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - - member val Value = - match value with - | "bookmarks" -> Bookmarks - | "likes" -> Likes - | "favorites" -> Likes - | "favourites" -> Likes - | "reactions" -> Reactions - | "interactions" -> Interactions - | _ -> failwith $"Invalid type: {value}" - - - type AttachmentFilterType = - | Media - | Image - | Video - | Audio - | File - | Poll - - type AttachmentFilter(neg: bool, value: string) = - inherit Filter() - member val Negated = neg - - member val Value = - match value with - | "any" -> Media - | "media" -> Media - | "image" -> Image - | "video" -> Video - | "audio" -> Audio - | "file" -> File - | "poll" -> Poll - | _ -> failwith $"Invalid type: {value}" - - type AfterFilter(d: DateOnly) = - inherit Filter() - member val Value = d - - type BeforeFilter(d: DateOnly) = - inherit Filter() - member val Value = d - - type CaseFilterType = - | Sensitive - | Insensitive - - type CaseFilter(v: string) = - inherit Filter() - - member val Value = - match v with - | "sensitive" -> Sensitive - | "insensitive" -> Insensitive - | _ -> failwith $"Invalid type: {v}" - - type MatchFilterType = - | Words - | Substring - - type MatchFilter(v: string) = - inherit Filter() - - member val Value = - match v with - | "word" -> Words - | "words" -> Words - | "substr" -> Substring - | "substring" -> Substring - | _ -> failwith $"Invalid type: {v}" - -module private SearchQueryParser = - open SearchQueryFilters - - // Abstractions - let str s = pstring s - let tokenEnd = (skipChar ' ' <|> eof) - let token = anyChar |> manyCharsTill <| tokenEnd - let orTokenEnd = (skipChar ' ' <|> lookAhead (skipChar ')') <|> eof) - let orToken = spaces >>. anyChar |> manyCharsTill <| orTokenEnd - let key s = str s .>>? pchar ':' - let strEnd s = str s .>>? tokenEnd - let anyStr s = choice (s |> Seq.map strEnd) - let anyKey k = choice (k |> Seq.map key) - let seqAttempt s = s |> Seq.map attempt - let neg = opt <| pchar '-' - let negFilter k = pipe2 neg (anyKey k >>. token) - let negKeyFilter k v = pipe2 neg (anyKey k >>. anyStr v) - let keyFilter k v = anyKey k >>. anyStr v - let strSepByOr = sepBy orToken (str "OR ") - - let parseDate (s: string) = - match DateOnly.TryParseExact(s, "O") with - | true, result -> preturn result - | false, _ -> fail $"Invalid date: {s}" - - let dateFilter k = anyKey k >>. token >>= parseDate - - // Filters - let wordFilter = pipe2 neg token <| fun a b -> WordFilter(a.IsSome, b) :> Filter - - let cwFilter = negFilter [ "cw" ] <| fun n v -> CwFilter(n.IsSome, v) :> Filter - - let multiWordFilter = - skipChar '(' >>. strSepByOr .>> skipChar ')' - |>> fun v -> MultiWordFilter(v) :> Filter - - let literalStringFilter = - skipChar '"' >>. manyCharsTill anyChar (skipChar '"') - |>> fun v -> WordFilter(false, v) :> Filter - - let fromFilter = - negFilter [ "from"; "author"; "by"; "user" ] - <| fun n v -> FromFilter(n.IsSome, v) :> Filter - - let mentionFilter = - negFilter [ "mention"; "mentions"; "mentioning" ] - <| fun n v -> MentionFilter(n.IsSome, v) :> Filter - - let replyFilter = - negFilter [ "reply"; "replying"; "to" ] - <| fun n v -> ReplyFilter(n.IsSome, v) :> Filter - - let instanceFilter = - negFilter [ "instance"; "domain"; "host" ] - <| fun n v -> InstanceFilter(n.IsSome, v) :> Filter - - let miscFilter = - negKeyFilter - [ "filter" ] - [ "followers" - "following" - "replies" - "reply" - "renote" - "renotes" - "boosts" - "boost" ] - <| fun n v -> MiscFilter(n.IsSome, v) :> Filter - - let inFilter = - negKeyFilter [ "in" ] [ "bookmarks"; "favorites"; "favourites"; "reactions"; "likes"; "interactions" ] - <| fun n v -> InFilter(n.IsSome, v) :> Filter - - let attachmentFilter = - negKeyFilter [ "has"; "attachment"; "attached" ] [ "any"; "media"; "image"; "video"; "audio"; "file"; "poll" ] - <| fun n v -> AttachmentFilter(n.IsSome, v) :> Filter - - let afterFilter = - dateFilter [ "after"; "since" ] |>> fun v -> AfterFilter(v) :> Filter - - let beforeFilter = - dateFilter [ "before"; "until" ] |>> fun v -> BeforeFilter(v) :> Filter - - let caseFilter = - keyFilter [ "case" ] [ "sensitive"; "insensitive" ] - |>> fun v -> CaseFilter(v) :> Filter - - let matchFilter = - keyFilter [ "match" ] [ "words"; "word"; "substr"; "substring" ] - |>> fun v -> MatchFilter(v) :> Filter - - // Filter collection - let filterSeq = - [ literalStringFilter - fromFilter - mentionFilter - replyFilter - instanceFilter - miscFilter - inFilter - attachmentFilter - afterFilter - beforeFilter - caseFilter - matchFilter - cwFilter - multiWordFilter - wordFilter ] - - // Final parse commands - let filters = choice <| seqAttempt filterSeq - let parse = manyTill (spaces >>. filters .>> spaces) eof - -module SearchQuery = - open SearchQueryParser - - let parse str = - match run parse str with - | Success(result, _, _) -> result - | Failure(s, _, _) -> failwith $"Failed to parse query: {s}" diff --git a/Iceshrimp.Parsing/SearchQueryFilters.cs b/Iceshrimp.Parsing/SearchQueryFilters.cs new file mode 100644 index 00000000..06246fe7 --- /dev/null +++ b/Iceshrimp.Parsing/SearchQueryFilters.cs @@ -0,0 +1,190 @@ +using System.Diagnostics.CodeAnalysis; + +namespace Iceshrimp.Parsing; + +public interface ISearchQueryFilter; + +public record WordFilter(bool Negated, string Value) : ISearchQueryFilter; + +public record CwFilter(bool Negated, string Value) : ISearchQueryFilter; + +public record MultiWordFilter(bool Negated, string[] Values) : ISearchQueryFilter; + +public record FromFilter(bool Negated, string Value) : ISearchQueryFilter; + +public record MentionFilter(bool Negated, string Value) : ISearchQueryFilter; + +public record ReplyFilter(bool Negated, string Value) : ISearchQueryFilter; + +public record InstanceFilter(bool Negated, string Value) : ISearchQueryFilter; + +public enum MiscFilterType +{ + Followers, + Following, + Replies, + Renotes +} + +public record MiscFilter(bool Negated, MiscFilterType Value) : ISearchQueryFilter +{ + public static bool TryParse(bool negated, ReadOnlySpan value, [NotNullWhen(true)] out MiscFilter? result) + { + MiscFilterType? type = value switch + { + "followers" => MiscFilterType.Followers, + "following" => MiscFilterType.Following, + "replies" => MiscFilterType.Replies, + "reply" => MiscFilterType.Replies, + "renote" => MiscFilterType.Renotes, + "renotes" => MiscFilterType.Renotes, + "boosts" => MiscFilterType.Renotes, + "boost" => MiscFilterType.Renotes, + _ => null + }; + + if (!type.HasValue) + { + result = null; + return false; + } + + result = new MiscFilter(negated, type.Value); + return true; + } +} + +public enum InFilterType +{ + Bookmarks, + Likes, + Reactions, + Interactions +} + +public record InFilter(bool Negated, InFilterType Value) : ISearchQueryFilter +{ + public static bool TryParse(bool negated, ReadOnlySpan value, [NotNullWhen(true)] out InFilter? result) + { + InFilterType? type = value switch + { + "bookmarks" => InFilterType.Bookmarks, + "likes" => InFilterType.Likes, + "favorites" => InFilterType.Likes, + "favourites" => InFilterType.Likes, + "reactions" => InFilterType.Reactions, + "interactions" => InFilterType.Interactions, + _ => null + }; + + if (!type.HasValue) + { + result = null; + return false; + } + + result = new InFilter(negated, type.Value); + return true; + } +} + +public enum AttachmentFilterType +{ + Media, + Image, + Video, + Audio, + File, + Poll +} + +public record AttachmentFilter(bool Negated, AttachmentFilterType Value) : ISearchQueryFilter +{ + public static bool TryParse( + bool negated, ReadOnlySpan value, [NotNullWhen(true)] out AttachmentFilter? result + ) + { + AttachmentFilterType? type = value switch + { + "any" => AttachmentFilterType.Media, + "media" => AttachmentFilterType.Media, + "image" => AttachmentFilterType.Image, + "video" => AttachmentFilterType.Video, + "audio" => AttachmentFilterType.Audio, + "file" => AttachmentFilterType.File, + "poll" => AttachmentFilterType.Poll, + _ => null + }; + + if (!type.HasValue) + { + result = null; + return false; + } + + result = new AttachmentFilter(negated, type.Value); + return true; + } +} + +public record AfterFilter(DateOnly Value) : ISearchQueryFilter; + +public record BeforeFilter(DateOnly Value) : ISearchQueryFilter; + +public enum CaseFilterType +{ + Sensitive, + Insensitive +} + +public record CaseFilter(CaseFilterType Value) : ISearchQueryFilter +{ + public static bool TryParse(ReadOnlySpan value, [NotNullWhen(true)] out CaseFilter? result) + { + CaseFilterType? type = value switch + { + "sensitive" => CaseFilterType.Sensitive, + "insensitive" => CaseFilterType.Insensitive, + _ => null + }; + + if (!type.HasValue) + { + result = null; + return false; + } + + result = new CaseFilter(type.Value); + return true; + } +} + +public enum MatchFilterType +{ + Words, + Substring +} + +public record MatchFilter(MatchFilterType Value) : ISearchQueryFilter +{ + public static bool TryParse(ReadOnlySpan value, [NotNullWhen(true)] out MatchFilter? result) + { + MatchFilterType? type = value switch + { + "words" => MatchFilterType.Words, + "word" => MatchFilterType.Words, + "substring" => MatchFilterType.Substring, + "substr" => MatchFilterType.Substring, + _ => null + }; + + if (!type.HasValue) + { + result = null; + return false; + } + + result = new MatchFilter(type.Value); + return true; + } +} diff --git a/Iceshrimp.Parsing/SearchQueryParser.cs b/Iceshrimp.Parsing/SearchQueryParser.cs new file mode 100644 index 00000000..428b9bf7 --- /dev/null +++ b/Iceshrimp.Parsing/SearchQueryParser.cs @@ -0,0 +1,102 @@ +namespace Iceshrimp.Parsing; + +public static class SearchQueryParser +{ + public static List Parse(ReadOnlySpan input) + { + var results = new List(); + + input = input.Trim(); + if (input.Length == 0) return []; + + int pos = 0; + while (pos < input.Length) + { + var oldPos = pos; + var res = ParseToken(input, ref pos); + if (res == null) return results; + if (pos <= oldPos) throw new Exception("Infinite loop detected!"); + results.Add(res); + } + + return results; + } + + private static ISearchQueryFilter? ParseToken(ReadOnlySpan input, ref int pos) + { + while (input[pos] == ' ') + { + pos++; + if (pos >= input.Length) return null; + } + + var negated = false; + if (input[pos] == '-' && input.Length > pos + 1) + { + negated = true; + pos++; + } + + if (input[pos] == '"' && input.Length > pos + 2) + { + var closingQuote = pos + 1 + input[(pos + 1)..].IndexOf('"'); + if (closingQuote != -1) + { + var literalRes = new WordFilter(negated, input[++pos..closingQuote].ToString()); + pos = closingQuote + 1; + return literalRes; + } + } + + if (input[pos] == '(' && input.Length > pos + 2) + { + var closingParen = pos + 1 + input[(pos + 1)..].IndexOf(')'); + if (closingParen != -1) + { + var items = input[++pos..closingParen].ToString().Split(" OR ").Select(p => p.Trim()).ToArray(); + var literalRes = new MultiWordFilter(negated, items); + if (items.Length > 0) + { + pos = closingParen + 1; + return literalRes; + } + } + } + + var end = input[pos..].IndexOf(' '); + if (end == -1) + end = input.Length; + else + end += pos; + + var splitIdx = input[pos..end].IndexOf(':'); + var keyRange = splitIdx < 1 ? ..0 : pos..(pos + splitIdx); + var key = splitIdx < 1 ? ReadOnlySpan.Empty : input[keyRange]; + var value = splitIdx < 1 ? input : input[(keyRange.End.Value + 1)..end]; + + ISearchQueryFilter res = key switch + { + "cw" => new CwFilter(negated, value.ToString()), + "from" or "author" or "by" or "user" => new FromFilter(negated, value.ToString()), + "mention" or "mentions" or "mentioning" => new MentionFilter(negated, value.ToString()), + "reply" or "replying" or "to" => new ReplyFilter(negated, value.ToString()), + "instance" or "domain" or "host" => new InstanceFilter(negated, value.ToString()), + + "filter" when MiscFilter.TryParse(negated, value, out var parsed) => parsed, + "in" when InFilter.TryParse(negated, value, out var parsed) => parsed, + "has" or "attachment" or "attached" when AttachmentFilter.TryParse(negated, value, out var parsed) + => parsed, + + "case" when CaseFilter.TryParse(value, out var parsed) => parsed, + "match" when MatchFilter.TryParse(value, out var parsed) => parsed, + + "after" or "since" when DateOnly.TryParse(value, out var date) => new AfterFilter(date), + "before" or "until" when DateOnly.TryParse(value, out var date) => new BeforeFilter(date), + + _ => new WordFilter(negated, input[pos..end].ToString()) + }; + + pos = end; + return res; + } +} diff --git a/Iceshrimp.Tests/Parsing/SearchQueryTests.cs b/Iceshrimp.Tests/Parsing/SearchQueryTests.cs index c13a3193..11c6d43b 100644 --- a/Iceshrimp.Tests/Parsing/SearchQueryTests.cs +++ b/Iceshrimp.Tests/Parsing/SearchQueryTests.cs @@ -1,15 +1,14 @@ using Iceshrimp.Parsing; -using static Iceshrimp.Parsing.SearchQueryFilters; namespace Iceshrimp.Tests.Parsing; [TestClass] public class SearchQueryTests { - private static List GetCandidatesByUsername(IEnumerable candidates) => - candidates.Select(p => $"{p}:username").SelectMany(SearchQuery.parse).ToList(); + private static List GetCandidatesByUsername(IEnumerable candidates) => + candidates.Select(p => $"{p}:username").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); - private static void Validate(ICollection results, object expectedResult, int count) + private static void Validate(ICollection results, object expectedResult, int count) { results.Count.Should().Be(count); foreach (var res in results) res.Should().BeEquivalentTo(expectedResult); @@ -20,7 +19,7 @@ public class SearchQueryTests [DataRow(true)] public void TestParseCw(bool negated) { - var result = SearchQuery.parse(negated ? "-cw:meta" : "cw:meta").ToList(); + var result = SearchQueryParser.Parse(negated ? "-cw:meta" : "cw:meta").ToList(); var expectedResult = new CwFilter(negated, "meta"); Validate(result, expectedResult, 1); } @@ -37,6 +36,17 @@ public class SearchQueryTests Validate(results, expectedResult, candidates.Count); } + [TestMethod] + [DataRow(false)] + [DataRow(true)] + public void TestParseInvalid(bool negated) + { + var prefix = negated ? "-" : ""; + //SearchQueryParser.Parse($"{prefix}from:"); + //SearchQueryParser.Parse($"{prefix}:"); + SearchQueryParser.Parse($"{prefix}asd {prefix}:"); + } + [TestMethod] [DataRow(false)] [DataRow(true)] @@ -68,26 +78,26 @@ public class SearchQueryTests { List candidates = ["instance", "domain", "host"]; if (negated) candidates = candidates.Select(p => "-" + p).ToList(); - var results = candidates.Select(p => $"{p}:instance.tld").SelectMany(SearchQuery.parse).ToList(); - var expectedResult = new InstanceFilter(negated, "instance.tld"); + var results = candidates.Select(p => $"{p}:instance.tld").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + var expectedResult = new InstanceFilter(negated, "instance.tld"); Validate(results, expectedResult, candidates.Count); } [TestMethod] public void TestParseAfter() { - List candidates = ["after", "since"]; - var results = candidates.Select(p => $"{p}:2024-03-01").SelectMany(SearchQuery.parse).ToList(); - var expectedResult = new AfterFilter(DateOnly.ParseExact("2024-03-01", "O")); + List candidates = ["after", "since"]; + var results = candidates.Select(p => $"{p}:2024-03-01").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + var expectedResult = new AfterFilter(DateOnly.ParseExact("2024-03-01", "O")); Validate(results, expectedResult, candidates.Count); } [TestMethod] public void TestParseBefore() { - List candidates = ["before", "until"]; - var results = candidates.Select(p => $"{p}:2024-03-01").SelectMany(SearchQuery.parse).ToList(); - var expectedResult = new BeforeFilter(DateOnly.ParseExact("2024-03-01", "O")); + List candidates = ["before", "until"]; + var results = candidates.Select(p => $"{p}:2024-03-01").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + var expectedResult = new BeforeFilter(DateOnly.ParseExact("2024-03-01", "O")); Validate(results, expectedResult, candidates.Count); } @@ -100,16 +110,18 @@ public class SearchQueryTests if (negated) keyCandidates = keyCandidates.Select(p => "-" + p).ToList(); List candidates = ["any", "media", "image", "video", "audio", "file", "poll"]; var results = - keyCandidates.Select(k => candidates.Select(v => $"{k}:{v}").SelectMany(SearchQuery.parse).ToList()); - List expectedResults = + keyCandidates.Select(k => candidates.Select(v => $"{k}:{v}") + .SelectMany(p => SearchQueryParser.Parse(p)) + .ToList()); + List expectedResults = [ - new AttachmentFilter(negated, "any"), - new AttachmentFilter(negated, "media"), - new AttachmentFilter(negated, "image"), - new AttachmentFilter(negated, "video"), - new AttachmentFilter(negated, "audio"), - new AttachmentFilter(negated, "file"), - new AttachmentFilter(negated, "poll") + new AttachmentFilter(negated, AttachmentFilterType.Media), + new AttachmentFilter(negated, AttachmentFilterType.Media), + new AttachmentFilter(negated, AttachmentFilterType.Image), + new AttachmentFilter(negated, AttachmentFilterType.Video), + new AttachmentFilter(negated, AttachmentFilterType.Audio), + new AttachmentFilter(negated, AttachmentFilterType.File), + new AttachmentFilter(negated, AttachmentFilterType.Poll) ]; results.Should() .HaveCount(keyCandidates.Count) @@ -119,10 +131,13 @@ public class SearchQueryTests [TestMethod] public void TestParseCase() { - const string key = "case"; - List candidates = ["sensitive", "insensitive"]; - var results = candidates.Select(v => $"{key}:{v}").SelectMany(SearchQuery.parse).ToList(); - List expectedResults = [new CaseFilter("sensitive"), new CaseFilter("insensitive")]; + const string key = "case"; + List candidates = ["sensitive", "insensitive"]; + var results = candidates.Select(v => $"{key}:{v}").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + List expectedResults = + [ + new CaseFilter(CaseFilterType.Sensitive), new CaseFilter(CaseFilterType.Insensitive) + ]; results.Should() .HaveCount(expectedResults.Count) .And.BeEquivalentTo(expectedResults, opts => opts.RespectingRuntimeTypes()); @@ -131,12 +146,15 @@ public class SearchQueryTests [TestMethod] public void TestParseMatch() { - const string key = "match"; + const string key = "match"; List candidates = ["words", "word", "substr", "substring"]; - var results = candidates.Select(v => $"{key}:{v}").SelectMany(SearchQuery.parse).ToList(); - List expectedResults = + var results = candidates.Select(v => $"{key}:{v}").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + List expectedResults = [ - new MatchFilter("words"), new MatchFilter("words"), new MatchFilter("substr"), new MatchFilter("substr") + new MatchFilter(MatchFilterType.Words), + new MatchFilter(MatchFilterType.Words), + new MatchFilter(MatchFilterType.Substring), + new MatchFilter(MatchFilterType.Substring) ]; results.Should() .HaveCount(expectedResults.Count) @@ -148,17 +166,17 @@ public class SearchQueryTests [DataRow(true)] public void TestParseIn(bool negated) { - var key = negated ? "-in" : "in"; + var key = negated ? "-in" : "in"; List candidates = ["bookmarks", "likes", "favorites", "favourites", "reactions", "interactions"]; - var results = candidates.Select(v => $"{key}:{v}").SelectMany(SearchQuery.parse).ToList(); - List expectedResults = + var results = candidates.Select(v => $"{key}:{v}").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + List expectedResults = [ - new InFilter(negated, "bookmarks"), - new InFilter(negated, "likes"), - new InFilter(negated, "likes"), - new InFilter(negated, "likes"), - new InFilter(negated, "reactions"), - new InFilter(negated, "interactions") + new InFilter(negated, InFilterType.Bookmarks), + new InFilter(negated, InFilterType.Likes), + new InFilter(negated, InFilterType.Likes), + new InFilter(negated, InFilterType.Likes), + new InFilter(negated, InFilterType.Reactions), + new InFilter(negated, InFilterType.Interactions) ]; results.Should() .HaveCount(expectedResults.Count) @@ -175,17 +193,17 @@ public class SearchQueryTests [ "followers", "following", "replies", "reply", "renote", "renotes", "boosts", "boost" ]; - var results = candidates.Select(v => $"{key}:{v}").SelectMany(SearchQuery.parse).ToList(); - List expectedResults = + var results = candidates.Select(v => $"{key}:{v}").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + List expectedResults = [ - new MiscFilter(negated, "followers"), - new MiscFilter(negated, "following"), - new MiscFilter(negated, "replies"), - new MiscFilter(negated, "replies"), - new MiscFilter(negated, "renotes"), - new MiscFilter(negated, "renotes"), - new MiscFilter(negated, "renotes"), - new MiscFilter(negated, "renotes") + new MiscFilter(negated, MiscFilterType.Followers), + new MiscFilter(negated, MiscFilterType.Following), + new MiscFilter(negated, MiscFilterType.Replies), + new MiscFilter(negated, MiscFilterType.Replies), + new MiscFilter(negated, MiscFilterType.Renotes), + new MiscFilter(negated, MiscFilterType.Renotes), + new MiscFilter(negated, MiscFilterType.Renotes), + new MiscFilter(negated, MiscFilterType.Renotes) ]; results.Should() .HaveCount(expectedResults.Count) @@ -199,8 +217,8 @@ public class SearchQueryTests { List candidates = ["test", "word", "since:2023-10-10invalid", "in:bookmarkstypo"]; if (negated) candidates = candidates.Select(p => "-" + p).ToList(); - var results = candidates.Select(v => $"{v}").SelectMany(SearchQuery.parse).ToList(); - List expectedResults = + var results = candidates.Select(v => $"{v}").SelectMany(p => SearchQueryParser.Parse(p)).ToList(); + List expectedResults = [ new WordFilter(negated, "test"), new WordFilter(negated, "word"), @@ -216,7 +234,7 @@ public class SearchQueryTests public void TestParseMultiWord() { const string input = "(word OR word2 OR word3)"; - var results = SearchQuery.parse(input).ToList(); + var results = SearchQueryParser.Parse(input).ToList(); results.Should().HaveCount(1); results[0].Should().BeOfType(); ((MultiWordFilter)results[0]).Values.ToList().Should().BeEquivalentTo(["word", "word2", "word3"]); @@ -226,10 +244,10 @@ public class SearchQueryTests public void TestParseLiteralString() { const string input = "\"literal string with spaces $# and has:image before:2023-10-10 other things\""; - var results = SearchQuery.parse(input).ToList(); + var results = SearchQueryParser.Parse(input).ToList(); results.Should().HaveCount(1); results[0].Should().BeOfType(); ((WordFilter)results[0]).Value.Should() .BeEquivalentTo("literal string with spaces $# and has:image before:2023-10-10 other things"); } -} \ No newline at end of file +}