[backend] Also search in alt text

This commit is contained in:
kopper 2024-10-09 18:30:08 +02:00 committed by Laura Hausmann
parent 83ab277a7e
commit 8998cd2874
No known key found for this signature in database
GPG key ID: D044E84C5BE01605
6 changed files with 88 additions and 16 deletions

View file

@ -636,6 +636,9 @@ public class DatabaseContext(DbContextOptions<DatabaseContext> options)
entity.HasIndex(e => e.Cw, "GIN_TRGM_note_cw")
.HasMethod("gin")
.HasOperators("gin_trgm_ops");
entity.HasIndex(e => e.CombinedAltText, "GIN_TRGM_note_combined_alt_text")
.HasMethod("gin")
.HasOperators("gin_trgm_ops");
entity.Property(e => e.AttachedFileTypes).HasDefaultValueSql("'{}'::character varying[]");
entity.Property(e => e.ChannelId).HasComment("The ID of source channel.");

View file

@ -2350,6 +2350,10 @@ namespace Iceshrimp.Backend.Core.Database.Migrations
.HasColumnName("channelId")
.HasComment("The ID of source channel.");
b.Property<string>("CombinedAltText")
.HasColumnType("text")
.HasColumnName("combinedAltText");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("createdAt")
@ -2597,6 +2601,11 @@ namespace Iceshrimp.Backend.Core.Database.Migrations
b.HasIndex("UserId", "Id");
b.HasIndex(new[] { "CombinedAltText" }, "GIN_TRGM_note_combined_alt_text");
NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex(new[] { "CombinedAltText" }, "GIN_TRGM_note_combined_alt_text"), "gin");
NpgsqlIndexBuilderExtensions.HasOperators(b.HasIndex(new[] { "CombinedAltText" }, "GIN_TRGM_note_combined_alt_text"), new[] { "gin_trgm_ops" });
b.HasIndex(new[] { "Cw" }, "GIN_TRGM_note_cw");
NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex(new[] { "Cw" }, "GIN_TRGM_note_cw"), "gin");

View file

@ -0,0 +1,46 @@
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Infrastructure;
#nullable disable
namespace Iceshrimp.Backend.Core.Database.Migrations
{
    /// <summary>
    /// Adds the nullable <c>combinedAltText</c> text column to the <c>note</c> table, fills it for
    /// existing notes from the comments of their attached drive files, and creates a GIN index
    /// using the <c>gin_trgm_ops</c> operator class on the new column.
    /// </summary>
    [DbContext(typeof(DatabaseContext))]
    [Migration("20240527231353_AddNoteCombinedAltTextField")]
    public partial class AddNoteCombinedAltTextField : Migration
    {
        // Identifiers shared by Up and Down so both directions stay in sync.
        private const string NoteTable = "note";
        private const string AltTextColumn = "combinedAltText";
        private const string AltTextIndex = "GIN_TRGM_note_combined_alt_text";

        // Backfill: for every note with a non-empty "fileIds" array, join the comments of the
        // referenced drive_file rows with single spaces and store the result on the note.
        private const string BackfillSql =
            """UPDATE note SET "combinedAltText"=(SELECT string_agg(comment, ' ') FROM drive_file WHERE id = ANY ("fileIds")) WHERE "fileIds" != '{}';""";

        /// <summary>
        /// Queues the schema change, the data backfill and the index creation, in that order.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the migration operations.</param>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.AddColumn<string>(
                name: AltTextColumn,
                table: NoteTable,
                type: "text",
                nullable: true);

            // Warn the operator up front: the backfill statement touches every note that has attachments.
            Console.WriteLine("Indexing drive file alt text, please hang tight!");
            Console.WriteLine("This may take a long time (15-30 minutes), especially if your database is unusually large or you're running low end hardware.");

            migrationBuilder.Sql(BackfillSql);

            // The index is created after the backfill statement, matching the operation order above.
            var createIndex = migrationBuilder.CreateIndex(
                name: AltTextIndex,
                table: NoteTable,
                column: AltTextColumn);
            createIndex.Annotation("Npgsql:IndexMethod", "gin");
            createIndex.Annotation("Npgsql:IndexOperators", new[] { "gin_trgm_ops" });
        }

        /// <summary>
        /// Reverts <see cref="Up"/>: drops the index first, then the column it covers.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the migration operations.</param>
        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropIndex(
                name: AltTextIndex,
                table: NoteTable);

            migrationBuilder.DropColumn(
                name: AltTextColumn,
                table: NoteTable);
        }
    }
}

View file

@ -219,6 +219,9 @@ public class Note : IEntity
[Column("repliesFetchedAt")]
public DateTime? RepliesFetchedAt { get;set; }
[Column("combinedAltText")]
public string? CombinedAltText { get; set; }
[ForeignKey(nameof(ChannelId))]
[InverseProperty(nameof(Tables.Channel.Notes))]
public virtual Channel? Channel { get; set; }
@ -366,4 +369,4 @@ public class Note : IEntity
[J("username")] public required string Username { get; set; }
[J("host")] public required string? Host { get; set; }
}
}
}

View file

@ -37,8 +37,7 @@ public static class QueryableFtsExtensions
MiscFilter miscFilter => current.ApplyMiscFilter(miscFilter, user),
ReplyFilter replyFilter => current.ApplyReplyFilter(replyFilter, config, db),
WordFilter wordFilter => current.ApplyWordFilter(wordFilter, caseSensitivity, matchType),
MultiWordFilter multiWordFilter =>
current.ApplyMultiWordFilter(multiWordFilter, caseSensitivity, matchType),
MultiWordFilter multiWordFilter => current.ApplyMultiWordFilter(multiWordFilter, caseSensitivity, matchType),
_ => throw new ArgumentOutOfRangeException(nameof(filter))
});
}
@ -264,31 +263,37 @@ public static class QueryableFtsExtensions
[Projectable]
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global",
Justification = "Projectable chain must have consistent visibility")]
internal static bool FtsQueryPreEscaped(
this Note note, string query, bool negated, CaseFilterType caseSensitivity, MatchFilterType matchType
) => matchType.Equals(MatchFilterType.Substring)
internal static bool FtsQueryPreEscaped(this Note note, string query, bool negated, CaseFilterType caseSensitivity, MatchFilterType matchType) => matchType.Equals(MatchFilterType.Substring)
? caseSensitivity.Equals(CaseFilterType.Sensitive)
? negated
? !EF.Functions.Like(note.Text!, "%" + query + "%", @"\") &&
!EF.Functions.Like(note.Cw!, "%" + query + "%", @"\")
!EF.Functions.Like(note.Cw!, "%" + query + "%", @"\") &&
!EF.Functions.Like(note.CombinedAltText!, "%" + query + "%", @"\")
: EF.Functions.Like(note.Text!, "%" + query + "%", @"\") ||
EF.Functions.Like(note.Cw!, "%" + query + "%", @"\")
EF.Functions.Like(note.Cw!, "%" + query + "%", @"\") ||
EF.Functions.Like(note.CombinedAltText!, "%" + query + "%", @"\")
: negated
? !EF.Functions.ILike(note.Text!, "%" + query + "%", @"\") &&
!EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\")
!EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\") &&
!EF.Functions.ILike(note.CombinedAltText!, "%" + query + "%", @"\")
: EF.Functions.ILike(note.Text!, "%" + query + "%", @"\") ||
EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\")
EF.Functions.ILike(note.Cw!, "%" + query + "%", @"\") ||
EF.Functions.ILike(note.CombinedAltText!, "%" + query + "%", @"\")
: caseSensitivity.Equals(CaseFilterType.Sensitive)
? negated
? !Regex.IsMatch(note.Text!, "\\y" + query + "\\y") &&
!Regex.IsMatch(note.Cw!, "\\y" + query + "\\y")
!Regex.IsMatch(note.Cw!, "\\y" + query + "\\y") &&
!Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y")
: Regex.IsMatch(note.Text!, "\\y" + query + "\\y") ||
Regex.IsMatch(note.Cw!, "\\y" + query + "\\y")
Regex.IsMatch(note.Cw!, "\\y" + query + "\\y") ||
Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y")
: negated
? !Regex.IsMatch(note.Text!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) &&
!Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase)
!Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) &&
!Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y", RegexOptions.IgnoreCase)
: Regex.IsMatch(note.Text!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) ||
Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase);
Regex.IsMatch(note.Cw!, "\\y" + query + "\\y", RegexOptions.IgnoreCase) ||
Regex.IsMatch(note.CombinedAltText!, "\\y" + query + "\\y", RegexOptions.IgnoreCase);
internal static string PreEscapeFtsQuery(string query, MatchFilterType matchType)
=> matchType.Equals(MatchFilterType.Substring)

View file

@ -239,6 +239,7 @@ public class NoteService(
}
}
var combinedAltText = data.Attachments?.Select(p => p.Comment).Where(c => c != null);
policySvc.CallRewriteHooks(data, IRewritePolicy.HookLocationEnum.PostLogic);
var note = new Note
@ -270,7 +271,8 @@ public class NoteService(
Emojis = data.Emoji ?? [],
ReplyUri = data.ReplyUri,
RenoteUri = data.RenoteUri,
RepliesCollection = data.ASNote?.Replies?.Id
RepliesCollection = data.ASNote?.Replies?.Id,
CombinedAltText = combinedAltText != null ? string.Join(' ', combinedAltText) : null
};
if (data.Poll != null)
@ -594,6 +596,10 @@ public class NoteService(
{
note.FileIds = fileIds;
note.AttachedFileTypes = data.Attachments?.Select(p => p.Type).ToList() ?? [];
var combinedAltText = data.Attachments?.Select(p => p.Comment).Where(c => c != null);
note.CombinedAltText = combinedAltText != null ? string.Join(' ', combinedAltText) : null;
}
var isPollEdited = false;
@ -1607,4 +1613,4 @@ public class NoteService(
if (dbNote == null) return;
await RemoveReactionFromNoteAsync(dbNote, actor, name);
}
}
}