mirror of
https://github.com/idrainformatica/PecFlow.git
synced 2026-06-16 12:45:42 +02:00
Semantic search
This commit is contained in:
@@ -0,0 +1,76 @@
|
||||
"""add full text search vector to messages and extracted_text to attachments
|
||||
|
||||
Revision ID: 0008
|
||||
Revises: 0007
|
||||
Create Date: 2026-03-25
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# Alembic revision identifiers. `revision` and `down_revision` mirror the
# "Revision ID" and "Revises" values stated in the module docstring.
revision = '0008'
|
||||
down_revision = '0007'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add full-text search to ``messages`` and ``extracted_text`` to ``attachments``.

    Steps, in order:

    1. Add the nullable ``messages.search_vector`` (TSVECTOR) column.
    2. Add the nullable ``attachments.extracted_text`` (TEXT) column, meant
       for text extracted from PDF/DOCX attachments.
    3. Create the ``messages_search_vector_update()`` trigger function.
    4. Create the ``trg_messages_search_vector`` trigger on ``messages``.
    5. Backfill ``search_vector`` for rows that already exist.
    6. Create the partial GIN index ``idx_messages_fts``.

    The index is created last on purpose: building a GIN index once over
    already-populated data is cheaper than maintaining it row by row while
    the backfill UPDATE runs.
    """
    # Single source of truth for the weighted tsvector, so the trigger
    # function and the backfill cannot drift apart. ``{row}`` is the column
    # qualifier: "NEW." inside the trigger, empty in a plain UPDATE.
    # Weights: subject = A, sender/recipients = B, body = C.
    # NOTE(review): array_to_string() implies ``to_addresses`` is an array
    # column -- confirm against the table definition.
    vector_template = (
        "setweight(to_tsvector('italian', coalesce({row}subject, '')), 'A') || "
        "setweight(to_tsvector('simple', coalesce({row}from_address, '')), 'B') || "
        "setweight(to_tsvector('simple', "
        "coalesce(array_to_string({row}to_addresses, ' '), '')), 'B') || "
        "setweight(to_tsvector('italian', coalesce({row}body_text, '')), 'C')"
    )
    trigger_vector = vector_template.format(row="NEW.")
    backfill_vector = vector_template.format(row="")

    # 1. Add the search_vector column to messages.
    op.add_column(
        'messages',
        sa.Column('search_vector', postgresql.TSVECTOR(), nullable=True),
    )

    # 2. Add the extracted_text column to attachments (text extracted from
    #    PDF/DOCX files).
    op.add_column(
        'attachments',
        sa.Column('extracted_text', sa.Text(), nullable=True),
    )

    # 3. Trigger function that recomputes search_vector from the text fields.
    op.execute(
        "CREATE OR REPLACE FUNCTION messages_search_vector_update() "
        "RETURNS trigger AS $$\n"
        "BEGIN\n"
        "    NEW.search_vector := {expr};\n"
        "    RETURN NEW;\n"
        "END;\n"
        "$$ LANGUAGE plpgsql;".format(expr=trigger_vector)
    )

    # 4. Trigger: fires on INSERT and on UPDATE of the relevant columns only.
    #    search_vector itself is not in the column list, so the backfill
    #    below does not re-fire it.
    op.execute(
        "CREATE TRIGGER trg_messages_search_vector "
        "BEFORE INSERT OR UPDATE OF subject, from_address, to_addresses, body_text "
        "ON messages "
        "FOR EACH ROW EXECUTE FUNCTION messages_search_vector_update();"
    )

    # 5. Backfill: populate search_vector for the existing messages.
    op.execute(
        "UPDATE messages SET search_vector = {expr} "
        "WHERE search_vector IS NULL".format(expr=backfill_vector)
    )

    # 6. GIN index for fast full-text search, built after the backfill so it
    #    is constructed in one pass over the populated column.
    op.execute(
        "CREATE INDEX idx_messages_fts ON messages USING gin(search_vector) "
        "WHERE search_vector IS NOT NULL"
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert revision 0008.

    Drops the search trigger, its trigger function and the full-text index,
    then removes ``attachments.extracted_text`` and ``messages.search_vector``.
    """
    # Database objects are removed before the column they depend on.
    teardown_statements = (
        "DROP TRIGGER IF EXISTS trg_messages_search_vector ON messages",
        "DROP FUNCTION IF EXISTS messages_search_vector_update()",
        "DROP INDEX IF EXISTS idx_messages_fts",
    )
    for statement in teardown_statements:
        op.execute(statement)

    added_columns = (
        ('attachments', 'extracted_text'),
        ('messages', 'search_vector'),
    )
    for table_name, column_name in added_columns:
        op.drop_column(table_name, column_name)
|
||||
Reference in New Issue
Block a user