Files
PecHub/backend/alembic/versions/0008_full_text_search.py
T
2026-03-25 18:39:50 +01:00

77 lines
2.7 KiB
Python

"""add full text search vector to messages and extracted_text to attachments
Revision ID: 0008
Revises: 0007
Create Date: 2026-03-25
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers for this migration: `revision` is this migration's ID
# and `down_revision` is the migration it revises (its parent in the chain).
revision = '0008'
down_revision = '0007'
# No branch labels or cross-branch dependencies are declared for this revision.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add full-text search to ``messages`` and extracted text to ``attachments``.

    Steps, in order:

    1. Add the nullable ``messages.search_vector`` (``tsvector``) column.
    2. Add the nullable ``attachments.extracted_text`` (``text``) column.
    3. Create the ``messages_search_vector_update()`` trigger function.
    4. Attach it to ``messages`` as a ``BEFORE INSERT OR UPDATE OF ...`` row
       trigger.
    5. Backfill ``search_vector`` for rows that already exist.
    6. Build the partial GIN index ``idx_messages_fts``.

    The index is deliberately created *after* the backfill: building a GIN
    index once over populated data is cheaper than maintaining it row by row
    while the backfill ``UPDATE`` rewrites the whole table.
    """
    # 1. Add the search_vector column to messages.
    op.add_column(
        'messages',
        sa.Column('search_vector', postgresql.TSVECTOR(), nullable=True),
    )

    # 2. Add the extracted_text column to attachments (text extracted from
    #    PDF/DOCX files).
    op.add_column(
        'attachments',
        sa.Column('extracted_text', sa.Text(), nullable=True),
    )

    # 3. Trigger function that recomputes search_vector from the textual
    #    fields. Weights: subject = A, sender and recipients = B, body = C.
    #    Subject and body use the 'italian' configuration; addresses use
    #    'simple'.
    #    array_to_string() implies to_addresses is an array column
    #    (presumably text[] - confirm against the messages model).
    #    NOTE(review): PostgreSQL caps a tsvector at roughly 1 MB; a very
    #    large body_text could make to_tsvector() raise and therefore make
    #    the INSERT/UPDATE itself fail. Consider truncating the body here if
    #    such messages are possible.
    op.execute("""
        CREATE OR REPLACE FUNCTION messages_search_vector_update() RETURNS trigger AS $$
        BEGIN
            NEW.search_vector :=
                setweight(to_tsvector('italian', coalesce(NEW.subject, '')), 'A') ||
                setweight(to_tsvector('simple', coalesce(NEW.from_address, '')), 'B') ||
                setweight(to_tsvector('simple',
                    coalesce(array_to_string(NEW.to_addresses, ' '), '')), 'B') ||
                setweight(to_tsvector('italian', coalesce(NEW.body_text, '')), 'C');
            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;
    """)

    # 4. Create the trigger. It fires on INSERT, and on UPDATE only when one
    #    of the listed columns is a target of the UPDATE statement.
    op.execute("""
        CREATE TRIGGER trg_messages_search_vector
        BEFORE INSERT OR UPDATE OF subject, from_address, to_addresses, body_text
        ON messages
        FOR EACH ROW EXECUTE FUNCTION messages_search_vector_update();
    """)

    # 5. Backfill: populate search_vector for existing messages. This UPDATE
    #    only assigns search_vector, which is not in the trigger's column
    #    list, so the trigger does not fire and the expression is repeated
    #    here. Keep it in sync with the trigger function above.
    op.execute("""
        UPDATE messages SET search_vector =
            setweight(to_tsvector('italian', coalesce(subject, '')), 'A') ||
            setweight(to_tsvector('simple', coalesce(from_address, '')), 'B') ||
            setweight(to_tsvector('simple',
                coalesce(array_to_string(to_addresses, ' '), '')), 'B') ||
            setweight(to_tsvector('italian', coalesce(body_text, '')), 'C')
        WHERE search_vector IS NULL
    """)

    # 6. GIN index for fast full-text search, restricted to rows that have a
    #    vector. Created last so it is built in a single pass over the
    #    already-backfilled data instead of being updated once per row.
    op.execute(
        "CREATE INDEX idx_messages_fts ON messages USING gin(search_vector) "
        "WHERE search_vector IS NOT NULL"
    )
def downgrade() -> None:
    """Undo ``upgrade``: remove the trigger, its function, the index and both columns."""
    # Database objects are dropped in this order: the trigger first, then the
    # function it executes, then the full-text index.
    teardown_statements = (
        "DROP TRIGGER IF EXISTS trg_messages_search_vector ON messages",
        "DROP FUNCTION IF EXISTS messages_search_vector_update()",
        "DROP INDEX IF EXISTS idx_messages_fts",
    )
    for statement in teardown_statements:
        op.execute(statement)

    # Finally remove the columns that upgrade() added.
    added_columns = (
        ('attachments', 'extracted_text'),
        ('messages', 'search_vector'),
    )
    for table_name, column_name in added_columns:
        op.drop_column(table_name, column_name)