Files
2026-03-25 18:39:50 +01:00

140 lines
4.6 KiB
Python

"""
Full-text search service for PEC messages.
Uses PostgreSQL tsvector columns for fast searches over:
- subject (weight A)
- sender / recipients (weight B)
- message body (weight C)
- text extracted from PDF/DOCX attachments (weight D)
If search_vector is NULL (message not yet indexed by the worker),
the search automatically falls back to ILIKE on the base columns.
"""
import uuid
from datetime import datetime
from typing import Optional
from sqlalchemy import case, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.models.message import Message
class SearchService:
    """Encapsulates the full-text search logic over messages."""

    # Escape character declared explicitly on every ILIKE so that the
    # escaping done by ``_escape_like`` does not rely on a backend default.
    _LIKE_ESCAPE = "\\"

    def __init__(self, db: AsyncSession) -> None:
        """Store the async session used to execute the search queries."""
        self.db = db

    @staticmethod
    def _escape_like(term: str, escape: str = "\\") -> str:
        """Return *term* with LIKE metacharacters (``%``, ``_``, *escape*) escaped."""
        # The escape character itself must be doubled first, otherwise the
        # escape characters added for "%" and "_" would be doubled as well.
        return (
            term.replace(escape, escape + escape)
            .replace("%", escape + "%")
            .replace("_", escape + "_")
        )

    @staticmethod
    def _date_window(column, date_from, date_to):
        """Build the condition "column lies within [date_from, date_to]".

        Either bound may be ``None`` (open-ended); callers guarantee that at
        least one bound is set.
        """
        if date_from is not None and date_to is not None:
            return (column >= date_from) & (column <= date_to)
        if date_from is not None:
            return column >= date_from
        return column <= date_to

    async def search_messages(
        self,
        tenant_id: uuid.UUID,
        search_term: str,
        visible_mailbox_ids: Optional[list[uuid.UUID]],
        mailbox_id: Optional[uuid.UUID] = None,
        direction: Optional[str] = None,
        state: Optional[str] = None,
        pec_type: Optional[str] = None,
        date_from: Optional[datetime] = None,
        date_to: Optional[datetime] = None,
        is_archived: Optional[bool] = False,
        is_trashed: Optional[bool] = False,
        is_starred: Optional[bool] = None,
        is_read: Optional[bool] = None,
        page: int = 1,
        page_size: int = 50,
    ) -> tuple[list[Message], int]:
        """
        Run a full-text search over the messages of a tenant.

        Logic:
        1. Rows with a non-NULL ``search_vector`` are matched with the ``@@``
           operator against ``websearch_to_tsquery('italian', term)``.
        2. Rows with a NULL ``search_vector`` fall back to a case-insensitive
           substring match (ILIKE) on subject, sender address and body text.
        3. All additional filters (mailbox, direction, state, type, flags,
           dates) are applied on top.
        4. Results are ordered by ``ts_rank`` descending (0.0 for fallback
           rows), then ``received_at`` descending (NULLs last), then
           ``created_at`` descending.

        Args:
            tenant_id: Tenant whose messages are searched.
            search_term: User-supplied search text. Surrounding whitespace is
                ignored; a blank term yields no results.
            visible_mailbox_ids: Mailboxes the caller may see. ``None`` means
                no restriction; an empty list yields no results.
            mailbox_id: Restrict to a single mailbox when set.
            direction: Restrict to this ``Message.direction`` when non-empty.
            state: Restrict to this ``Message.state`` when non-empty.
            pec_type: Restrict to this ``Message.pec_type`` when non-empty.
            date_from: Lower bound (inclusive) of the date window.
            date_to: Upper bound (inclusive) of the date window. A message
                matches when ``received_at`` or ``sent_at`` lies in the window.
            is_archived: Filter on the archived flag; ``None`` disables it.
            is_trashed: Filter on the trashed flag; ``None`` disables it.
            is_starred: Filter on the starred flag; ``None`` disables it.
            is_read: Filter on the read flag; ``None`` disables it.
            page: 1-based page number; values below 1 are treated as 1.
            page_size: Maximum number of items returned; negative values are
                treated as 0.

        Returns:
            A tuple ``(items, total)``: the messages of the requested page and
            the total number of matches across all pages.
        """
        # Guard clauses first: nothing to search for, or nothing visible.
        term = (search_term or "").strip()
        if not term:
            # A blank term would produce an empty tsquery plus an ILIKE '%%'
            # that matches every not-yet-indexed message.
            return [], 0
        if visible_mailbox_ids is not None and not visible_mailbox_ids:
            return [], 0

        # Negative OFFSET / LIMIT values are rejected by the database.
        page = max(page, 1)
        page_size = max(page_size, 0)

        # Only top-level messages (no parent) of the tenant are searched.
        q = select(Message).where(
            Message.tenant_id == tenant_id,
            Message.parent_message_id.is_(None),
        )

        # Visible-mailbox restriction (permissions).
        if visible_mailbox_ids is not None:
            q = q.where(Message.mailbox_id.in_(visible_mailbox_ids))

        # Optional filters.
        if mailbox_id:
            q = q.where(Message.mailbox_id == mailbox_id)
        if direction:
            q = q.where(Message.direction == direction)
        if state:
            q = q.where(Message.state == state)
        if pec_type:
            q = q.where(Message.pec_type == pec_type)
        if is_archived is not None:
            q = q.where(Message.is_archived == is_archived)
        if is_trashed is not None:
            q = q.where(Message.is_trashed == is_trashed)
        if is_starred is not None:
            q = q.where(Message.is_starred == is_starred)
        if is_read is not None:
            q = q.where(Message.is_read == is_read)

        # Date filter: a message matches when received_at OR sent_at lies in
        # the window. Both bounds are applied to the SAME column, so a message
        # received before the window and sent after it is not matched.
        if date_from is not None or date_to is not None:
            q = q.where(
                or_(
                    self._date_window(Message.received_at, date_from, date_to),
                    self._date_window(Message.sent_at, date_from, date_to),
                )
            )

        # Full-text search with ILIKE fallback for rows not yet indexed.
        tsquery = func.websearch_to_tsquery("italian", term)
        esc = self._LIKE_ESCAPE
        # Escape LIKE metacharacters so the term is matched literally.
        term_like = f"%{self._escape_like(term, esc)}%"
        fts_condition = Message.search_vector.op("@@")(tsquery)
        ilike_fallback = Message.search_vector.is_(None) & or_(
            Message.subject.ilike(term_like, escape=esc),
            Message.from_address.ilike(term_like, escape=esc),
            Message.body_text.ilike(term_like, escape=esc),
        )
        q = q.where(or_(fts_condition, ilike_fallback))

        # Total count (without pagination).
        count_q = select(func.count()).select_from(q.subquery())
        total: int = (await self.db.execute(count_q)).scalar_one()

        # Order by FTS relevance, then by date. Fallback rows have no vector
        # to rank, so they get rank 0.0.
        rank_expr = case(
            (
                Message.search_vector.isnot(None),
                func.ts_rank(Message.search_vector, tsquery),
            ),
            else_=0.0,
        )
        q = (
            q.options(selectinload(Message.labels))
            .order_by(
                rank_expr.desc(),
                Message.received_at.desc().nullslast(),
                Message.created_at.desc(),
            )
            .offset((page - 1) * page_size)
            .limit(page_size)
        )
        result = await self.db.execute(q)
        items = list(result.scalars().all())
        return items, total