"""
Full-text search service for PEC messages.

Uses PostgreSQL tsvector columns for fast searches over:
- subject (weight A)
- sender / recipients (weight B)
- message body (weight C)
- text extracted from PDF/DOCX attachments (weight D)

If search_vector is NULL (message not yet indexed by the worker), the
search automatically falls back to ILIKE on the base columns.
"""

import uuid
from datetime import datetime
from typing import Optional

from sqlalchemy import case, func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload

from app.models.message import Message

# Escape character used for ILIKE patterns. A forward slash is used instead
# of a backslash so the rendered SQL does not depend on the server's
# backslash-in-string-literal settings.
_LIKE_ESCAPE = "/"


def _escape_like(term: str, escape: str = _LIKE_ESCAPE) -> str:
    """Return *term* with LIKE wildcards neutralised.

    The escape character itself is doubled first, then ``%`` and ``_`` are
    prefixed with it, so the result matches the user's text literally when
    used with ``ILIKE ... ESCAPE <escape>``.
    """
    return (
        term.replace(escape, escape + escape)
        .replace("%", escape + "%")
        .replace("_", escape + "_")
    )


def _date_range_condition(column, date_from, date_to):
    """Build the condition "column lies within [date_from, date_to]".

    Either bound may be ``None`` (open-ended), but not both: the caller only
    invokes this when at least one bound is set.
    """
    if date_from is not None and date_to is not None:
        return (column >= date_from) & (column <= date_to)
    if date_from is not None:
        return column >= date_from
    return column <= date_to


class SearchService:
    """Encapsulates the full-text search logic over messages."""

    def __init__(self, db: AsyncSession) -> None:
        self.db = db

    async def search_messages(
        self,
        tenant_id: uuid.UUID,
        search_term: str,
        visible_mailbox_ids: Optional[list[uuid.UUID]],
        mailbox_id: Optional[uuid.UUID] = None,
        direction: Optional[str] = None,
        state: Optional[str] = None,
        pec_type: Optional[str] = None,
        date_from: Optional[datetime] = None,
        date_to: Optional[datetime] = None,
        is_archived: Optional[bool] = False,
        is_trashed: Optional[bool] = False,
        is_starred: Optional[bool] = None,
        is_read: Optional[bool] = None,
        page: int = 1,
        page_size: int = 50,
    ) -> tuple[list[Message], int]:
        """
        Run a full-text search over the tenant's top-level messages.

        Logic:
        1. Messages with a non-NULL search_vector are matched with the
           ``@@`` operator and ranked with ``ts_rank``.
        2. Messages with a NULL search_vector (not indexed yet) are matched
           with a literal, case-insensitive ILIKE on subject, sender address
           and body text.
        3. All additional filters (date, state, type, direction, flags) are
           applied.
        4. Results are ordered by FTS relevance descending, then by date
           descending.

        Args:
            tenant_id: Tenant whose messages are searched.
            search_term: User-supplied search text.
            visible_mailbox_ids: Mailboxes the caller may see. ``None`` means
                no restriction; an empty list short-circuits to no results.
            mailbox_id: Restrict to a single mailbox.
            direction, state, pec_type: Equality filters, skipped when falsy.
            date_from, date_to: Inclusive bounds; a message matches when its
                ``received_at`` or its ``sent_at`` lies within the range.
            is_archived, is_trashed, is_starred, is_read: Flag filters,
                skipped when ``None``.
            page: 1-based page number; values below 1 are treated as 1.
            page_size: Page length; negative values are treated as 0.

        Returns:
            A tuple ``(items, total)`` where ``items`` is the requested page
            and ``total`` is the number of matches before pagination.
        """
        # Permission restriction: an explicit empty list means the caller can
        # see nothing, so skip the database round-trip entirely.
        if visible_mailbox_ids is not None and not visible_mailbox_ids:
            return [], 0

        # A negative OFFSET/LIMIT is rejected by PostgreSQL; clamp instead of
        # letting an out-of-range page surface as a database error.
        page = max(page, 1)
        page_size = max(page_size, 0)

        # Only top-level messages (no parent) of the given tenant.
        q = select(Message).where(
            Message.tenant_id == tenant_id,
            Message.parent_message_id.is_(None),
        )

        if visible_mailbox_ids is not None:
            q = q.where(Message.mailbox_id.in_(visible_mailbox_ids))

        # Optional filters
        if mailbox_id:
            q = q.where(Message.mailbox_id == mailbox_id)
        if direction:
            q = q.where(Message.direction == direction)
        if state:
            q = q.where(Message.state == state)
        if pec_type:
            q = q.where(Message.pec_type == pec_type)
        if is_archived is not None:
            q = q.where(Message.is_archived == is_archived)
        if is_trashed is not None:
            q = q.where(Message.is_trashed == is_trashed)
        if is_starred is not None:
            q = q.where(Message.is_starred == is_starred)
        if is_read is not None:
            q = q.where(Message.is_read == is_read)

        # Date filter: the range is evaluated per column and the two columns
        # are then OR'ed. Applying each bound as a separate OR over both
        # columns would let a message through when received_at satisfies one
        # bound and sent_at the other, even though neither timestamp is
        # actually inside the requested window.
        if date_from is not None or date_to is not None:
            q = q.where(
                or_(
                    _date_range_condition(Message.received_at, date_from, date_to),
                    _date_range_condition(Message.sent_at, date_from, date_to),
                )
            )

        # Full-text search with ILIKE fallback for rows not indexed yet.
        tsquery = func.websearch_to_tsquery("italian", search_term)
        # Escape % and _ so the user's text is matched literally rather than
        # being interpreted as LIKE wildcards.
        term_like = f"%{_escape_like(search_term)}%"

        fts_condition = Message.search_vector.op("@@")(tsquery)
        ilike_fallback = Message.search_vector.is_(None) & or_(
            Message.subject.ilike(term_like, escape=_LIKE_ESCAPE),
            Message.from_address.ilike(term_like, escape=_LIKE_ESCAPE),
            Message.body_text.ilike(term_like, escape=_LIKE_ESCAPE),
        )
        q = q.where(or_(fts_condition, ilike_fallback))

        # Total count (without pagination)
        count_q = select(func.count()).select_from(q.subquery())
        total: int = (await self.db.execute(count_q)).scalar_one()

        # Rank indexed rows by FTS relevance; un-indexed rows get rank 0.
        rank_expr = case(
            (
                Message.search_vector.isnot(None),
                func.ts_rank(Message.search_vector, tsquery),
            ),
            else_=0.0,
        )

        q = (
            q.options(selectinload(Message.labels))
            .order_by(
                rank_expr.desc(),
                Message.received_at.desc().nullslast(),
                Message.created_at.desc(),
            )
            .offset((page - 1) * page_size)
            .limit(page_size)
        )

        result = await self.db.execute(q)
        items = list(result.scalars().all())
        return items, total