Semantic search

This commit is contained in:
2026-03-25 18:39:50 +01:00
parent f5fb537fed
commit cbeedc2d2f
14 changed files with 1336 additions and 56 deletions
+41 -11
View File
@@ -26,6 +26,8 @@ from sqlalchemy import func, or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.services.search_service import SearchService
from app.config import get_settings
from app.core.exceptions import ForbiddenError, NotFoundError
from app.database import get_db
@@ -184,8 +186,11 @@ async def list_messages(
is_starred: Optional[bool] = Query(None),
is_archived: Optional[bool] = Query(False),
is_trashed: Optional[bool] = Query(False),
search: Optional[str] = Query(None, max_length=200),
search: Optional[str] = Query(None, max_length=500),
pec_type: Optional[str] = Query(None),
# Filtri data (ISO 8601, es. 2026-01-01T00:00:00Z)
date_from: Optional[datetime] = Query(None, description="Data minima (received_at o sent_at)"),
date_to: Optional[datetime] = Query(None, description="Data massima (received_at o sent_at)"),
# Paginazione
page: int = Query(1, ge=1),
page_size: int = Query(50, ge=1, le=200),
@@ -195,7 +200,8 @@ async def list_messages(
- `is_archived=False` (default) esclude i messaggi archiviati.
- `is_trashed=False` (default) esclude i messaggi nel cestino.
- `search` cerca su subject, from_address, to_addresses.
- `search` usa ricerca full-text (tsvector) con fallback ILIKE.
- `date_from` / `date_to` filtrano per data ricezione o invio.
- `vbox_id` filtra per Virtual Box assegnata all'utente corrente.
"""
# Determinare le caselle visibili (normale check permessi)
@@ -284,16 +290,30 @@ async def list_messages(
if is_trashed is not None:
q = q.where(Message.is_trashed == is_trashed)
# ── Full-text search (FTS con fallback ILIKE per messaggi non indicizzati) ───
if search:
term = f"%{search}%"
from sqlalchemy import case as sa_case
tsquery = func.websearch_to_tsquery("italian", search)
term_like = f"%{search}%"
q = q.where(
or_(
Message.subject.ilike(term),
Message.from_address.ilike(term),
Message.body_text.ilike(term),
Message.search_vector.op("@@")(tsquery),
# Fallback per messaggi non ancora indicizzati dal worker
Message.search_vector.is_(None) & or_(
Message.subject.ilike(term_like),
Message.from_address.ilike(term_like),
Message.body_text.ilike(term_like),
),
)
)
# ── Filtri data ───────────────────────────────────────────────────────────
if date_from:
q = q.where(or_(Message.received_at >= date_from, Message.sent_at >= date_from))
if date_to:
q = q.where(or_(Message.received_at <= date_to, Message.sent_at <= date_to))
# Applica le regole della Virtual Box (AND tra le regole)
for rule in vbox_rules:
q = _apply_vbox_rule(q, rule.field, rule.operator, rule.value)
@@ -302,13 +322,23 @@ async def list_messages(
count_q = select(func.count()).select_from(q.subquery())
total = (await db.execute(count_q)).scalar_one()
# Ordinamento e paginazione
# Ordinamento: se c'e' una ricerca, ordina per rilevanza FTS, poi data
if search:
from sqlalchemy import case as sa_case
tsquery_ord = func.websearch_to_tsquery("italian", search)
rank_expr = sa_case(
(Message.search_vector.isnot(None), func.ts_rank(Message.search_vector, tsquery_ord)),
else_=0.0,
)
order_clauses = [rank_expr.desc(), Message.received_at.desc().nullslast(), Message.created_at.desc()]
else:
order_clauses = [Message.received_at.desc().nullslast(), Message.created_at.desc()]
# Paginazione
q = (
q.options(selectinload(Message.labels))
.order_by(
Message.received_at.desc().nullslast(),
Message.created_at.desc(),
)
.order_by(*order_clauses)
.offset((page - 1) * page_size)
.limit(page_size)
)