mirror of
https://github.com/idrainformatica/PecFlow.git
synced 2026-06-16 12:45:42 +02:00
545 lines
18 KiB
Python
545 lines
18 KiB
Python
"""
|
||
Logica di sincronizzazione messaggi IMAP – Fase 3 aggiornata.
|
||
|
||
Responsabilità:
|
||
1. Fetch della lista UID > last_sync_uid
|
||
2. Download envelope + raw EML per ogni UID
|
||
3. Parsing completo EML tramite app.parsers (Fase 3):
|
||
- Classificazione tipo PEC (X-Ricevuta / X-TipoRicevuta)
|
||
- Estrazione allegati (body text/html + allegati file)
|
||
- EML-in-EML per ricevute PEC
|
||
4. Salvataggio messaggio in tabella messages
|
||
5. Upload raw EML su MinIO
|
||
6. Upload allegati su MinIO + inserimento in tabella attachments
|
||
7. State machine messaggi outbound (sent→accepted→delivered/anomaly)
|
||
tramite X-Riferimento-Message-ID
|
||
8. Aggiornamento last_sync_uid e last_sync_at sulla mailbox
|
||
9. Pubblicazione evento Redis per notifica WebSocket
|
||
"""
|
||
|
||
import email
|
||
import email.header
|
||
import email.utils
|
||
import json
|
||
import logging
|
||
import re
|
||
import uuid
|
||
from datetime import UTC, datetime
|
||
|
||
import aioimaplib
|
||
import redis.asyncio as aioredis
|
||
from sqlalchemy import select
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
from app.config import get_settings
|
||
from app.models import Attachment, Mailbox, Message
|
||
from app.parsers.eml_parser import parse_eml
|
||
from app.parsers.pec_parser import apply_outbound_transition, classify_pec_message
|
||
from app.storage.minio_client import upload_attachment, upload_eml
|
||
|
||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||
# Application settings, resolved once when the module is imported
# (this module reads `settings.imap_max_fetch_per_cycle`).
settings = get_settings()
|
||
|
||
|
||
# ─── Helper legacy (mantenuti per backward compatibility con i test) ──────────
|
||
|
||
def _decode_header(header_value: str | None) -> str | None:
|
||
"""Decodifica header RFC 2047 (es. =?utf-8?b?...?=) in stringa Python."""
|
||
if not header_value:
|
||
return None
|
||
try:
|
||
parts = email.header.decode_header(header_value)
|
||
decoded = []
|
||
for part, charset in parts:
|
||
if isinstance(part, bytes):
|
||
decoded.append(part.decode(charset or "utf-8", errors="replace"))
|
||
else:
|
||
decoded.append(part)
|
||
return "".join(decoded).strip()
|
||
except Exception:
|
||
return str(header_value)
|
||
|
||
|
||
def _extract_addresses(field: str | None) -> list[str]:
|
||
"""Estrae lista di indirizzi email da un campo To/Cc."""
|
||
if not field:
|
||
return []
|
||
try:
|
||
addresses = email.utils.getaddresses([field])
|
||
return [addr for _, addr in addresses if addr]
|
||
except Exception:
|
||
return []
|
||
|
||
|
||
def _parse_date(date_str: str | None) -> datetime | None:
|
||
"""Converte stringa data RFC 2822 in datetime con timezone."""
|
||
if not date_str:
|
||
return None
|
||
try:
|
||
parsed = email.utils.parsedate_to_datetime(date_str)
|
||
if parsed.tzinfo is None:
|
||
parsed = parsed.replace(tzinfo=UTC)
|
||
return parsed
|
||
except Exception:
|
||
return None
|
||
|
||
|
||
def _classify_pec_type(msg: email.message.Message) -> str:
    """Return the PEC type of ``msg``.

    Kept for backward compatibility: it only delegates to
    ``classify_pec_message`` and returns the ``pec_type`` attribute of its
    result (per the original note, the classification is based on the
    X-Ricevuta / X-TipoRicevuta headers — see the parser for details).
    """
    return classify_pec_message(msg).pec_type
|
||
|
||
|
||
def _parse_eml(raw_bytes: bytes) -> dict:
    """Basic EML parsing, kept for backward compatibility with existing tests.

    Thin wrapper around the full parser (``parse_eml``). The PEC type defaults
    to ``"posta_certificata"`` and is replaced by the classifier's result when
    the parser exposes the underlying raw message.

    Returns:
        A dict with the base fields needed for the ``messages`` table. Empty
        ``to_addresses`` / ``cc_addresses`` are reported as ``None``.
    """
    result = parse_eml(raw_bytes)

    source_message = result.raw_message
    if source_message is None:
        kind = "posta_certificata"
    else:
        kind = classify_pec_message(source_message).pec_type

    recipients = result.to_addresses or None
    copies = result.cc_addresses or None

    fields = {
        "subject": result.subject,
        "from_address": result.from_address,
        "to_addresses": recipients,
        "cc_addresses": copies,
        "message_id_header": result.message_id,
        "sent_at": result.date,
        "pec_type": kind,
        "body_text": result.body_text,
        "body_html": result.body_html,
        "has_attachments": result.has_attachments,
    }
    return fields
|
||
|
||
|
||
# ─── Core sync function ───────────────────────────────────────────────────────
|
||
|
||
async def sync_new_messages(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> int:
    """Synchronise the messages with UID > ``mailbox.last_sync_uid``.

    Issues ``SEARCH UID <last+1>:*``, fetches each returned message by
    sequence number (at most ``settings.imap_max_fetch_per_cycle`` per call)
    and, if at least one message was saved, updates ``last_sync_uid`` /
    ``last_sync_at`` on the mailbox and commits.

    Args:
        imap_client: IMAP client used for SEARCH and FETCH.
        mailbox: Mailbox being synchronised; its sync markers are updated.
        db: Async DB session; committed only when something was saved.
        redis_client: Redis client forwarded to the per-message save step.

    Returns:
        Number of new messages synchronised. ``0`` when SEARCH fails, returns
        a non-OK status, or yields no sequence numbers.
    """
    last_uid = mailbox.last_sync_uid or 0
    search_range = f"{last_uid + 1}:*"

    # ── SEARCH UID > last_sync_uid ────────────────────────────────────────────
    try:
        status, search_data = await imap_client.search("UID", search_range)
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] SEARCH fallito: {e}")
        return 0

    if status != "OK":
        logger.warning(
            f"[{mailbox.email_address}] SEARCH status={status} data={search_data}"
        )
        return 0

    seq_numbers = _parse_search_sequence_numbers(search_data)
    if not seq_numbers:
        return 0

    seq_numbers = seq_numbers[: settings.imap_max_fetch_per_cycle]
    logger.info(
        f"[{mailbox.email_address}] Trovati {len(seq_numbers)} messaggi nuovi da sincronizzare"
    )

    synced_count = 0
    max_uid_synced = last_uid

    for seq in seq_numbers:
        try:
            # last_uid is forwarded so the helper can discard messages whose
            # UID is not actually newer than the last synchronised one.
            uid, synced = await _fetch_and_save_message_by_seq(
                imap_client=imap_client,
                seq=seq,
                last_uid=last_uid,
                mailbox=mailbox,
                db=db,
                redis_client=redis_client,
            )
            if synced and uid and uid > max_uid_synced:
                synced_count += 1
                max_uid_synced = uid
        except Exception as e:
            # One failing message must not abort the rest of the cycle.
            # NOTE(review): if a later UID is saved, last_sync_uid moves past
            # the failed message and the next SEARCH no longer returns it.
            logger.error(
                f"[{mailbox.email_address}] Errore fetch seq {seq}: {e}",
                exc_info=True,
            )

    # Update last_sync_uid and last_sync_at only when progress was made.
    if max_uid_synced > last_uid:
        mailbox.last_sync_uid = max_uid_synced
        mailbox.last_sync_at = datetime.now(UTC)
        await db.flush()
        await db.commit()

    return synced_count


def _parse_search_sequence_numbers(search_data) -> list[str]:
    """Extract message sequence numbers from the lines of a SEARCH response.

    Only lines made up entirely of numbers (optionally prefixed by the
    ``SEARCH`` keyword) are treated as result lines. This keeps digits that
    appear in the server's free-text completion line (e.g. "... (took 3 ms)")
    from being mistaken for sequence numbers.
    """
    seq_numbers: list[str] = []
    for line in search_data or ():
        if isinstance(line, (bytes, bytearray)):
            text = bytes(line).decode("ascii", errors="ignore")
        else:
            text = str(line)
        tokens = text.split()
        if tokens and tokens[0].upper() == "SEARCH":
            tokens = tokens[1:]
        if tokens and all(tok.isascii() and tok.isdigit() for tok in tokens):
            seq_numbers.extend(tokens)
    return seq_numbers
|
||
|
||
|
||
async def _fetch_and_save_message_by_seq(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    seq: str,
    last_uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> tuple[int | None, bool]:
    """Fetch a single message by SEQUENCE NUMBER (not UID) and save it.

    The FETCH asks for ``UID``, ``RFC822`` and ``RFC822.SIZE``; the UID and
    size are pulled out of the textual response lines, the raw EML out of the
    ``bytearray`` item.

    Args:
        imap_client: IMAP client used for the FETCH.
        seq: Message sequence number, as returned by SEARCH.
        last_uid: Highest UID already synchronised; messages with a UID not
            greater than this are skipped.
        mailbox: Mailbox the message belongs to.
        db: Async DB session forwarded to ``_save_message``.
        redis_client: Redis client forwarded to ``_save_message``.

    Returns:
        ``(uid, saved)``: the message UID (``None`` if it could not be
        determined) and ``True`` only when the message was saved.
    """
    try:
        status, fetch_data = await imap_client.fetch(seq, "(UID RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] FETCH seq {seq} fallito: {e}")
        return None, False

    if status != "OK" or not fetch_data:
        logger.warning(
            f"[{mailbox.email_address}] FETCH seq {seq} risposta vuota: {status}"
        )
        return None, False

    if logger.isEnabledFor(logging.DEBUG):
        # Report only type and length: a bytearray item carries the whole raw
        # message, so it must not be rendered with str().
        items_info = [
            (
                type(x).__name__,
                len(x) if isinstance(x, (bytes, bytearray, str)) else str(x),
            )
            for x in fetch_data
        ]
        logger.debug(f"[{mailbox.email_address}] fetch_data seq {seq}: {items_info}")

    uid: int | None = None
    raw_eml: bytes | None = None
    size_bytes: int | None = None

    for item in fetch_data:
        if isinstance(item, bytearray):
            # Only a bytearray longer than 200 bytes is taken as the message
            # body; shorter ones are ignored.
            if len(item) > 200:
                raw_eml = bytes(item)
            continue

        if isinstance(item, bytes):
            text = item.decode("ascii", errors="ignore")
        elif isinstance(item, str):
            text = item
        else:
            continue

        uid_match = re.search(r"UID\s+(\d+)", text)
        if uid_match:
            uid = int(uid_match.group(1))
        size_match = re.search(r"RFC822\.SIZE\s+(\d+)", text)
        if size_match:
            size_bytes = int(size_match.group(1))

    # Unknown UID, or a message that is not newer than the last synced one.
    if uid is None or uid <= last_uid:
        return uid, False

    if not raw_eml:
        logger.warning(f"[{mailbox.email_address}] seq {seq} UID {uid}: body mancante")
        return uid, False

    if size_bytes is None:
        size_bytes = len(raw_eml)

    return uid, await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        size_bytes=size_bytes,
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )
|
||
|
||
|
||
async def _fetch_and_save_message(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Fetch a single message by UID and save it (one-shot sync_mailbox job).

    Args:
        imap_client: IMAP client used for the ``UID FETCH``.
        uid: IMAP UID of the message to download.
        mailbox: Mailbox the message belongs to.
        db: Async DB session.
        redis_client: Redis client forwarded to ``_save_message``.

    Returns:
        ``True`` when the message was saved. ``False`` when it is already in
        the DB, the FETCH fails or is empty, or no message body was found.
    """
    # Skip the download entirely when the UID is already stored.
    existing = await db.execute(
        select(Message.id).where(
            Message.mailbox_id == mailbox.id,
            Message.imap_uid == uid,
        )
    )
    if existing.scalar_one_or_none():
        return False

    try:
        status, fetch_data = await imap_client.uid("FETCH", str(uid), "(RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] UID FETCH {uid} fallito: {e}")
        return False

    if status != "OK" or not fetch_data:
        return False

    raw_eml: bytes | None = None
    size_bytes: int | None = None
    for item in fetch_data:
        # bytearray items are accepted as well, consistently with the
        # sequence-number variant in this file, which takes the message body
        # from a bytearray item; previously they were ignored here.
        if isinstance(item, (bytes, bytearray)) and len(item) > 100:
            raw_eml = bytes(item)
        elif isinstance(item, (bytes, bytearray, str)):
            s = item if isinstance(item, str) else bytes(item).decode("ascii", errors="ignore")
            m = re.search(r"RFC822\.SIZE\s+(\d+)", s)
            if m:
                size_bytes = int(m.group(1))

    if not raw_eml:
        return False

    return await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        # Fall back to the actual payload length when the size is missing.
        size_bytes=size_bytes or len(raw_eml),
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )
|
||
|
||
|
||
# ─── Save message (Fase 3 – con parser completo, allegati, state machine) ────
|
||
|
||
async def _save_message(
    uid: int,
    raw_eml: bytes,
    size_bytes: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Persist one EML message in the DB and on MinIO.

    Steps, in order:
      1. skip if a message with the same mailbox/UID already exists;
      2. parse the EML and classify its PEC type;
      3. for receipts carrying a reference Message-ID, run the outbound state
         machine and link the original message as parent;
      4. upload the raw EML (a failure is logged, the message is still saved
         with no ``raw_eml_path``);
      5. insert the ``Message`` row and flush;
      6. store the attachments, if any;
      7. publish a ``mailbox:new_message`` event on Redis (a failure is only
         logged).

    Returns:
        ``True`` when the message was stored, ``False`` when it was already
        present.
    """
    # ── Idempotency ───────────────────────────────────────────────────────────
    duplicate_query = select(Message.id).where(
        Message.mailbox_id == mailbox.id,
        Message.imap_uid == uid,
    )
    duplicate = await db.execute(duplicate_query)
    if duplicate.scalar_one_or_none():
        logger.debug(f"[{mailbox.email_address}] UID {uid} già in DB, skip")
        return False

    # ── Full EML parsing ──────────────────────────────────────────────────────
    eml = parse_eml(raw_eml)
    source_message = eml.raw_message or email.message_from_bytes(raw_eml)
    pec_class = classify_pec_message(source_message)
    received_at = datetime.now(UTC)

    # ── State machine: find and update the outbound message ───────────────────
    parent_message_id: uuid.UUID | None = None
    if pec_class.is_receipt and pec_class.riferimento_message_id:
        parent_message_id = await _apply_outbound_state_machine(
            riferimento_message_id=pec_class.riferimento_message_id,
            pec_type=pec_class.pec_type,
            tenant_id=mailbox.tenant_id,
            db=db,
        )

    # ── Upload raw EML to MinIO (best effort) ─────────────────────────────────
    eml_path: str | None = None
    try:
        eml_path = await upload_eml(
            tenant_id=str(mailbox.tenant_id),
            mailbox_id=str(mailbox.id),
            uid=uid,
            eml_bytes=raw_eml,
        )
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] Upload EML MinIO UID {uid}: {e}")

    # ── Store the message row ─────────────────────────────────────────────────
    record = Message(
        id=uuid.uuid4(),
        tenant_id=mailbox.tenant_id,
        mailbox_id=mailbox.id,
        imap_uid=uid,
        imap_folder="INBOX",
        direction="inbound",
        state="received",
        pec_type=pec_class.pec_type,
        subject=eml.subject,
        from_address=eml.from_address,
        to_addresses=eml.to_addresses or None,
        cc_addresses=eml.cc_addresses or None,
        message_id_header=eml.message_id,
        sent_at=eml.date,
        received_at=received_at,
        size_bytes=size_bytes,
        body_text=eml.body_text,
        body_html=eml.body_html,
        has_attachments=eml.has_attachments,
        parent_message_id=parent_message_id,
        raw_eml_path=eml_path,
        is_read=False,
    )
    db.add(record)
    # Flush so the message row exists before the attachment rows are added.
    await db.flush()

    # ── Store attachments on MinIO + attachments table ────────────────────────
    if eml.attachments:
        await _save_attachments(
            attachments=eml.attachments,
            message=record,
            mailbox=mailbox,
            db=db,
        )

    # ── Publish the Redis event for the WebSocket layer ───────────────────────
    try:
        payload = {
            "type": "mailbox:new_message",
            "mailbox_id": str(mailbox.id),
            "message_id": str(record.id),
            "subject": record.subject or "",
            "from_address": record.from_address or "",
            "pec_type": record.pec_type,
            "is_receipt": pec_class.is_receipt,
            "received_at": received_at.isoformat(),
        }
        channel = f"ws:tenant:{mailbox.tenant_id}"
        await redis_client.publish(channel, json.dumps(payload))
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] Redis publish UID {uid}: {e}")

    logger.info(
        f"[{mailbox.email_address}] Nuovo messaggio: UID={uid} "
        f"pec_type={pec_class.pec_type!r} "
        f"subject={record.subject!r} "
        f"allegati={len(eml.attachments)}"
    )
    return True
|
||
|
||
|
||
async def _apply_outbound_state_machine(
    riferimento_message_id: str,
    pec_type: str,
    tenant_id: uuid.UUID,
    db: AsyncSession,
) -> uuid.UUID | None:
    """Update the state of the original outbound message from a receipt.

    Looks up the tenant's outbound message whose ``message_id_header`` equals
    ``riferimento_message_id`` and, when ``apply_outbound_transition`` returns
    a new state, stores it together with a fresh ``updated_at`` and flushes.

    Returns:
        The id of the original message when found (whether or not its state
        changed), ``None`` otherwise.
    """
    lookup = select(Message).where(
        Message.tenant_id == tenant_id,
        Message.message_id_header == riferimento_message_id,
        Message.direction == "outbound",
    )
    original = (await db.execute(lookup)).scalar_one_or_none()

    if not original:
        logger.debug(
            f"Messaggio outbound non trovato per riferimento={riferimento_message_id!r} "
            f"(potrebbe essere stato inviato da client diverso)"
        )
        return None

    previous_state = original.state
    next_state = apply_outbound_transition(previous_state, pec_type)
    if not next_state:
        # No transition applies for this receipt type: just report the parent.
        return original.id

    original.state = next_state
    original.updated_at = datetime.now(UTC)
    await db.flush()
    logger.info(
        f"State machine outbound: {riferimento_message_id!r} "
        f"{previous_state!r} → {next_state!r} (ricevuta: {pec_type!r})"
    )
    return original.id
|
||
|
||
|
||
async def _save_attachments(
    attachments: list,
    message: Message,
    mailbox: Mailbox,
    db: AsyncSession,
) -> None:
    """Upload the attachments to MinIO and add their rows to ``attachments``.

    An attachment whose upload fails is logged and skipped (no DB row is
    created for it); the remaining attachments are still processed. A failure
    of the final flush is logged and not re-raised.

    Args:
        attachments: Attachment descriptors produced by the EML parser.
        message: DB message the attachments belong to.
        mailbox: Mailbox, used to build the MinIO path.
        db: DB session.
    """
    tenant = str(mailbox.tenant_id)
    box = str(mailbox.id)

    for item in attachments:
        try:
            stored_at = await upload_attachment(
                tenant_id=tenant,
                mailbox_id=box,
                message_id=str(message.id),
                filename=item.filename,
                content=item.content,
                content_type=item.content_type,
            )
        except Exception as e:
            logger.error(
                f"Upload allegato {item.filename!r} per messaggio {message.id}: {e}"
            )
            # Keep going with the other attachments even if one fails.
            continue

        db.add(
            Attachment(
                id=uuid.uuid4(),
                tenant_id=message.tenant_id,
                message_id=message.id,
                filename=item.filename,
                content_type=item.content_type,
                size_bytes=item.size_bytes,
                storage_path=stored_at,
                checksum_sha256=item.checksum_sha256,
            )
        )

    # Flush so that all attachment rows join the current transaction.
    try:
        await db.flush()
    except Exception as e:
        logger.error(f"Errore flush allegati per messaggio {message.id}: {e}")
|