"""
IMAP message synchronisation logic.

Responsibilities:
  1. Fetch the list of UIDs > last_sync_uid
  2. Download envelope + raw EML for each UID
  3. Basic header parsing (subject, from, to, date)
  4. Persist into the messages table
  5. Upload the raw EML to MinIO
  6. Update last_sync_uid and last_sync_at on the mailbox
  7. Publish a Redis event for WebSocket notification
"""

import email
import email.header
import email.message
import email.utils
import hashlib
import json
import logging
import re
import uuid
from datetime import UTC, datetime

import aioimaplib
import redis.asyncio as aioredis
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.config import get_settings
from app.models import Mailbox, Message
from app.storage.minio_client import upload_eml

logger = logging.getLogger(__name__)
settings = get_settings()

# Patterns used to pull UID / size out of an IMAP FETCH response line.
_UID_RE = re.compile(r"UID\s+(\d+)")
_SIZE_RE = re.compile(r"RFC822\.SIZE\s+(\d+)")

# Receipt-type header value → pec_type stored on the message row.
_PEC_TYPE_MAP = {
    "accettazione": "accettazione",
    "non-accettazione": "non_accettazione",
    "presa-in-carico": "presa_in_carico",
    "avvenuta-consegna": "avvenuta_consegna",
    "mancata-consegna": "mancata_consegna",
    "errore-consegna": "errore_consegna",
    "preavviso-mancata-consegna": "preavviso_mancata_consegna",
    "rilevazione-virus": "rilevazione_virus",
}


# ─── Helpers: e-mail header decoding ──────────────────────────────────────────

def _header_str(value: object) -> str:
    """Return a raw header value as a plain ``str`` (``""`` when missing).

    ``Message.get`` may hand back an ``email.header.Header`` object instead of
    a ``str`` when the header contains undecodable bytes; such an object has
    no ``lower``/``strip`` methods, so it is normalised here first.
    """
    return "" if value is None else str(value)


def _decode_header(header_value: str | None) -> str | None:
    """Decode an RFC 2047 header (e.g. ``=?utf-8?b?...?=``) into a Python string.

    Returns ``None`` for a missing/empty header. If decoding fails entirely,
    the raw value is returned as ``str`` (best effort).
    """
    if not header_value:
        return None
    try:
        chunks: list[str] = []
        for part, charset in email.header.decode_header(header_value):
            if isinstance(part, bytes):
                try:
                    chunks.append(part.decode(charset or "utf-8", errors="replace"))
                except LookupError:
                    # Unknown charset label: keep the chunk instead of
                    # discarding the whole decoded header.
                    chunks.append(part.decode("utf-8", errors="replace"))
            else:
                chunks.append(part)
        return "".join(chunks).strip()
    except Exception:
        # Best effort on untrusted input: fall back to the raw value.
        return str(header_value)


def _extract_addresses(field: str | None) -> list[str]:
    """Extract the list of e-mail addresses from a To/Cc header value.

    Returns an empty list when the header is missing or cannot be parsed.
    """
    if not field:
        return []
    try:
        return [addr for _, addr in email.utils.getaddresses([field]) if addr]
    except Exception:
        return []


def _parse_date(date_str: str | None) -> datetime | None:
    """Convert an RFC 2822 date string into a timezone-aware ``datetime``.

    A parsed value without timezone information is tagged as UTC. Returns
    ``None`` when the header is missing or unparsable.
    """
    if not date_str:
        return None
    try:
        parsed = email.utils.parsedate_to_datetime(date_str)
    except Exception:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=UTC)
    return parsed


def _classify_pec_type(msg: email.message.Message) -> str:
    """Classify the PEC type from the X-Ricevuta / X-TipoRicevuta headers.

    Phase 3 performs the full parsing; here we classify as well as we can.
    ``X-TipoRicevuta`` takes precedence over ``X-Ricevuta``; any value not in
    the known map yields ``"posta_certificata"``.
    """
    x_ricevuta = _header_str(msg.get("X-Ricevuta")).strip().lower()
    x_tipo = _header_str(msg.get("X-TipoRicevuta")).strip().lower()
    return _PEC_TYPE_MAP.get(x_tipo or x_ricevuta, "posta_certificata")


def _decode_text_payload(part: email.message.Message) -> str | None:
    """Decode the transfer-decoded payload of a text part to ``str``.

    Returns ``None`` when the part has no payload or decoding fails. An
    unknown charset label falls back to UTF-8 with replacement characters.
    """
    try:
        payload = part.get_payload(decode=True)
        if payload is None:
            return None
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            return payload.decode("utf-8", errors="replace")
    except Exception as e:
        # Body extraction is best effort; never let it abort the sync.
        logger.debug(f"Decodifica body fallita: {e}")
        return None


def _parse_eml(raw_bytes: bytes) -> dict:
    """Basic EML parsing: extract the fields needed for the messages table.

    Full parsing (body, attachments, EML-in-EML) happens in Phase 3.

    Returns:
        A dict with keys ``subject``, ``from_address``, ``to_addresses``,
        ``cc_addresses``, ``message_id_header``, ``sent_at``, ``pec_type``,
        ``body_text``, ``body_html`` and ``has_attachments``; an empty dict
        when the message cannot be parsed at all.
    """
    try:
        msg = email.message_from_bytes(raw_bytes)
    except Exception as e:
        logger.warning(f"Errore parsing EML: {e}")
        return {}

    subject = _decode_header(msg.get("Subject"))
    from_addr = email.utils.parseaddr(_header_str(msg.get("From")))[1] or None
    to_addrs = _extract_addresses(msg.get("To"))
    cc_addrs = _extract_addresses(msg.get("Cc"))
    message_id = _header_str(msg.get("Message-ID")).strip() or None
    date = _parse_date(msg.get("Date"))
    pec_type = _classify_pec_type(msg)

    # Text/HTML body extraction (best effort – Phase 3 does the full parsing).
    body_text = None
    body_html = None
    has_attachments = False

    if msg.is_multipart():
        for part in msg.walk():
            ct = part.get_content_type()
            # Disposition types are case-insensitive.
            disp = _header_str(part.get("Content-Disposition")).lower()
            if "attachment" in disp or "inline" in disp:
                if part.get_filename():
                    has_attachments = True
                    continue
                if "attachment" in disp:
                    # Unnamed attachment: neither counted nor used as body.
                    continue
                # "inline" without a filename is commonly the body itself:
                # fall through to body extraction.
            if ct == "text/plain" and body_text is None:
                body_text = _decode_text_payload(part)
            elif ct == "text/html" and body_html is None:
                body_html = _decode_text_payload(part)
    else:
        ct = msg.get_content_type()
        if ct == "text/plain":
            body_text = _decode_text_payload(msg) or None
        elif ct == "text/html":
            body_html = _decode_text_payload(msg) or None

    return {
        "subject": subject,
        "from_address": from_addr,
        "to_addresses": to_addrs if to_addrs else None,
        "cc_addresses": cc_addrs if cc_addrs else None,
        "message_id_header": message_id,
        "sent_at": date,
        "pec_type": pec_type,
        "body_text": body_text,
        "body_html": body_html,
        "has_attachments": has_attachments,
    }


def _parse_fetch_header(item: bytes | str) -> tuple[int | None, int | None]:
    """Return ``(uid, size)`` found in one FETCH response line (``None`` if absent)."""
    text = item.decode("ascii", errors="ignore") if isinstance(item, bytes) else item
    uid_match = _UID_RE.search(text)
    size_match = _SIZE_RE.search(text)
    return (
        int(uid_match.group(1)) if uid_match else None,
        int(size_match.group(1)) if size_match else None,
    )


# ─── Core sync function ───────────────────────────────────────────────────────

async def sync_new_messages(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> int:
    """Synchronise the new messages (UID > last_sync_uid) of the given mailbox.

    Args:
        imap_client: IMAP client used for SEARCH/FETCH.
        mailbox: Mailbox row; ``last_sync_uid`` / ``last_sync_at`` are updated
            when at least one message is saved.
        db: Session used to persist messages; committed at the end when the
            watermark advanced.
        redis_client: Client used to publish the new-message events.

    Returns:
        Number of new messages synchronised.
    """
    last_uid = mailbox.last_sync_uid or 0
    search_range = f"{last_uid + 1}:*"

    # ── SEARCH UID > last_sync_uid ────────────────────────────────────────────
    # aioimaplib does not support uid('SEARCH', ...) → use search('UID', range),
    # which sends "SEARCH UID n:*" and returns sequence numbers.
    try:
        status, search_data = await imap_client.search("UID", search_range)
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] SEARCH fallito: {e}")
        return 0

    if status != "OK":
        logger.warning(
            f"[{mailbox.email_address}] SEARCH status={status} data={search_data}"
        )
        return 0

    # search() returns sequence numbers (not UIDs).
    raw_seqs = b" ".join(
        bytes(d) if isinstance(d, (bytes, bytearray)) else str(d).encode()
        for d in search_data
    ).decode("ascii", errors="ignore").split()
    seq_numbers = [s for s in raw_seqs if s.isdigit()]

    if not seq_numbers:
        return 0

    # Cap the number of fetches per cycle.
    seq_numbers = seq_numbers[: settings.imap_max_fetch_per_cycle]

    logger.info(
        f"[{mailbox.email_address}] Trovati {len(seq_numbers)} messaggi nuovi da sincronizzare"
    )

    synced_count = 0
    max_uid_synced = last_uid

    # NOTE(review): a message that fails here is logged and skipped, while a
    # later success still advances the watermark past it, so the failed UID
    # will not be retried by this function. Confirm this best-effort policy
    # is intended.
    for seq in seq_numbers:
        try:
            uid, synced = await _fetch_and_save_message_by_seq(
                imap_client=imap_client,
                seq=seq,
                last_uid=last_uid,
                mailbox=mailbox,
                db=db,
                redis_client=redis_client,
            )
        except Exception as e:
            logger.error(
                f"[{mailbox.email_address}] Errore fetch seq {seq}: {e}",
                exc_info=True,
            )
            continue
        if synced:
            # Count every saved message; track the watermark separately.
            synced_count += 1
            if uid is not None and uid > max_uid_synced:
                max_uid_synced = uid

    # Update last_sync_uid and last_sync_at.
    if max_uid_synced > last_uid:
        mailbox.last_sync_uid = max_uid_synced
        mailbox.last_sync_at = datetime.now(UTC)
        await db.flush()
        await db.commit()

    return synced_count


async def _fetch_and_save_message_by_seq(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    seq: str,
    last_uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> tuple[int | None, bool]:
    """Fetch a single message by SEQUENCE NUMBER (not UID) and save it.

    UID is included in the FETCH request so it can be read from the response.

    Returns:
        ``(uid, saved)``: the message UID (``None`` if it could not be
        determined) and ``True`` if the message was saved, ``False`` otherwise.
    """
    # FETCH seq (UID RFC822 RFC822.SIZE)
    try:
        status, fetch_data = await imap_client.fetch(seq, "(UID RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] FETCH seq {seq} fallito: {e}")
        return None, False

    if status != "OK" or not fetch_data:
        logger.warning(
            f"[{mailbox.email_address}] FETCH seq {seq} risposta vuota: {status}"
        )
        return None, False

    # Debug: show the structure of fetch_data. Guarded so the summary is not
    # built when DEBUG is off, and the raw body is reported by length only
    # rather than dumped into the log.
    if logger.isEnabledFor(logging.DEBUG):
        items_info = [
            (
                type(x).__name__,
                len(x) if isinstance(x, (bytes, bytearray, str)) else str(x),
            )
            for x in fetch_data
        ]
        logger.debug(f"[{mailbox.email_address}] fetch_data seq {seq}: {items_info}")

    # Extract UID, raw EML and size from the response.
    # CRITICAL NOTE: aioimaplib returns the EML body as `bytearray` (not `bytes`)!
    #   [0] bytes     → FETCH response header with UID and RFC822.SIZE
    #   [1] bytearray → raw EML (the message body)
    #   [2] bytes     → ')' (closing)
    #   [3] bytes     → final OK line
    uid: int | None = None
    raw_eml: bytes | None = None
    size_bytes: int | None = None

    for item in fetch_data:
        if isinstance(item, bytearray):
            # This is the EML message body (tiny literals are ignored).
            if len(item) > 200:
                raw_eml = bytes(item)
        elif isinstance(item, (bytes, str)):
            # Response header line – pull out UID and RFC822.SIZE.
            found_uid, found_size = _parse_fetch_header(item)
            if found_uid is not None:
                uid = found_uid
            if found_size is not None:
                size_bytes = found_size

    if uid is None or uid <= last_uid:
        # UID unknown or <= last_uid: this message must not be synchronised.
        return uid, False

    if not raw_eml:
        logger.warning(f"[{mailbox.email_address}] seq {seq} UID {uid}: body mancante")
        return uid, False

    if size_bytes is None:
        size_bytes = len(raw_eml)

    return uid, await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        size_bytes=size_bytes,
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )


async def _fetch_and_save_message(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Fetch a single message by UID (used by the one-shot sync_mailbox job).

    Uses UID FETCH (aioimaplib ``uid()`` method).

    Returns:
        ``True`` if the message was downloaded and saved; ``False`` if it
        already exists, the fetch failed, or no body was found.
    """
    existing = await db.execute(
        select(Message.id).where(
            Message.mailbox_id == mailbox.id,
            Message.imap_uid == uid,
        )
    )
    if existing.scalar_one_or_none():
        return False

    try:
        status, fetch_data = await imap_client.uid("FETCH", str(uid), "(RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] UID FETCH {uid} fallito: {e}")
        return False

    if status != "OK" or not fetch_data:
        return False

    literal_body: bytes | None = None
    legacy_body: bytes | None = None
    size_bytes: int | None = None

    for item in fetch_data:
        if isinstance(item, bytearray):
            # aioimaplib delivers the message literal as `bytearray`.
            if item:
                literal_body = bytes(item)
        elif isinstance(item, bytes) and len(item) > 100:
            # Fallback heuristic: a long `bytes` item is taken as the body.
            legacy_body = item
        elif isinstance(item, (bytes, str)):
            _, found_size = _parse_fetch_header(item)
            if found_size is not None:
                size_bytes = found_size

    raw_eml = literal_body or legacy_body
    if not raw_eml:
        return False

    return await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        size_bytes=size_bytes or len(raw_eml),
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )


async def _save_message(
    uid: int,
    raw_eml: bytes,
    size_bytes: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Save an EML message to the DB and MinIO, then publish a WebSocket event.

    The session is flushed but not committed here; committing is left to the
    caller.

    Returns:
        ``True`` if a new row was added, ``False`` if a message with the same
        mailbox and UID already exists.
    """
    # Idempotence: skip when this (mailbox, UID) pair is already stored.
    existing = await db.execute(
        select(Message.id).where(
            Message.mailbox_id == mailbox.id,
            Message.imap_uid == uid,
        )
    )
    if existing.scalar_one_or_none():
        logger.debug(f"[{mailbox.email_address}] UID {uid} già in DB, skip")
        return False

    parsed = _parse_eml(raw_eml)
    received_at = datetime.now(UTC)

    # Upload to MinIO (best effort: the row is saved without a path on failure).
    eml_path: str | None = None
    try:
        eml_path = await upload_eml(
            tenant_id=str(mailbox.tenant_id),
            mailbox_id=str(mailbox.id),
            uid=uid,
            eml_bytes=raw_eml,
        )
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] Upload MinIO UID {uid}: {e}")

    # Save to DB.
    message = Message(
        id=uuid.uuid4(),
        tenant_id=mailbox.tenant_id,
        mailbox_id=mailbox.id,
        imap_uid=uid,
        imap_folder="INBOX",
        direction="inbound",
        state="received",
        pec_type=parsed.get("pec_type", "posta_certificata"),
        subject=parsed.get("subject"),
        from_address=parsed.get("from_address"),
        to_addresses=parsed.get("to_addresses"),
        cc_addresses=parsed.get("cc_addresses"),
        message_id_header=parsed.get("message_id_header"),
        sent_at=parsed.get("sent_at"),
        received_at=received_at,
        size_bytes=size_bytes,
        body_text=parsed.get("body_text"),
        body_html=parsed.get("body_html"),
        has_attachments=parsed.get("has_attachments", False),
        raw_eml_path=eml_path,
        is_read=False,
    )
    db.add(message)
    await db.flush()

    # Publish the Redis event for WebSocket subscribers.
    # NOTE(review): this is published after flush but before any commit made
    # by the caller — confirm consumers tolerate a row that is not yet
    # committed.
    try:
        event = {
            "type": "mailbox:new_message",
            "mailbox_id": str(mailbox.id),
            "message_id": str(message.id),
            "subject": message.subject or "",
            "from_address": message.from_address or "",
            "pec_type": message.pec_type,
            "received_at": received_at.isoformat(),
        }
        await redis_client.publish(f"ws:tenant:{mailbox.tenant_id}", json.dumps(event))
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] Redis publish UID {uid}: {e}")

    logger.info(
        f"[{mailbox.email_address}] Nuovo messaggio: UID={uid} "
        f"subject={message.subject!r} pec_type={message.pec_type}"
    )
    return True