This commit is contained in:
2026-03-18 17:30:13 +01:00
parent 58a233236c
commit d80d912fb3
36 changed files with 3502 additions and 4 deletions
+473
View File
@@ -0,0 +1,473 @@
"""
Logica di sincronizzazione messaggi IMAP.
Responsabilità:
1. Fetch della lista UID > last_sync_uid
2. Download envelope + raw EML per ogni UID
3. Parsing base degli header (subject, from, to, date)
4. Salvataggio in tabella messages
5. Upload raw EML su MinIO
6. Aggiornamento last_sync_uid e last_sync_at sulla mailbox
7. Pubblicazione evento Redis per notifica WebSocket
"""
import email
import email.header
import email.utils
import hashlib
import json
import logging
import re
import uuid
from datetime import UTC, datetime
import aioimaplib
import redis.asyncio as aioredis
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import get_settings
from app.models import Mailbox, Message
from app.storage.minio_client import upload_eml
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Application settings, resolved once at import time through get_settings().
settings = get_settings()
# ─── Helper: decodifica header email ─────────────────────────────────────────
def _decode_header(header_value: str | None) -> str | None:
"""Decodifica header RFC 2047 (es. =?utf-8?b?...?=) in stringa Python."""
if not header_value:
return None
try:
parts = email.header.decode_header(header_value)
decoded = []
for part, charset in parts:
if isinstance(part, bytes):
decoded.append(part.decode(charset or "utf-8", errors="replace"))
else:
decoded.append(part)
return "".join(decoded).strip()
except Exception:
return str(header_value)
def _extract_addresses(field: str | None) -> list[str]:
"""Estrae lista di indirizzi email da un campo To/Cc."""
if not field:
return []
try:
addresses = email.utils.getaddresses([field])
return [addr for _, addr in addresses if addr]
except Exception:
return []
def _parse_date(date_str: str | None) -> datetime | None:
"""Converte stringa data RFC 2822 in datetime con timezone."""
if not date_str:
return None
try:
parsed = email.utils.parsedate_to_datetime(date_str)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=UTC)
return parsed
except Exception:
return None
def _classify_pec_type(msg: email.message.Message) -> str:
    """Classify the PEC message type from its receipt headers.

    The ``X-TipoRicevuta`` and ``X-Ricevuta`` headers are inspected in that
    order, and the first one whose (trimmed, lower-cased) value is a known
    receipt name decides the result. An unrecognised ``X-TipoRicevuta``
    therefore no longer hides a valid ``X-Ricevuta``.

    Args:
        msg: Parsed message whose headers are inspected.

    Returns:
        The internal type identifier, or ``"posta_certificata"`` when
        neither header carries a recognised value.
    """
    type_map = {
        "accettazione": "accettazione",
        "non-accettazione": "non_accettazione",
        "presa-in-carico": "presa_in_carico",
        "avvenuta-consegna": "avvenuta_consegna",
        "mancata-consegna": "mancata_consegna",
        "errore-consegna": "errore_consegna",
        "preavviso-mancata-consegna": "preavviso_mancata_consegna",
        "rilevazione-virus": "rilevazione_virus",
    }
    # NOTE(review): per RFC 6109 X-TipoRicevuta appears to carry the receipt
    # format (completa/breve/sintetica), not the receipt kind — confirm.
    for header_name in ("X-TipoRicevuta", "X-Ricevuta"):
        # str() guards against header objects that are not plain strings.
        value = str(msg.get(header_name, "")).strip().lower()
        if value in type_map:
            return type_map[value]
    return "posta_certificata"
def _decode_text_part(part: email.message.Message) -> str | None:
    """Return the decoded text payload of *part*, or ``None`` if unavailable."""
    try:
        payload = part.get_payload(decode=True)
        if payload is None:
            return None
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            # Unknown charset label: keep the text rather than dropping it.
            return payload.decode("utf-8", errors="replace")
    except Exception as e:
        # Body extraction is best-effort; never let it abort the parsing.
        logger.debug(f"Errore decodifica body: {e}")
        return None


def _parse_eml(raw_bytes: bytes) -> dict:
    """Extract from a raw EML the fields stored in the messages table.

    This is a basic, best-effort parse: headers, the first text/plain and
    text/html bodies, and a flag telling whether any part looks like an
    attachment.

    Args:
        raw_bytes: The complete RFC 822 message.

    Returns:
        A dict with the keys ``subject``, ``from_address``, ``to_addresses``,
        ``cc_addresses``, ``message_id_header``, ``sent_at``, ``pec_type``,
        ``body_text``, ``body_html`` and ``has_attachments``; an empty dict
        when the message cannot be parsed at all.
    """
    try:
        msg = email.message_from_bytes(raw_bytes)
    except Exception as e:
        logger.warning(f"Errore parsing EML: {e}")
        return {}

    subject = _decode_header(msg.get("Subject"))
    # str() guards against header objects that are not plain strings.
    from_addr = email.utils.parseaddr(str(msg.get("From", "")))[1] or None
    to_addrs = _extract_addresses(msg.get("To"))
    cc_addrs = _extract_addresses(msg.get("Cc"))
    message_id = str(msg.get("Message-ID", "")).strip() or None
    date = _parse_date(msg.get("Date"))
    pec_type = _classify_pec_type(msg)

    body_text = None
    body_html = None
    has_attachments = False

    if msg.is_multipart():
        for part in msg.walk():
            content_type = part.get_content_type()
            # Parsed and lower-cased disposition: "attachment", "inline" or None.
            disposition = part.get_content_disposition()
            if disposition in ("attachment", "inline") and part.get_filename():
                has_attachments = True
                continue
            if disposition == "attachment":
                # Nameless attachment: not counted, and not a body either.
                continue
            # An "inline" part without a filename is a regular body part
            # (several mail clients mark the main text that way).
            if content_type == "text/plain" and body_text is None:
                body_text = _decode_text_part(part)
            elif content_type == "text/html" and body_html is None:
                body_html = _decode_text_part(part)
    else:
        content_type = msg.get_content_type()
        text = _decode_text_part(msg)
        if text:
            if content_type == "text/plain":
                body_text = text
            elif content_type == "text/html":
                body_html = text

    return {
        "subject": subject,
        "from_address": from_addr,
        "to_addresses": to_addrs if to_addrs else None,
        "cc_addresses": cc_addrs if cc_addrs else None,
        "message_id_header": message_id,
        "sent_at": date,
        "pec_type": pec_type,
        "body_text": body_text,
        "body_html": body_html,
        "has_attachments": has_attachments,
    }
# ─── Core sync function ───────────────────────────────────────────────────────
async def sync_new_messages(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> int:
    """Synchronise the messages with UID > ``mailbox.last_sync_uid``.

    The mailbox is searched for the UID range ``last_sync_uid + 1:*``; the
    resulting sequence numbers are fetched one by one, in ascending order,
    up to ``settings.imap_max_fetch_per_cycle`` per call. When at least one
    message is saved, ``last_sync_uid`` / ``last_sync_at`` are updated and
    the session is committed.

    The cycle stops at the first message whose fetch fails, so the stored
    watermark never moves past a message that was not persisted; that
    message is retried on the next call.

    Args:
        imap_client: Connected IMAP client used for SEARCH and FETCH.
        mailbox: Mailbox row being synchronised; updated in place.
        db: Session used to persist messages and the new watermark.
        redis_client: Client handed to the save step for event publishing.

    Returns:
        Number of new messages synchronised.
    """
    last_uid = mailbox.last_sync_uid or 0
    search_range = f"{last_uid + 1}:*"

    # search("UID", range) sends "SEARCH UID n:*"; the reply lists sequence
    # numbers, not UIDs.
    try:
        status, search_data = await imap_client.search("UID", search_range)
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] SEARCH fallito: {e}")
        return 0

    if status != "OK":
        logger.warning(
            f"[{mailbox.email_address}] SEARCH status={status} data={search_data}"
        )
        return 0

    raw_tokens = b" ".join(
        bytes(d) if isinstance(d, (bytes, bytearray)) else str(d).encode()
        for d in search_data
    ).decode("ascii", errors="ignore").split()
    # Ascending, de-duplicated order: the watermark logic below relies on
    # older messages being processed before newer ones.
    seq_numbers = sorted({int(t) for t in raw_tokens if t.isdigit() and int(t) > 0})
    if not seq_numbers:
        return 0

    # Cap the number of fetches per cycle.
    seq_numbers = seq_numbers[: settings.imap_max_fetch_per_cycle]
    logger.info(
        f"[{mailbox.email_address}] Trovati {len(seq_numbers)} messaggi nuovi da sincronizzare"
    )

    synced_count = 0
    max_uid_synced = last_uid

    for seq in seq_numbers:
        try:
            uid, synced = await _fetch_and_save_message_by_seq(
                imap_client=imap_client,
                seq=str(seq),
                last_uid=last_uid,
                mailbox=mailbox,
                db=db,
                redis_client=redis_client,
            )
        except Exception as e:
            logger.error(
                f"[{mailbox.email_address}] Errore fetch seq {seq}: {e}",
                exc_info=True,
            )
            # Stop here: advancing past this message would skip it forever.
            break
        if uid is None:
            # The fetch produced no usable response; retry on the next cycle
            # rather than letting a later success move the watermark past it.
            break
        if synced and uid > max_uid_synced:
            synced_count += 1
            max_uid_synced = uid

    # Persist the new watermark together with the messages saved above.
    if max_uid_synced > last_uid:
        mailbox.last_sync_uid = max_uid_synced
        mailbox.last_sync_at = datetime.now(UTC)
        await db.flush()
        await db.commit()

    return synced_count
async def _fetch_and_save_message_by_seq(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    seq: str,
    last_uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> tuple[int | None, bool]:
    """Fetch one message by SEQUENCE NUMBER (not UID) and save it.

    UID is requested in the FETCH so it can be read back from the response.

    Args:
        imap_client: Connected IMAP client.
        seq: Sequence number of the message to fetch.
        last_uid: Current watermark; messages with UID <= this are skipped.
        mailbox: Mailbox the message belongs to.
        db: Session handed to the save step.
        redis_client: Client handed to the save step.

    Returns:
        ``(uid, saved)``: the message UID (``None`` when the fetch failed or
        no UID was found in the response) and ``True`` only if the message
        was saved.
    """
    try:
        status, fetch_data = await imap_client.fetch(seq, "(UID RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] FETCH seq {seq} fallito: {e}")
        return None, False

    if status != "OK" or not fetch_data:
        logger.warning(
            f"[{mailbox.email_address}] FETCH seq {seq} risposta vuota: {status}"
        )
        return None, False

    # Summarise the response shape for debugging. Sized items (including the
    # bytearray body) are reported by length only, and the summary is built
    # only when DEBUG is enabled, so the EML is never rendered as text.
    if logger.isEnabledFor(logging.DEBUG):
        items_info = [
            (
                type(x).__name__,
                len(x) if isinstance(x, (bytes, bytearray, str)) else str(x),
            )
            for x in fetch_data
        ]
        logger.debug(f"[{mailbox.email_address}] fetch_data seq {seq}: {items_info}")

    # Response layout observed with aioimaplib:
    #   [0] bytes     -> FETCH response header carrying UID and RFC822.SIZE
    #   [1] bytearray -> raw EML (the message body; bytearray, not bytes)
    #   [2] bytes     -> ')' (closing)
    #   [3] bytes     -> final OK line
    uid: int | None = None
    raw_eml: bytes | None = None
    size_bytes: int | None = None

    for item in fetch_data:
        if isinstance(item, bytearray):
            # NOTE(review): the 200-byte floor drops very small messages;
            # kept as-is, confirm whether it is still needed.
            if len(item) > 200:
                raw_eml = bytes(item)
            continue
        if isinstance(item, bytes):
            text = item.decode("ascii", errors="ignore")
        elif isinstance(item, str):
            text = item
        else:
            continue
        uid_match = re.search(r"UID\s+(\d+)", text)
        if uid_match:
            uid = int(uid_match.group(1))
        size_match = re.search(r"RFC822\.SIZE\s+(\d+)", text)
        if size_match:
            size_bytes = int(size_match.group(1))

    if uid is None or uid <= last_uid:
        # No UID found, or the message is not newer than the watermark.
        return uid, False

    if not raw_eml:
        logger.warning(f"[{mailbox.email_address}] seq {seq} UID {uid}: body mancante")
        return uid, False

    if size_bytes is None:
        size_bytes = len(raw_eml)

    return uid, await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        size_bytes=size_bytes,
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )
async def _fetch_and_save_message(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Fetch one message by UID (UID FETCH) and save it.

    Args:
        imap_client: Connected IMAP client.
        uid: IMAP UID of the message to fetch.
        mailbox: Mailbox the message belongs to.
        db: Session used for the duplicate check and the save step.
        redis_client: Client handed to the save step.

    Returns:
        ``True`` if the message was saved; ``False`` when it is already
        stored, the fetch fails, or the response carries no body.
    """
    # Skip the download entirely when this UID is already stored.
    existing = await db.execute(
        select(Message.id).where(
            Message.mailbox_id == mailbox.id,
            Message.imap_uid == uid,
        )
    )
    if existing.scalar_one_or_none():
        return False

    try:
        status, fetch_data = await imap_client.uid("FETCH", str(uid), "(RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] UID FETCH {uid} fallito: {e}")
        return False

    if status != "OK" or not fetch_data:
        return False

    raw_eml: bytes | None = None
    fallback_eml: bytes | None = None
    size_bytes: int | None = None

    for item in fetch_data:
        if isinstance(item, bytearray):
            # The body arrives as a bytearray with aioimaplib (as documented
            # in the by-sequence fetch), so it must be accepted here too.
            if item:
                raw_eml = bytes(item)
            continue
        if isinstance(item, bytes) and len(item) > 100:
            # Previous heuristic, kept only as a fallback in case the client
            # delivers the body as plain bytes.
            fallback_eml = item
            continue
        if isinstance(item, (bytes, str)):
            s = item.decode("ascii", errors="ignore") if isinstance(item, bytes) else item
            m = re.search(r"RFC822\.SIZE\s+(\d+)", s)
            if m:
                size_bytes = int(m.group(1))

    raw_eml = raw_eml or fallback_eml
    if not raw_eml:
        return False

    return await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        size_bytes=size_bytes or len(raw_eml),
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )
async def _save_message(
    uid: int,
    raw_eml: bytes,
    size_bytes: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Store one EML message in the DB and object storage, then announce it.

    Steps:
      1. Return early when a row for (mailbox, UID) already exists.
      2. Parse the basic headers and body.
      3. Upload the raw EML; a failure is logged and the row is stored with
         ``raw_eml_path`` left as ``None``.
      4. Add the ``Message`` row and flush the session.
      5. Publish a JSON event on the tenant channel; a failure is logged.

    Returns:
        ``True`` when a new row was added, ``False`` when it already existed.
    """
    # Idempotency guard.
    duplicate_query = select(Message.id).where(
        Message.mailbox_id == mailbox.id,
        Message.imap_uid == uid,
    )
    lookup = await db.execute(duplicate_query)
    if lookup.scalar_one_or_none():
        logger.debug(f"[{mailbox.email_address}] UID {uid} già in DB, skip")
        return False

    parsed = _parse_eml(raw_eml)
    received_at = datetime.now(UTC)

    # Raw EML upload (best effort).
    eml_path: str | None
    try:
        eml_path = await upload_eml(
            tenant_id=str(mailbox.tenant_id),
            mailbox_id=str(mailbox.id),
            uid=uid,
            eml_bytes=raw_eml,
        )
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] Upload MinIO UID {uid}: {e}")
        eml_path = None

    # Fields copied verbatim from the parse result (absent keys become None).
    copied_fields = (
        "subject",
        "from_address",
        "to_addresses",
        "cc_addresses",
        "message_id_header",
        "sent_at",
        "body_text",
        "body_html",
    )
    message = Message(
        id=uuid.uuid4(),
        tenant_id=mailbox.tenant_id,
        mailbox_id=mailbox.id,
        imap_uid=uid,
        imap_folder="INBOX",
        direction="inbound",
        state="received",
        pec_type=parsed.get("pec_type", "posta_certificata"),
        received_at=received_at,
        size_bytes=size_bytes,
        has_attachments=parsed.get("has_attachments", False),
        raw_eml_path=eml_path,
        is_read=False,
        **{field: parsed.get(field) for field in copied_fields},
    )
    db.add(message)
    await db.flush()

    # Notify WebSocket listeners through Redis.
    try:
        payload = json.dumps(
            {
                "type": "mailbox:new_message",
                "mailbox_id": str(mailbox.id),
                "message_id": str(message.id),
                "subject": message.subject or "",
                "from_address": message.from_address or "",
                "pec_type": message.pec_type,
                "received_at": received_at.isoformat(),
            }
        )
        await redis_client.publish(f"ws:tenant:{mailbox.tenant_id}", payload)
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] Redis publish UID {uid}: {e}")

    logger.info(
        f"[{mailbox.email_address}] Nuovo messaggio: UID={uid} "
        f"subject={message.subject!r} pec_type={message.pec_type}"
    )
    return True