# Mirror of https://github.com/idrainformatica/PecFlow.git
# Synced 2026-06-16 12:45:42 +02:00 (474 lines, 15 KiB, Python)
"""
Logica di sincronizzazione messaggi IMAP.

Responsabilità:
  1. Fetch della lista UID > last_sync_uid
  2. Download envelope + raw EML per ogni UID
  3. Parsing base degli header (subject, from, to, date)
  4. Salvataggio in tabella messages
  5. Upload raw EML su MinIO
  6. Aggiornamento last_sync_uid e last_sync_at sulla mailbox
  7. Pubblicazione evento Redis per notifica WebSocket
"""
|
||
|
||
import email
|
||
import email.header
|
||
import email.utils
|
||
import hashlib
|
||
import json
|
||
import logging
|
||
import re
|
||
import uuid
|
||
from datetime import UTC, datetime
|
||
|
||
import aioimaplib
|
||
import redis.asyncio as aioredis
|
||
from sqlalchemy import select
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
from app.config import get_settings
|
||
from app.models import Mailbox, Message
|
||
from app.storage.minio_client import upload_eml
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Application settings, resolved once when the module is imported.
settings = get_settings()
|
||
|
||
|
||
# ─── Helper: decodifica header email ─────────────────────────────────────────
|
||
|
||
def _decode_header(header_value: str | None) -> str | None:
|
||
"""Decodifica header RFC 2047 (es. =?utf-8?b?...?=) in stringa Python."""
|
||
if not header_value:
|
||
return None
|
||
try:
|
||
parts = email.header.decode_header(header_value)
|
||
decoded = []
|
||
for part, charset in parts:
|
||
if isinstance(part, bytes):
|
||
decoded.append(part.decode(charset or "utf-8", errors="replace"))
|
||
else:
|
||
decoded.append(part)
|
||
return "".join(decoded).strip()
|
||
except Exception:
|
||
return str(header_value)
|
||
|
||
|
||
def _extract_addresses(field: str | None) -> list[str]:
|
||
"""Estrae lista di indirizzi email da un campo To/Cc."""
|
||
if not field:
|
||
return []
|
||
try:
|
||
addresses = email.utils.getaddresses([field])
|
||
return [addr for _, addr in addresses if addr]
|
||
except Exception:
|
||
return []
|
||
|
||
|
||
def _parse_date(date_str: str | None) -> datetime | None:
|
||
"""Converte stringa data RFC 2822 in datetime con timezone."""
|
||
if not date_str:
|
||
return None
|
||
try:
|
||
parsed = email.utils.parsedate_to_datetime(date_str)
|
||
if parsed.tzinfo is None:
|
||
parsed = parsed.replace(tzinfo=UTC)
|
||
return parsed
|
||
except Exception:
|
||
return None
|
||
|
||
|
||
def _classify_pec_type(msg: email.message.Message) -> str:
|
||
"""
|
||
Classifica il tipo PEC dal header X-Ricevuta / X-TipoRicevuta.
|
||
Fase 3 fa il parsing completo; qui classifichiamo al meglio possibile.
|
||
"""
|
||
x_ricevuta = msg.get("X-Ricevuta", "").lower()
|
||
x_tipo = msg.get("X-TipoRicevuta", "").lower()
|
||
|
||
TYPE_MAP = {
|
||
"accettazione": "accettazione",
|
||
"non-accettazione": "non_accettazione",
|
||
"presa-in-carico": "presa_in_carico",
|
||
"avvenuta-consegna": "avvenuta_consegna",
|
||
"mancata-consegna": "mancata_consegna",
|
||
"errore-consegna": "errore_consegna",
|
||
"preavviso-mancata-consegna": "preavviso_mancata_consegna",
|
||
"rilevazione-virus": "rilevazione_virus",
|
||
}
|
||
|
||
value = x_tipo or x_ricevuta
|
||
return TYPE_MAP.get(value, "posta_certificata")
|
||
|
||
|
||
def _parse_eml(raw_bytes: bytes) -> dict:
    """Extract from a raw EML the basic fields stored in the ``messages`` table.

    This is a best-effort pass; the complete parsing (body, attachments,
    EML-in-EML) is left to Phase 3.

    Args:
        raw_bytes: The complete message as raw bytes.

    Returns:
        A dict with the keys ``subject``, ``from_address``, ``to_addresses``,
        ``cc_addresses``, ``message_id_header``, ``sent_at``, ``pec_type``,
        ``body_text``, ``body_html`` and ``has_attachments``. Empty address
        lists are reported as ``None``. An empty dict is returned when the
        message cannot be parsed at all.
    """
    try:
        msg = email.message_from_bytes(raw_bytes)
    except Exception as e:
        logger.warning(f"Errore parsing EML: {e}")
        return {}

    subject = _decode_header(msg.get("Subject"))
    from_addr = email.utils.parseaddr(msg.get("From", ""))[1] or None
    to_addrs = _extract_addresses(msg.get("To"))
    cc_addrs = _extract_addresses(msg.get("Cc"))
    message_id = msg.get("Message-ID", "").strip() or None
    date = _parse_date(msg.get("Date"))
    pec_type = _classify_pec_type(msg)

    # Best-effort text/html body extraction.
    body_text: str | None = None
    body_html: str | None = None
    has_attachments = False

    if msg.is_multipart():
        for part in msg.walk():
            # Lower-cased disposition token without parameters, or None.
            disposition = part.get_content_disposition()
            if disposition in ("attachment", "inline") and part.get_filename():
                has_attachments = True
                continue
            if disposition == "attachment":
                # Unnamed attachment: neither counted nor used as body.
                continue

            # Parts without a disposition, or "inline" parts without a
            # filename, are body candidates; the first of each kind wins.
            content_type = part.get_content_type()
            if content_type == "text/plain" and body_text is None:
                body_text = _decode_text_payload(part)
            elif content_type == "text/html" and body_html is None:
                body_html = _decode_text_payload(part)
    else:
        content_type = msg.get_content_type()
        if content_type in ("text/plain", "text/html"):
            text = _decode_text_payload(msg)
            if text:
                if content_type == "text/plain":
                    body_text = text
                else:
                    body_html = text

    return {
        "subject": subject,
        "from_address": from_addr,
        "to_addresses": to_addrs if to_addrs else None,
        "cc_addresses": cc_addrs if cc_addrs else None,
        "message_id_header": message_id,
        "sent_at": date,
        "pec_type": pec_type,
        "body_text": body_text,
        "body_html": body_html,
        "has_attachments": has_attachments,
    }


def _decode_text_payload(part: email.message.Message) -> str | None:
    """Return the decoded text of one message part, or ``None`` if it has no payload."""
    try:
        payload = part.get_payload(decode=True)
        charset = part.get_content_charset() or "utf-8"
    except Exception as exc:
        logger.debug(f"Payload EML non leggibile: {exc}")
        return None
    if payload is None:
        return None
    try:
        return payload.decode(charset, errors="replace")
    except (LookupError, ValueError):
        # Unknown charset label: keep the text rather than dropping the body.
        return payload.decode("utf-8", errors="replace")
|
||
|
||
|
||
# ─── Core sync function ───────────────────────────────────────────────────────
|
||
|
||
async def sync_new_messages(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> int:
    """Synchronise the messages whose UID is greater than ``mailbox.last_sync_uid``.

    The mailbox is searched for ``UID <last_sync_uid + 1>:*``, at most
    ``settings.imap_max_fetch_per_cycle`` hits are fetched and stored (lowest
    sequence numbers first), and ``last_sync_uid`` / ``last_sync_at`` are then
    advanced to the highest UID that was stored.

    Args:
        imap_client: IMAP client used to issue SEARCH and FETCH.
        mailbox: Mailbox being synchronised; updated in place.
        db: Database session used to persist messages and the mailbox state.
        redis_client: Redis client passed on for the new-message notification.

    Returns:
        Number of new messages stored during this call. ``0`` is also returned
        when the SEARCH command fails.
    """
    last_uid = mailbox.last_sync_uid or 0
    search_range = f"{last_uid + 1}:*"

    # -- SEARCH UID > last_sync_uid ------------------------------------------
    # Per the original author's note, aioimaplib does not support
    # uid('SEARCH', ...), so search('UID', range) is used instead: it sends
    # "SEARCH UID n:*" and the reply contains sequence numbers, not UIDs.
    try:
        status, search_data = await imap_client.search("UID", search_range)
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] SEARCH fallito: {e}")
        return 0

    if status != "OK":
        logger.warning(
            f"[{mailbox.email_address}] SEARCH status={status} data={search_data}"
        )
        return 0

    seq_numbers = _parse_search_sequence_numbers(search_data)
    if not seq_numbers:
        return 0

    # Cap the number of fetches per cycle. The list is sorted ascending, so
    # the messages left out are always the newest ones and are picked up by
    # the next cycle.
    seq_numbers = seq_numbers[: settings.imap_max_fetch_per_cycle]
    logger.info(
        f"[{mailbox.email_address}] Trovati {len(seq_numbers)} messaggi nuovi da sincronizzare"
    )

    synced_count = 0
    max_uid_synced = last_uid

    for seq in seq_numbers:
        try:
            uid, synced = await _fetch_and_save_message_by_seq(
                imap_client=imap_client,
                seq=seq,
                last_uid=last_uid,
                mailbox=mailbox,
                db=db,
                redis_client=redis_client,
            )
        except Exception as e:
            # NOTE(review): a message that fails here is skipped for good if a
            # later message of the same cycle succeeds, because the watermark
            # below moves past it - confirm this is the intended policy.
            logger.error(
                f"[{mailbox.email_address}] Errore fetch seq {seq}: {e}",
                exc_info=True,
            )
            continue

        if synced:
            # Count every stored message, independently of watermark movement.
            synced_count += 1
            if uid is not None and uid > max_uid_synced:
                max_uid_synced = uid

    # Advance last_sync_uid and last_sync_at only when something was stored.
    if max_uid_synced > last_uid:
        mailbox.last_sync_uid = max_uid_synced
        mailbox.last_sync_at = datetime.now(UTC)
        await db.flush()
        await db.commit()

    return synced_count


def _parse_search_sequence_numbers(search_data: list) -> list[str]:
    """Return the sequence numbers of a SEARCH reply, deduplicated and sorted ascending."""
    found: set[int] = set()
    for line in search_data:
        if isinstance(line, (bytes, bytearray)):
            text = bytes(line).decode("ascii", errors="ignore")
        else:
            text = str(line)
        tokens = text.split()
        if tokens and tokens[0].upper() == "SEARCH":
            tokens = tokens[1:]
        # Only lines made up entirely of numbers are result lines; this keeps
        # figures inside the free-text completion line from being taken for
        # sequence numbers.
        if tokens and all(tok.isascii() and tok.isdigit() for tok in tokens):
            found.update(int(tok) for tok in tokens)
    return [str(number) for number in sorted(found)]
|
||
|
||
|
||
async def _fetch_and_save_message_by_seq(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    seq: str,
    last_uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> tuple[int | None, bool]:
    """Fetch one message by SEQUENCE NUMBER (not UID) and store it.

    UID is requested together with the body so that it can be read back from
    the FETCH response.

    Args:
        imap_client: IMAP client used to issue the FETCH.
        seq: Sequence number of the message, as a string.
        last_uid: Messages whose UID is not greater than this are skipped.
        mailbox: Mailbox the message belongs to.
        db: Database session handed to ``_save_message``.
        redis_client: Redis client handed to ``_save_message``.

    Returns:
        ``(uid, saved)``: the UID of the message (``None`` if it could not be
        determined) and ``True`` if the message was stored, ``False`` otherwise.
    """
    # NOTE(review): the full RFC822 body is downloaded before the UID is known,
    # so a message that turns out to be already synchronised (uid <= last_uid)
    # is still transferred in full.
    try:
        status, fetch_data = await imap_client.fetch(seq, "(UID RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] FETCH seq {seq} fallito: {e}")
        return None, False

    if status != "OK" or not fetch_data:
        logger.warning(
            f"[{mailbox.email_address}] FETCH seq {seq} risposta vuota: {status}"
        )
        return None, False

    # Debug aid: shape of fetch_data. Built only when DEBUG is enabled, and
    # bytearray items are reported by length so the body is never stringified.
    if logger.isEnabledFor(logging.DEBUG):
        items_info = [
            (
                type(x).__name__,
                len(x) if isinstance(x, (bytes, bytearray, str)) else str(x),
            )
            for x in fetch_data
        ]
        logger.debug(f"[{mailbox.email_address}] fetch_data seq {seq}: {items_info}")

    uid, raw_eml, size_bytes = _parse_fetch_response(fetch_data)

    if uid is None or uid <= last_uid:
        # UID unknown or not newer than last_uid: nothing to synchronise.
        return uid, False

    if not raw_eml:
        logger.warning(f"[{mailbox.email_address}] seq {seq} UID {uid}: body mancante")
        return uid, False

    if size_bytes is None:
        size_bytes = len(raw_eml)

    return uid, await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        size_bytes=size_bytes,
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )


def _parse_fetch_response(fetch_data: list) -> tuple[int | None, bytes | None, int | None]:
    """Pull ``(uid, raw_eml, rfc822_size)`` out of the items of a FETCH response.

    Layout observed by the original author (aioimaplib returns the message
    literal as ``bytearray``, not ``bytes``):
        [0] bytes     -> FETCH response header with UID and RFC822.SIZE
        [1] bytearray -> raw EML (the message itself)
        [2] bytes     -> ')' (closing)
        [3] bytes     -> final OK line
    """
    uid: int | None = None
    size_bytes: int | None = None
    literal: bytearray | None = None

    for item in fetch_data:
        if isinstance(item, bytearray):
            # Keep the largest literal instead of applying a fixed size
            # threshold, so that small messages are not discarded.
            if literal is None or len(item) > len(literal):
                literal = item
            continue

        if isinstance(item, bytes):
            text = item.decode("ascii", errors="ignore")
        elif isinstance(item, str):
            text = item
        else:
            continue

        uid_match = re.search(r"UID\s+(\d+)", text)
        if uid_match:
            uid = int(uid_match.group(1))
        size_match = re.search(r"RFC822\.SIZE\s+(\d+)", text)
        if size_match:
            size_bytes = int(size_match.group(1))

    raw_eml = bytes(literal) if literal else None
    return uid, raw_eml, size_bytes
|
||
|
||
|
||
async def _fetch_and_save_message(
    imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
    uid: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Fetch one message by UID and store it.

    According to the original docstring this is used by the one-shot
    ``sync_mailbox`` job. It issues ``UID FETCH`` through aioimaplib's
    ``uid()`` method.

    Args:
        imap_client: IMAP client used to issue the UID FETCH.
        uid: UID of the message to fetch.
        mailbox: Mailbox the message belongs to.
        db: Database session used for the duplicate check and the insert.
        redis_client: Redis client handed to ``_save_message``.

    Returns:
        ``True`` if the message was stored; ``False`` if it already exists,
        the fetch failed, or the response carried no message body.
    """
    # Skip the network round trip when the message is already stored.
    existing = await db.execute(
        select(Message.id).where(
            Message.mailbox_id == mailbox.id,
            Message.imap_uid == uid,
        )
    )
    if existing.scalar_one_or_none():
        return False

    try:
        status, fetch_data = await imap_client.uid("FETCH", str(uid), "(RFC822 RFC822.SIZE)")
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] UID FETCH {uid} fallito: {e}")
        return False

    if status != "OK" or not fetch_data:
        return False

    literal: bytes | None = None
    long_bytes_item: bytes | None = None
    size_bytes: int | None = None
    for item in fetch_data:
        if isinstance(item, bytearray):
            # aioimaplib hands the message literal back as a bytearray.
            if item and (literal is None or len(item) > len(literal)):
                literal = bytes(item)
        elif isinstance(item, bytes) and len(item) > 100:
            # Previous heuristic, kept as a fallback for responses where the
            # body arrives as plain bytes.
            long_bytes_item = item
        elif isinstance(item, (bytes, str)):
            s = item.decode("ascii", errors="ignore") if isinstance(item, bytes) else item
            m = re.search(r"RFC822\.SIZE\s+(\d+)", s)
            if m:
                size_bytes = int(m.group(1))

    raw_eml = literal or long_bytes_item
    if not raw_eml:
        logger.warning(f"[{mailbox.email_address}] UID {uid}: body mancante")
        return False

    return await _save_message(
        uid=uid,
        raw_eml=raw_eml,
        size_bytes=size_bytes or len(raw_eml),
        mailbox=mailbox,
        db=db,
        redis_client=redis_client,
    )
|
||
|
||
|
||
async def _save_message(
    uid: int,
    raw_eml: bytes,
    size_bytes: int,
    mailbox: Mailbox,
    db: AsyncSession,
    redis_client: aioredis.Redis,
) -> bool:
    """Store one raw EML in the database and on MinIO, then publish a WebSocket event.

    Returns ``False`` without doing anything when a message with the same
    mailbox and UID already exists, ``True`` once the new row has been flushed.
    Failures of the MinIO upload or of the Redis publish are logged and do not
    prevent the message from being stored.
    """
    # Idempotency guard: one row per (mailbox, UID).
    duplicate_lookup = await db.execute(
        select(Message.id).where(
            Message.mailbox_id == mailbox.id,
            Message.imap_uid == uid,
        )
    )
    if duplicate_lookup.scalar_one_or_none():
        logger.debug(f"[{mailbox.email_address}] UID {uid} già in DB, skip")
        return False

    fields = _parse_eml(raw_eml)
    received_at = datetime.now(UTC)

    # Upload to MinIO; on failure the row is still written, without a path.
    eml_path: str | None = None
    try:
        eml_path = await upload_eml(
            tenant_id=str(mailbox.tenant_id),
            mailbox_id=str(mailbox.id),
            uid=uid,
            eml_bytes=raw_eml,
        )
    except Exception as e:
        logger.error(f"[{mailbox.email_address}] Upload MinIO UID {uid}: {e}")

    # Columns copied verbatim from the parsed EML (None when absent).
    copied_columns = {
        column: fields.get(column)
        for column in (
            "subject",
            "from_address",
            "to_addresses",
            "cc_addresses",
            "message_id_header",
            "sent_at",
            "body_text",
            "body_html",
        )
    }

    # Persist the row.
    message = Message(
        id=uuid.uuid4(),
        tenant_id=mailbox.tenant_id,
        mailbox_id=mailbox.id,
        imap_uid=uid,
        imap_folder="INBOX",
        direction="inbound",
        state="received",
        pec_type=fields.get("pec_type", "posta_certificata"),
        received_at=received_at,
        size_bytes=size_bytes,
        has_attachments=fields.get("has_attachments", False),
        raw_eml_path=eml_path,
        is_read=False,
        **copied_columns,
    )
    db.add(message)
    await db.flush()

    # Publish the Redis event consumed by the WebSocket layer (best effort).
    try:
        event = {
            "type": "mailbox:new_message",
            "mailbox_id": str(mailbox.id),
            "message_id": str(message.id),
            "subject": message.subject or "",
            "from_address": message.from_address or "",
            "pec_type": message.pec_type,
            "received_at": received_at.isoformat(),
        }
        channel = f"ws:tenant:{mailbox.tenant_id}"
        await redis_client.publish(channel, json.dumps(event))
    except Exception as e:
        logger.warning(f"[{mailbox.email_address}] Redis publish UID {uid}: {e}")

    logger.info(
        f"[{mailbox.email_address}] Nuovo messaggio: UID={uid} "
        f"subject={message.subject!r} pec_type={message.pec_type}"
    )
    return True
|