This commit is contained in:
2026-03-18 17:30:13 +01:00
parent 58a233236c
commit d80d912fb3
36 changed files with 3502 additions and 4 deletions
+1
View File
@@ -0,0 +1 @@
# Worker PecFlow
+66
View File
@@ -0,0 +1,66 @@
"""
Configurazione worker legge le stesse variabili d'ambiente del backend.
"""
from functools import lru_cache
from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class WorkerSettings(BaseSettings):
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
case_sensitive=False,
extra="ignore",
)
# ── Ambiente ──────────────────────────────────────────────────────────────
app_env: str = "development"
log_level: str = "INFO"
# ── Database ──────────────────────────────────────────────────────────────
database_url: str = "postgresql+asyncpg://pecflow:pecflow_dev_password@db:5432/pecflow"
# ── Redis ─────────────────────────────────────────────────────────────────
redis_url: str = "redis://redis:6379/0"
# ── MinIO ─────────────────────────────────────────────────────────────────
minio_endpoint: str = "minio:9000"
minio_access_key: str = "minioadmin"
minio_secret_key: str = "minioadmin"
minio_bucket: str = "pecflow"
minio_use_ssl: bool = False
# ── Cifratura credenziali (ADR-002) ───────────────────────────────────────
encryption_key: str = "0" * 64
# ── Parametri IMAP sync ───────────────────────────────────────────────────
imap_idle_timeout_seconds: int = 1680 # 28 minuti (RFC 2177 ≤ 29 min)
imap_polling_interval_seconds: int = 60 # polling se IDLE non supportato
imap_max_fetch_per_cycle: int = 50 # max messaggi per ciclo di fetch
imap_max_error_count: int = 5 # errori consecutivi → status=error
imap_connect_timeout_seconds: int = 30 # timeout connessione iniziale
# ── Backoff esponenziale ──────────────────────────────────────────────────
backoff_initial_seconds: float = 1.0
backoff_multiplier: float = 2.0
backoff_max_seconds: float = 300.0 # 5 minuti massimo
@field_validator("encryption_key")
@classmethod
def validate_encryption_key(cls, v: str) -> str:
if len(v) != 64:
raise ValueError("ENCRYPTION_KEY deve essere 64 caratteri hex")
bytes.fromhex(v)
return v
@property
def encryption_key_bytes(self) -> bytes:
return bytes.fromhex(self.encryption_key)
@lru_cache
def get_settings() -> WorkerSettings:
return WorkerSettings()
+35
View File
@@ -0,0 +1,35 @@
"""
Connessione database per il worker usa SQLAlchemy async (stesso stack del backend).
"""
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase
from app.config import get_settings
settings = get_settings()
engine = create_async_engine(
settings.database_url,
echo=False,
pool_size=5,
max_overflow=10,
pool_pre_ping=True,
)
AsyncSessionLocal = async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
autoflush=False,
autocommit=False,
)
class Base(DeclarativeBase):
pass
async def get_db_session() -> AsyncSession:
"""Restituisce una nuova sessione DB da usare come context manager."""
return AsyncSessionLocal()
+1
View File
@@ -0,0 +1 @@
# IMAP sync engine
+395
View File
@@ -0,0 +1,395 @@
"""
IMAPConnection gestione singola connessione IMAP con:
- IDLE con heartbeat 28 min (RFC 2177)
- Polling fallback ogni 60s se IDLE non supportato
- Backoff esponenziale su disconnessione
- Aggiornamento stato mailbox su N errori consecutivi
Architettura (ADR-003):
Un asyncio.Task per casella → overhead minimo, migliaia di caselle per host.
"""
import asyncio
import base64
import logging
from datetime import UTC, datetime
import aioimaplib
import redis.asyncio as aioredis
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import get_settings
from app.database import AsyncSessionLocal
from app.imap.reconnect import ExponentialBackoff
from app.imap.sync import sync_new_messages
from app.models import Mailbox
logger = logging.getLogger(__name__)
settings = get_settings()
def _decrypt(enc: str) -> str:
"""Decifra un campo credenziale AES-256-GCM."""
import os
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
key = settings.encryption_key_bytes
aesgcm = AESGCM(key)
raw = base64.b64decode(enc.encode("ascii"))
nonce = raw[:12]
ciphertext_with_tag = raw[12:]
return aesgcm.decrypt(nonce, ciphertext_with_tag, None).decode("utf-8")
class IMAPConnection:
"""
Gestisce la connessione IMAP di una singola casella PEC.
Ciclo di vita:
1. Connessione IMAP (SSL o plain)
2. Login
3. SELECT INBOX
4. Sync iniziale (tutti i messaggi nuovi dall'ultimo UID noto)
5. IDLE loop (o polling se IDLE non disponibile)
6. Su EXISTS/EXPUNGE notify → fetch nuovi messaggi
7. Su errore → backoff → torna al punto 1
8. Su N errori consecutivi → imposta mailbox.status=error
"""
def __init__(
self,
mailbox_id: str,
redis_client: aioredis.Redis,
) -> None:
self.mailbox_id = mailbox_id
self.redis = redis_client
self._running = False
self._client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL | None = None
async def run(self) -> None:
"""
Loop principale della connessione IMAP.
Questo metodo non solleva mai eccezioni; gestisce internamente tutti gli errori.
"""
self._running = True
backoff = ExponentialBackoff(label=f"mailbox:{self.mailbox_id[:8]}")
while self._running:
try:
async with AsyncSessionLocal() as db:
# Carica mailbox dal DB
mailbox = await db.get(Mailbox, self.mailbox_id)
if not mailbox:
logger.error(
f"[{self.mailbox_id}] Mailbox non trovata in DB. Task terminato."
)
return
if mailbox.status in ("deleted", "paused"):
logger.info(
f"[{mailbox.email_address}] Status={mailbox.status}, task in pausa."
)
await asyncio.sleep(60)
continue
# Decifra credenziali
creds = self._decrypt_creds(mailbox)
# Connetti e sincronizza
await self._connect_and_run(mailbox, creds, db, backoff)
except asyncio.CancelledError:
logger.info(f"[{self.mailbox_id}] Task IMAP cancellato.")
self._running = False
return
except Exception as e:
logger.error(
f"[{self.mailbox_id}] Errore inatteso nel loop IMAP: {e}",
exc_info=True,
)
await backoff.wait(e)
def stop(self) -> None:
"""Segnala al loop di terminare al prossimo ciclo."""
self._running = False
# ─── Connessione e loop interno ──────────────────────────────────────────
async def _connect_and_run(
self,
mailbox: Mailbox,
creds: dict,
db: AsyncSession,
backoff: ExponentialBackoff,
) -> None:
"""
Tenta la connessione IMAP. Se riesce, avvia il loop IDLE/polling.
Se fallisce, incrementa il contatore errori e aggiorna lo stato mailbox.
"""
try:
client = await self._connect(creds)
self._client = client
except asyncio.TimeoutError:
err_msg = f"Timeout connessione IMAP ({settings.imap_connect_timeout_seconds}s)"
await self._record_error(mailbox, db, err_msg)
await backoff.wait(TimeoutError(err_msg))
return
except Exception as e:
err_msg = str(e)
await self._record_error(mailbox, db, err_msg)
await backoff.wait(e)
return
# Connessione riuscita
backoff.reset()
await self._reset_error_state(mailbox, db)
# Sync iniziale: porta il DB aggiornato fino all'ultimo UID disponibile
logger.info(f"[{mailbox.email_address}] Sync iniziale...")
try:
n = await sync_new_messages(self._client, mailbox, db, self.redis)
if n > 0:
logger.info(
f"[{mailbox.email_address}] Sync iniziale completata: {n} messaggi nuovi"
)
except Exception as e:
logger.error(
f"[{mailbox.email_address}] Errore sync iniziale: {e}", exc_info=True
)
# Avvia IDLE o polling
supports_idle = self._supports_idle(client)
if supports_idle:
logger.info(f"[{mailbox.email_address}] Avvio IDLE loop")
await self._idle_loop(mailbox, db)
else:
logger.info(
f"[{mailbox.email_address}] IDLE non supportato, avvio polling "
f"ogni {settings.imap_polling_interval_seconds}s"
)
await self._polling_loop(mailbox, db)
async def _connect(
self, creds: dict
) -> aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL:
"""Connette al server IMAP e fa login. Solleva eccezione su errore."""
host = creds["host"]
port = creds["port"]
user = creds["user"]
password = creds["password"]
use_ssl = creds["use_ssl"]
logger.info(f"Connessione IMAP {user}@{host}:{port} ssl={use_ssl}")
if use_ssl:
client = aioimaplib.IMAP4_SSL(
host=host,
port=port,
timeout=settings.imap_connect_timeout_seconds,
)
else:
client = aioimaplib.IMAP4(
host=host,
port=port,
timeout=settings.imap_connect_timeout_seconds,
)
await asyncio.wait_for(
client.wait_hello_from_server(),
timeout=settings.imap_connect_timeout_seconds,
)
status, _ = await client.login(user, password)
if status != "OK":
await client.logout()
raise ConnectionError(f"Login IMAP fallito: status={status}")
status, _ = await client.select("INBOX")
if status != "OK":
await client.logout()
raise ConnectionError(f"SELECT INBOX fallito: status={status}")
logger.info(f"IMAP connesso: {user}@{host}:{port}")
return client
def _supports_idle(
self, client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL
) -> bool:
"""Verifica se il server supporta IDLE."""
try:
caps = client.protocol.capabilities
return "IDLE" in caps
except Exception:
return False
# ─── IDLE loop ────────────────────────────────────────────────────────────
async def _idle_loop(self, mailbox: Mailbox, db: AsyncSession) -> None:
"""
Loop IMAP IDLE con heartbeat ogni 28 minuti (RFC 2177).
Quando il server segnala EXISTS (nuovi messaggi) → sync.
Ogni 28 minuti → DONE + re-IDLE per mantenere connessione viva.
"""
client = self._client
idle_timeout = settings.imap_idle_timeout_seconds # 28 min
while self._running:
try:
# Avvia IDLE
await client.idle_start(timeout=idle_timeout)
# Attendi server push con timeout (heartbeat)
try:
server_push = await asyncio.wait_for(
client.wait_server_push(),
timeout=float(idle_timeout),
)
except asyncio.TimeoutError:
# Heartbeat: nessun nuovo messaggio in 28 minuti → re-IDLE
server_push = []
# Termina IDLE
await client.idle_done()
# Controlla se ci sono nuovi messaggi (EXISTS)
has_new = any(
b"EXISTS" in (line if isinstance(line, bytes) else line.encode())
for line in server_push
if line
)
if has_new:
logger.debug(
f"[{mailbox.email_address}] EXISTS ricevuto, sync..."
)
# Ricarica mailbox dal DB per avere last_sync_uid aggiornato
await db.refresh(mailbox)
n = await sync_new_messages(client, mailbox, db, self.redis)
if n > 0:
logger.info(
f"[{mailbox.email_address}] {n} nuovi messaggi sincronizzati"
)
except asyncio.CancelledError:
try:
await client.idle_done()
except Exception:
pass
return
except (ConnectionError, IOError, OSError) as e:
logger.warning(
f"[{mailbox.email_address}] Connessione IDLE persa: {e}"
)
raise # propaga al loop esterno per backoff
except Exception as e:
logger.error(
f"[{mailbox.email_address}] Errore IDLE loop: {e}", exc_info=True
)
raise
# ─── Polling loop ─────────────────────────────────────────────────────────
async def _polling_loop(self, mailbox: Mailbox, db: AsyncSession) -> None:
"""
Polling IMAP ogni N secondi quando IDLE non è supportato.
Esegue NOOP + SEARCH UID per verificare nuovi messaggi.
"""
client = self._client
interval = settings.imap_polling_interval_seconds
while self._running:
try:
await asyncio.sleep(interval)
if not self._running:
break
# NOOP per mantenere connessione viva
try:
await client.noop()
except Exception:
raise ConnectionError("Connessione IMAP persa durante NOOP")
# Ricarica mailbox e controlla nuovi UID
await db.refresh(mailbox)
n = await sync_new_messages(client, mailbox, db, self.redis)
if n > 0:
logger.info(
f"[{mailbox.email_address}] Polling: {n} nuovi messaggi"
)
except asyncio.CancelledError:
return
except (ConnectionError, IOError, OSError) as e:
logger.warning(
f"[{mailbox.email_address}] Connessione polling persa: {e}"
)
raise
except Exception as e:
logger.error(
f"[{mailbox.email_address}] Errore polling loop: {e}", exc_info=True
)
raise
# ─── Error management ─────────────────────────────────────────────────────
async def _record_error(
self, mailbox: Mailbox, db: AsyncSession, error_msg: str
) -> None:
"""
Incrementa sync_error_count. Se supera il limite → status=error.
Pubblica evento Redis di errore.
"""
import json
mailbox.sync_error_count += 1
mailbox.sync_error_msg = error_msg[:500]
if mailbox.sync_error_count >= settings.imap_max_error_count:
if mailbox.status != "error":
mailbox.status = "error"
logger.error(
f"[{mailbox.email_address}] Troppe anomalie "
f"({mailbox.sync_error_count}), status=error"
)
# Pubblica evento WebSocket di errore
try:
event = {
"type": "mailbox:sync_error",
"mailbox_id": str(mailbox.id),
"error": error_msg,
"status": "error",
}
channel = f"ws:tenant:{mailbox.tenant_id}"
await self.redis.publish(channel, json.dumps(event))
except Exception:
pass
await db.flush()
await db.commit()
async def _reset_error_state(
self, mailbox: Mailbox, db: AsyncSession
) -> None:
"""Resetta il contatore errori dopo una connessione riuscita."""
if mailbox.sync_error_count > 0 or mailbox.status == "error":
mailbox.sync_error_count = 0
mailbox.sync_error_msg = None
if mailbox.status == "error":
mailbox.status = "active"
await db.flush()
await db.commit()
# ─── Decrypt credentials ──────────────────────────────────────────────────
@staticmethod
def _decrypt_creds(mailbox: Mailbox) -> dict:
"""Decifra le credenziali IMAP dalla mailbox."""
return {
"host": _decrypt(mailbox.imap_host_enc),
"port": int(_decrypt(mailbox.imap_port_enc)),
"user": _decrypt(mailbox.imap_user_enc),
"password": _decrypt(mailbox.imap_pass_enc),
"use_ssl": mailbox.imap_use_ssl,
}
+275
View File
@@ -0,0 +1,275 @@
"""
MailboxPool orchestratore async per N caselle IMAP in parallelo.
All'avvio del worker:
1. Carica tutte le mailbox con status='active' dal DB
2. Avvia un asyncio.Task per ogni casella (IMAPConnection.run)
3. Monitora i task: se uno muore, lo riavvia dopo un breve delay
4. Osserva eventi Redis per caselle aggiunte/rimosse/aggiornate a runtime
ADR-003: Un task async per casella overhead < 10MB per casella.
"""
import asyncio
import json
import logging
import uuid
from typing import Any
import redis.asyncio as aioredis
from sqlalchemy import select
from app.config import get_settings
from app.database import AsyncSessionLocal
from app.imap.connection import IMAPConnection
from app.models import Mailbox
logger = logging.getLogger(__name__)
settings = get_settings()
# Canale Redis per eventi di gestione caselle
MAILBOX_EVENTS_CHANNEL = "mailbox:events"
class MailboxPool:
"""
Gestisce il pool di task IMAP asincroni.
Un task per casella, monitorato e riavviato automaticamente in caso di crash.
"""
def __init__(self, redis_client: aioredis.Redis) -> None:
self.redis = redis_client
# mailbox_id (str) → asyncio.Task
self._tasks: dict[str, asyncio.Task] = {}
# mailbox_id (str) → IMAPConnection
self._connections: dict[str, IMAPConnection] = {}
self._running = False
self._monitor_task: asyncio.Task | None = None
self._events_task: asyncio.Task | None = None
# ─── Lifecycle ────────────────────────────────────────────────────────────
async def start(self) -> None:
"""
Carica le mailbox attive dal DB e avvia i task IMAP.
Da chiamare nell'on_startup del worker arq.
"""
self._running = True
logger.info("MailboxPool: avvio in corso...")
mailbox_ids = await self._load_active_mailbox_ids()
logger.info(f"MailboxPool: {len(mailbox_ids)} caselle attive trovate")
for mid in mailbox_ids:
await self._start_task(mid)
# Task di monitoraggio: riavvia task morti
self._monitor_task = asyncio.create_task(
self._monitor_loop(), name="mailbox-pool-monitor"
)
# Task listener Redis: gestisce caselle aggiunte/rimosse a runtime
self._events_task = asyncio.create_task(
self._events_listener(), name="mailbox-pool-events"
)
logger.info(
f"MailboxPool avviato: {len(self._tasks)} task IMAP in esecuzione"
)
async def stop(self) -> None:
"""
Ferma tutti i task IMAP.
Da chiamare nell'on_shutdown del worker arq.
"""
logger.info("MailboxPool: arresto in corso...")
self._running = False
# Cancella task di sistema
for task in [self._monitor_task, self._events_task]:
if task and not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
# Ferma tutte le connessioni IMAP
for conn in self._connections.values():
conn.stop()
# Cancella i task
if self._tasks:
await asyncio.gather(
*[t for t in self._tasks.values() if not t.done()],
return_exceptions=True,
)
self._tasks.clear()
self._connections.clear()
logger.info("MailboxPool: tutti i task fermati")
# ─── Gestione task individuali ────────────────────────────────────────────
async def add_mailbox(self, mailbox_id: str) -> None:
"""Aggiunge e avvia una nuova casella al pool a runtime."""
if mailbox_id in self._tasks and not self._tasks[mailbox_id].done():
logger.debug(f"MailboxPool: {mailbox_id[:8]} già nel pool")
return
await self._start_task(mailbox_id)
logger.info(f"MailboxPool: casella aggiunta {mailbox_id[:8]}")
async def remove_mailbox(self, mailbox_id: str) -> None:
"""Ferma e rimuove una casella dal pool a runtime."""
if mailbox_id in self._connections:
self._connections[mailbox_id].stop()
if mailbox_id in self._tasks:
task = self._tasks[mailbox_id]
if not task.done():
task.cancel()
try:
await asyncio.wait_for(task, timeout=5.0)
except (asyncio.CancelledError, asyncio.TimeoutError):
pass
del self._tasks[mailbox_id]
if mailbox_id in self._connections:
del self._connections[mailbox_id]
logger.info(f"MailboxPool: casella rimossa {mailbox_id[:8]}")
@property
def active_count(self) -> int:
"""Numero di task IMAP attivi."""
return sum(1 for t in self._tasks.values() if not t.done())
# ─── Loop di monitoraggio ─────────────────────────────────────────────────
async def _monitor_loop(self) -> None:
"""
Ogni 30 secondi verifica i task morti e li riavvia.
Rimuove anche task di caselle che non esistono più nel DB.
"""
while self._running:
try:
await asyncio.sleep(30)
dead_ids = [
mid for mid, task in self._tasks.items()
if task.done() and not task.cancelled()
]
if dead_ids:
logger.info(
f"MailboxPool monitor: {len(dead_ids)} task morti, riavvio..."
)
active_ids = await self._load_active_mailbox_ids()
active_set = {str(mid) for mid in active_ids}
for mid in dead_ids:
if mid in active_set:
logger.info(
f"MailboxPool: riavvio task {mid[:8]}..."
)
await self._start_task(mid)
else:
# Casella non più attiva, rimuovi dal pool
logger.info(
f"MailboxPool: casella {mid[:8]} non più attiva, rimossa"
)
self._tasks.pop(mid, None)
self._connections.pop(mid, None)
# Aggiungi caselle nuove attive
active_ids = await self._load_active_mailbox_ids()
for mid in active_ids:
mid_str = str(mid)
if mid_str not in self._tasks or self._tasks[mid_str].done():
logger.info(
f"MailboxPool: rilevata nuova casella attiva {mid_str[:8]}"
)
await self._start_task(mid_str)
except asyncio.CancelledError:
return
except Exception as e:
logger.error(f"MailboxPool monitor errore: {e}", exc_info=True)
# ─── Listener eventi Redis ─────────────────────────────────────────────────
async def _events_listener(self) -> None:
"""
Ascolta eventi Redis per aggiungere/rimuovere caselle a runtime.
Formato evento: {"action": "add"|"remove"|"refresh", "mailbox_id": "<uuid>"}
Pubblicare con: PUBLISH mailbox:events '{"action":"add","mailbox_id":"<uuid>"}'
"""
pubsub = self.redis.pubsub()
await pubsub.subscribe(MAILBOX_EVENTS_CHANNEL)
logger.debug(f"MailboxPool: in ascolto su Redis {MAILBOX_EVENTS_CHANNEL}")
try:
async for message in pubsub.listen():
if not self._running:
break
if message["type"] != "message":
continue
try:
data = json.loads(message["data"])
action = data.get("action")
mid = data.get("mailbox_id")
if not action or not mid:
continue
if action == "add":
await self.add_mailbox(mid)
elif action == "remove":
await self.remove_mailbox(mid)
elif action == "refresh":
# Rimuovi e riavvia (utile dopo cambio credenziali)
await self.remove_mailbox(mid)
await asyncio.sleep(1)
await self.add_mailbox(mid)
except (json.JSONDecodeError, KeyError) as e:
logger.warning(f"MailboxPool: evento Redis malformato: {e}")
except asyncio.CancelledError:
await pubsub.unsubscribe(MAILBOX_EVENTS_CHANNEL)
return
except Exception as e:
logger.error(f"MailboxPool events listener errore: {e}", exc_info=True)
# ─── Private ─────────────────────────────────────────────────────────────
async def _start_task(self, mailbox_id: str) -> None:
"""Crea e avvia un nuovo task IMAPConnection per la casella."""
conn = IMAPConnection(
mailbox_id=mailbox_id,
redis_client=self.redis,
)
self._connections[mailbox_id] = conn
task = asyncio.create_task(
conn.run(),
name=f"imap-{mailbox_id[:8]}",
)
self._tasks[mailbox_id] = task
async def _load_active_mailbox_ids(self) -> list[str]:
"""Carica dal DB gli UUID di tutte le caselle con status=active."""
try:
async with AsyncSessionLocal() as db:
result = await db.execute(
select(Mailbox.id).where(Mailbox.status == "active")
)
return [str(row[0]) for row in result.all()]
except Exception as e:
logger.error(f"MailboxPool: errore caricamento caselle: {e}")
return []
+81
View File
@@ -0,0 +1,81 @@
"""
Strategia backoff esponenziale per riconnessioni IMAP.
Parametri configurabili in WorkerSettings:
backoff_initial_seconds = 1.0 (primo wait)
backoff_multiplier = 2.0 (moltiplicatore)
backoff_max_seconds = 300.0 (tetto massimo: 5 minuti)
Sequenza di attesa: 1s → 2s → 4s → 8s → 16s → 32s → 64s → 128s → 256s → 300s → 300s → ...
"""
import asyncio
import logging
import random
from app.config import get_settings
logger = logging.getLogger(__name__)
settings = get_settings()
class ExponentialBackoff:
"""
Gestisce il backoff esponenziale con jitter opzionale.
Uso:
backoff = ExponentialBackoff(label="casella@pec.it")
while True:
try:
await connect()
backoff.reset() # connessione riuscita → resetta backoff
await run_loop()
except Exception as e:
await backoff.wait(e) # attende prima di ritentare
"""
def __init__(self, label: str = "", jitter: bool = True) -> None:
self.label = label
self.jitter = jitter
self._attempt = 0
self._current_wait = settings.backoff_initial_seconds
def reset(self) -> None:
"""Resetta il backoff dopo una connessione riuscita."""
if self._attempt > 0:
logger.info(
f"[{self.label}] Connessione ristabilita dopo {self._attempt} tentativi"
)
self._attempt = 0
self._current_wait = settings.backoff_initial_seconds
async def wait(self, error: Exception | None = None) -> None:
"""
Attende il tempo calcolato dal backoff, poi incrementa per il prossimo ciclo.
Aggiunge jitter ±10% per evitare thundering herd su N caselle.
"""
self._attempt += 1
wait_time = min(self._current_wait, settings.backoff_max_seconds)
if self.jitter:
jitter_range = wait_time * 0.1
wait_time += random.uniform(-jitter_range, jitter_range)
wait_time = max(0.5, wait_time)
logger.warning(
f"[{self.label}] Tentativo {self._attempt} fallito"
f"{f': {error}' if error else ''}. "
f"Riconnessione in {wait_time:.1f}s"
)
await asyncio.sleep(wait_time)
# Incrementa per il prossimo tentativo
self._current_wait = min(
self._current_wait * settings.backoff_multiplier,
settings.backoff_max_seconds,
)
@property
def attempt(self) -> int:
return self._attempt
+473
View File
@@ -0,0 +1,473 @@
"""
Logica di sincronizzazione messaggi IMAP.
Responsabilità:
1. Fetch della lista UID > last_sync_uid
2. Download envelope + raw EML per ogni UID
3. Parsing base degli header (subject, from, to, date)
4. Salvataggio in tabella messages
5. Upload raw EML su MinIO
6. Aggiornamento last_sync_uid e last_sync_at sulla mailbox
7. Pubblicazione evento Redis per notifica WebSocket
"""
import email
import email.header
import email.utils
import hashlib
import json
import logging
import re
import uuid
from datetime import UTC, datetime
import aioimaplib
import redis.asyncio as aioredis
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import get_settings
from app.models import Mailbox, Message
from app.storage.minio_client import upload_eml
logger = logging.getLogger(__name__)
settings = get_settings()
# ─── Helper: decodifica header email ─────────────────────────────────────────
def _decode_header(header_value: str | None) -> str | None:
"""Decodifica header RFC 2047 (es. =?utf-8?b?...?=) in stringa Python."""
if not header_value:
return None
try:
parts = email.header.decode_header(header_value)
decoded = []
for part, charset in parts:
if isinstance(part, bytes):
decoded.append(part.decode(charset or "utf-8", errors="replace"))
else:
decoded.append(part)
return "".join(decoded).strip()
except Exception:
return str(header_value)
def _extract_addresses(field: str | None) -> list[str]:
"""Estrae lista di indirizzi email da un campo To/Cc."""
if not field:
return []
try:
addresses = email.utils.getaddresses([field])
return [addr for _, addr in addresses if addr]
except Exception:
return []
def _parse_date(date_str: str | None) -> datetime | None:
"""Converte stringa data RFC 2822 in datetime con timezone."""
if not date_str:
return None
try:
parsed = email.utils.parsedate_to_datetime(date_str)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=UTC)
return parsed
except Exception:
return None
def _classify_pec_type(msg: email.message.Message) -> str:
"""
Classifica il tipo PEC dal header X-Ricevuta / X-TipoRicevuta.
Fase 3 fa il parsing completo; qui classifichiamo al meglio possibile.
"""
x_ricevuta = msg.get("X-Ricevuta", "").lower()
x_tipo = msg.get("X-TipoRicevuta", "").lower()
TYPE_MAP = {
"accettazione": "accettazione",
"non-accettazione": "non_accettazione",
"presa-in-carico": "presa_in_carico",
"avvenuta-consegna": "avvenuta_consegna",
"mancata-consegna": "mancata_consegna",
"errore-consegna": "errore_consegna",
"preavviso-mancata-consegna": "preavviso_mancata_consegna",
"rilevazione-virus": "rilevazione_virus",
}
value = x_tipo or x_ricevuta
return TYPE_MAP.get(value, "posta_certificata")
def _parse_eml(raw_bytes: bytes) -> dict:
"""
Parsing di base di un EML estrae i campi necessari per la tabella messages.
Il parsing completo (body, allegati, EML-in-EML) è in Fase 3.
"""
try:
msg = email.message_from_bytes(raw_bytes)
except Exception as e:
logger.warning(f"Errore parsing EML: {e}")
return {}
subject = _decode_header(msg.get("Subject"))
from_addr = email.utils.parseaddr(msg.get("From", ""))[1] or None
to_addrs = _extract_addresses(msg.get("To"))
cc_addrs = _extract_addresses(msg.get("Cc"))
message_id = msg.get("Message-ID", "").strip() or None
date = _parse_date(msg.get("Date"))
pec_type = _classify_pec_type(msg)
# Estrazione body text/html (best-effort Fase 3 fa il parsing completo)
body_text = None
body_html = None
has_attachments = False
if msg.is_multipart():
for part in msg.walk():
ct = part.get_content_type()
disp = part.get("Content-Disposition", "")
if "attachment" in disp or "inline" in disp:
if part.get_filename():
has_attachments = True
elif ct == "text/plain" and body_text is None:
try:
charset = part.get_content_charset() or "utf-8"
body_text = part.get_payload(decode=True).decode(charset, errors="replace")
except Exception:
pass
elif ct == "text/html" and body_html is None:
try:
charset = part.get_content_charset() or "utf-8"
body_html = part.get_payload(decode=True).decode(charset, errors="replace")
except Exception:
pass
else:
ct = msg.get_content_type()
try:
charset = msg.get_content_charset() or "utf-8"
payload = msg.get_payload(decode=True)
if payload:
if ct == "text/plain":
body_text = payload.decode(charset, errors="replace")
elif ct == "text/html":
body_html = payload.decode(charset, errors="replace")
except Exception:
pass
return {
"subject": subject,
"from_address": from_addr,
"to_addresses": to_addrs if to_addrs else None,
"cc_addresses": cc_addrs if cc_addrs else None,
"message_id_header": message_id,
"sent_at": date,
"pec_type": pec_type,
"body_text": body_text,
"body_html": body_html,
"has_attachments": has_attachments,
}
# ─── Core sync function ───────────────────────────────────────────────────────
async def sync_new_messages(
imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
mailbox: Mailbox,
db: AsyncSession,
redis_client: aioredis.Redis,
) -> int:
"""
Sincronizza i messaggi nuovi (UID > last_sync_uid) per la mailbox data.
Returns:
Numero di nuovi messaggi sincronizzati.
"""
last_uid = mailbox.last_sync_uid or 0
search_range = f"{last_uid + 1}:*"
# ── SEARCH UID > last_sync_uid ─────────────────────────────────────────────
# aioimaplib non supporta uid('SEARCH',...) → usare search('UID', range)
# che invia "SEARCH UID n:*" e restituisce numeri di sequenza
try:
status, search_data = await imap_client.search("UID", search_range)
except Exception as e:
logger.warning(f"[{mailbox.email_address}] SEARCH fallito: {e}")
return 0
if status != "OK":
logger.warning(
f"[{mailbox.email_address}] SEARCH status={status} data={search_data}"
)
return 0
# search() restituisce numeri di sequenza (non UID)
raw_seqs = b" ".join(
d if isinstance(d, bytes) else d.encode() for d in search_data
).decode("ascii", errors="ignore").split()
seq_numbers = [s for s in raw_seqs if s.isdigit()]
if not seq_numbers:
return 0
# Limita il numero di fetch per ciclo
seq_numbers = seq_numbers[: settings.imap_max_fetch_per_cycle]
logger.info(
f"[{mailbox.email_address}] Trovati {len(seq_numbers)} messaggi nuovi da sincronizzare"
)
synced_count = 0
max_uid_synced = last_uid
for seq in seq_numbers:
try:
uid, synced = await _fetch_and_save_message_by_seq(
imap_client=imap_client,
seq=seq,
last_uid=last_uid,
mailbox=mailbox,
db=db,
redis_client=redis_client,
)
if synced and uid and uid > max_uid_synced:
synced_count += 1
max_uid_synced = uid
except Exception as e:
logger.error(
f"[{mailbox.email_address}] Errore fetch seq {seq}: {e}",
exc_info=True,
)
# Aggiorna last_sync_uid e last_sync_at
if max_uid_synced > last_uid:
mailbox.last_sync_uid = max_uid_synced
mailbox.last_sync_at = datetime.now(UTC)
await db.flush()
await db.commit()
return synced_count
async def _fetch_and_save_message_by_seq(
imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
seq: str,
last_uid: int,
mailbox: Mailbox,
db: AsyncSession,
redis_client: aioredis.Redis,
) -> tuple[int | None, bool]:
"""
Fetcha un singolo messaggio per NUMERO DI SEQUENZA (non UID).
Include UID nella richiesta FETCH per estrarlo dalla risposta.
Returns:
(uid, saved): UID del messaggio e True se salvato, False altrimenti.
"""
# FETCH seq (UID RFC822 RFC822.SIZE)
try:
status, fetch_data = await imap_client.fetch(seq, "(UID RFC822 RFC822.SIZE)")
except Exception as e:
logger.error(f"[{mailbox.email_address}] FETCH seq {seq} fallito: {e}")
return None, False
if status != "OK" or not fetch_data:
logger.warning(
f"[{mailbox.email_address}] FETCH seq {seq} risposta vuota: {status}"
)
return None, False
# Debug: mostra la struttura di fetch_data
items_info = [(type(x).__name__, len(x) if isinstance(x, (bytes, str)) else str(x)) for x in fetch_data]
logger.debug(f"[{mailbox.email_address}] fetch_data seq {seq}: {items_info}")
# Estrae UID, raw EML e size dalla risposta.
# NOTA CRITICA: aioimaplib restituisce il corpo EML come `bytearray` (non `bytes`)!
# [0] bytes → FETCH response header con UID e RFC822.SIZE
# [1] bytearray → raw EML (il corpo del messaggio)
# [2] bytes → ')' (chiusura)
# [3] bytes → riga OK finale
uid: int | None = None
raw_eml: bytes | None = None
size_bytes: int | None = None
for item in fetch_data:
if isinstance(item, bytearray):
# Questo è il corpo del messaggio EML
if len(item) > 200:
raw_eml = bytes(item)
elif isinstance(item, bytes):
# Risposta header estrae UID e RFC822.SIZE
item_str = item.decode("ascii", errors="ignore")
uid_match = re.search(r"UID\s+(\d+)", item_str)
if uid_match:
uid = int(uid_match.group(1))
size_match = re.search(r"RFC822\.SIZE\s+(\d+)", item_str)
if size_match:
size_bytes = int(size_match.group(1))
elif isinstance(item, str):
uid_match = re.search(r"UID\s+(\d+)", item)
if uid_match:
uid = int(uid_match.group(1))
size_match = re.search(r"RFC822\.SIZE\s+(\d+)", item)
if size_match:
size_bytes = int(size_match.group(1))
if uid is None or uid <= last_uid:
# Questo messaggio ha un UID <= last_uid, non va sincronizzato
return uid, False
if not raw_eml:
logger.warning(f"[{mailbox.email_address}] seq {seq} UID {uid}: body mancante")
return uid, False
if size_bytes is None:
size_bytes = len(raw_eml)
return uid, await _save_message(
uid=uid,
raw_eml=raw_eml,
size_bytes=size_bytes,
mailbox=mailbox,
db=db,
redis_client=redis_client,
)
async def _fetch_and_save_message(
imap_client: aioimaplib.IMAP4 | aioimaplib.IMAP4_SSL,
uid: int,
mailbox: Mailbox,
db: AsyncSession,
redis_client: aioredis.Redis,
) -> bool:
"""
Fetcha un singolo messaggio per UID (usato dal job sync_mailbox one-shot).
Usa UID FETCH (aioimaplib uid() method).
"""
existing = await db.execute(
select(Message.id).where(
Message.mailbox_id == mailbox.id,
Message.imap_uid == uid,
)
)
if existing.scalar_one_or_none():
return False
try:
status, fetch_data = await imap_client.uid("FETCH", str(uid), "(RFC822 RFC822.SIZE)")
except Exception as e:
logger.error(f"[{mailbox.email_address}] UID FETCH {uid} fallito: {e}")
return False
if status != "OK" or not fetch_data:
return False
raw_eml: bytes | None = None
size_bytes: int | None = None
for item in fetch_data:
if isinstance(item, bytes) and len(item) > 100:
raw_eml = item
elif isinstance(item, (bytes, str)):
s = item.decode("ascii", errors="ignore") if isinstance(item, bytes) else item
m = re.search(r"RFC822\.SIZE\s+(\d+)", s)
if m:
size_bytes = int(m.group(1))
if not raw_eml:
return False
return await _save_message(
uid=uid,
raw_eml=raw_eml,
size_bytes=size_bytes or len(raw_eml),
mailbox=mailbox,
db=db,
redis_client=redis_client,
)
async def _save_message(
uid: int,
raw_eml: bytes,
size_bytes: int,
mailbox: Mailbox,
db: AsyncSession,
redis_client: aioredis.Redis,
) -> bool:
"""
Salva un messaggio EML in DB e su MinIO. Pubblica evento WebSocket.
"""
# Idempotenza
existing = await db.execute(
select(Message.id).where(
Message.mailbox_id == mailbox.id,
Message.imap_uid == uid,
)
)
if existing.scalar_one_or_none():
logger.debug(f"[{mailbox.email_address}] UID {uid} già in DB, skip")
return False
parsed = _parse_eml(raw_eml)
received_at = datetime.now(UTC)
# Upload su MinIO
eml_path: str | None = None
try:
eml_path = await upload_eml(
tenant_id=str(mailbox.tenant_id),
mailbox_id=str(mailbox.id),
uid=uid,
eml_bytes=raw_eml,
)
except Exception as e:
logger.error(f"[{mailbox.email_address}] Upload MinIO UID {uid}: {e}")
# Salva in DB
message = Message(
id=uuid.uuid4(),
tenant_id=mailbox.tenant_id,
mailbox_id=mailbox.id,
imap_uid=uid,
imap_folder="INBOX",
direction="inbound",
state="received",
pec_type=parsed.get("pec_type", "posta_certificata"),
subject=parsed.get("subject"),
from_address=parsed.get("from_address"),
to_addresses=parsed.get("to_addresses"),
cc_addresses=parsed.get("cc_addresses"),
message_id_header=parsed.get("message_id_header"),
sent_at=parsed.get("sent_at"),
received_at=received_at,
size_bytes=size_bytes,
body_text=parsed.get("body_text"),
body_html=parsed.get("body_html"),
has_attachments=parsed.get("has_attachments", False),
raw_eml_path=eml_path,
is_read=False,
)
db.add(message)
await db.flush()
# Pubblica evento Redis per WebSocket
try:
event = {
"type": "mailbox:new_message",
"mailbox_id": str(mailbox.id),
"message_id": str(message.id),
"subject": message.subject or "",
"from_address": message.from_address or "",
"pec_type": message.pec_type,
"received_at": received_at.isoformat(),
}
await redis_client.publish(f"ws:tenant:{mailbox.tenant_id}", json.dumps(event))
except Exception as e:
logger.warning(f"[{mailbox.email_address}] Redis publish UID {uid}: {e}")
logger.info(
f"[{mailbox.email_address}] Nuovo messaggio: UID={uid} "
f"subject={message.subject!r} pec_type={message.pec_type}"
)
return True
+1
View File
@@ -0,0 +1 @@
# Definizioni job arq
+77
View File
@@ -0,0 +1,77 @@
"""
Job arq: sync_mailbox trigger manuale per forzare la sincronizzazione di una casella.
Questo job viene usato per:
- Forzare una sync immediata dopo la creazione di una nuova casella
- Resync manuale da parte dell'admin
- Retry dopo un errore (called dal pool monitor)
Non sostituisce il loop IMAP continuo (IMAPConnection); è un one-shot job.
"""
import logging
from typing import Any
from app.database import AsyncSessionLocal
from app.imap.reconnect import ExponentialBackoff
from app.imap.sync import sync_new_messages
from app.models import Mailbox
logger = logging.getLogger(__name__)
async def sync_mailbox(ctx: dict[str, Any], mailbox_id: str) -> dict:
"""
Job arq: sincronizza una singola casella PEC.
Args:
ctx: contesto arq (contiene redis, pool reference)
mailbox_id: UUID della casella da sincronizzare
Returns:
dict con risultato del job
"""
redis_client = ctx.get("redis")
async with AsyncSessionLocal() as db:
mailbox = await db.get(Mailbox, mailbox_id)
if not mailbox:
return {"status": "error", "message": f"Mailbox {mailbox_id} non trovata"}
if mailbox.status not in ("active", "error"):
return {
"status": "skipped",
"message": f"Mailbox status={mailbox.status}, skip",
}
from app.imap.connection import IMAPConnection
creds = IMAPConnection._decrypt_creds(mailbox)
try:
from app.imap.connection import IMAPConnection
conn = IMAPConnection(mailbox_id=mailbox_id, redis_client=redis_client)
client = await conn._connect(creds)
n = await sync_new_messages(client, mailbox, db, redis_client)
try:
await client.logout()
except Exception:
pass
return {
"status": "ok",
"mailbox": mailbox.email_address,
"new_messages": n,
}
except Exception as e:
logger.error(f"[sync_mailbox] {mailbox_id} errore: {e}", exc_info=True)
return {
"status": "error",
"mailbox": mailbox.email_address,
"message": str(e),
}
+159
View File
@@ -0,0 +1,159 @@
"""
Entrypoint worker arq PecFlow IMAP Sync Engine.
Avvio: python -m app.main
Cosa fa:
1. Connette a Redis tramite arq
2. on_startup → avvia MailboxPool (N task IMAP asincroni)
3. on_shutdown → ferma MailboxPool
4. Registra job arq (sync_mailbox, future: send_pec, archive_batch, ecc.)
5. Loop arq per processare job dalla coda Redis
L'event loop è condiviso tra arq e MailboxPool (asyncio task).
"""
import asyncio
import logging
import sys
from typing import Any
import redis.asyncio as aioredis
from arq import run_worker
from arq.connections import RedisSettings
from app.config import get_settings
from app.imap.pool import MailboxPool
from app.jobs.sync_mailbox import sync_mailbox
from app.storage.minio_client import ensure_bucket_exists
settings = get_settings()
# Logging
logging.basicConfig(
level=getattr(logging, settings.log_level.upper(), logging.INFO),
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
stream=sys.stdout,
)
logger = logging.getLogger(__name__)
# Pool globale (accessibile dalle callback)
_mailbox_pool: MailboxPool | None = None
# ─── Lifecycle callbacks arq ──────────────────────────────────────────────────
async def on_startup(ctx: dict[str, Any]) -> None:
"""
Inizializzazione worker all'avvio.
Avvia il MailboxPool con tutte le caselle attive.
"""
global _mailbox_pool
logger.info("🚀 PecFlow Worker avviato")
logger.info(f" DB: {settings.database_url.split('@')[-1]}")
logger.info(f" Redis: {settings.redis_url}")
logger.info(f" MinIO: {settings.minio_endpoint}")
# Verifica/crea bucket MinIO
try:
await ensure_bucket_exists()
except Exception as e:
logger.warning(f"MinIO non disponibile al startup: {e}")
# Crea client Redis condiviso
redis_client = aioredis.from_url(settings.redis_url, decode_responses=True)
ctx["redis"] = redis_client
# Avvia MailboxPool
_mailbox_pool = MailboxPool(redis_client=redis_client)
ctx["mailbox_pool"] = _mailbox_pool
await _mailbox_pool.start()
logger.info(
f"✅ Worker pronto: {_mailbox_pool.active_count} caselle IMAP attive"
)
async def on_shutdown(ctx: dict[str, Any]) -> None:
"""Cleanup all'arresto del worker."""
global _mailbox_pool
logger.info("🛑 PecFlow Worker in arresto...")
pool = ctx.get("mailbox_pool") or _mailbox_pool
if pool:
await pool.stop()
redis_client = ctx.get("redis")
if redis_client:
await redis_client.aclose()
logger.info("🛑 Worker fermato")
# ─── Worker health check ──────────────────────────────────────────────────────
async def health_check(ctx: dict[str, Any]) -> dict:
"""
Job speciale per health check del worker.
Può essere chiamato da monitoring esterno.
"""
pool: MailboxPool | None = ctx.get("mailbox_pool")
return {
"status": "ok",
"active_imap_connections": pool.active_count if pool else 0,
}
# ─── WorkerSettings arq ───────────────────────────────────────────────────────
def _parse_redis_settings() -> RedisSettings:
"""Parsa REDIS_URL in RedisSettings arq."""
url = settings.redis_url
# Supporta redis://host:port/db e redis://user:pass@host:port/db
import urllib.parse
parsed = urllib.parse.urlparse(url)
host = parsed.hostname or "localhost"
port = parsed.port or 6379
db = int(parsed.path.lstrip("/") or "0")
password = parsed.password or None
return RedisSettings(host=host, port=port, database=db, password=password)
class WorkerSettings:
"""Configurazione del worker arq."""
# Funzioni/job registrati
functions = [sync_mailbox, health_check]
# Callbacks lifecycle
on_startup = on_startup
on_shutdown = on_shutdown
# Redis
redis_settings = _parse_redis_settings()
# Concorrenza
max_jobs = 20
# Timeout per ogni job (secondi)
job_timeout = 300
# Retry automatico in caso di errore
max_tries = 3
# Polling interval (arq controlla la coda ogni N ms)
poll_delay = 0.5
# Keep job results per N secondi
keep_result = 3600
# ─── Entrypoint ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
logger.info("Avvio PecFlow Worker (arq)...")
run_worker(WorkerSettings)
+128
View File
@@ -0,0 +1,128 @@
"""
Re-export dei modelli SQLAlchemy dal backend.
Il worker usa gli stessi modelli ORM del backend per leggere/scrivere nel DB.
Importa da una base comune tramite il package condiviso.
Nota: i modelli sono definiti nel backend. Il worker li ridefinisce qui
come classi identiche (stessa struttura) per non creare dipendenza circolare.
Tuttavia, poiché i due container condividono lo stesso DB PostgreSQL,
utilizziamo i modelli del backend ricopiando solo le parti necessarie.
"""
# I modelli completi sono già in backend/app/models/.
# Il worker li importa dalla propria copia locale per evitare
# una dipendenza del package worker → backend.
# In un monorepo reale si userebbe un shared/ package.
import uuid
from datetime import datetime
from sqlalchemy import (
ARRAY, BigInteger, Boolean, DateTime, Enum, ForeignKey,
Integer, String, Text, func,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
class Base(DeclarativeBase):
pass
MailboxStatus = Enum(
"active", "paused", "error", "deleted",
name="mailbox_status", create_type=False,
)
PecDirection = Enum("inbound", "outbound", name="pec_direction", create_type=False)
PecState = Enum(
"draft", "queued", "sent", "accepted", "delivered", "anomaly", "failed", "received",
name="pec_state", create_type=False,
)
PecMsgType = Enum(
"posta_certificata", "accettazione", "non_accettazione", "presa_in_carico",
"avvenuta_consegna", "mancata_consegna", "errore_consegna",
"preavviso_mancata_consegna", "rilevazione_virus", "unknown",
name="pec_msg_type", create_type=False,
)
class Mailbox(Base):
__tablename__ = "mailboxes"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True)
tenant_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
email_address: Mapped[str] = mapped_column(String(255), nullable=False)
display_name: Mapped[str | None] = mapped_column(String(255), nullable=True)
provider: Mapped[str | None] = mapped_column(String(100), nullable=True)
imap_host_enc: Mapped[str] = mapped_column(Text, nullable=False)
imap_port_enc: Mapped[str] = mapped_column(Text, nullable=False)
imap_user_enc: Mapped[str] = mapped_column(Text, nullable=False)
imap_pass_enc: Mapped[str] = mapped_column(Text, nullable=False)
imap_use_ssl: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
smtp_host_enc: Mapped[str] = mapped_column(Text, nullable=False)
smtp_port_enc: Mapped[str] = mapped_column(Text, nullable=False)
smtp_user_enc: Mapped[str] = mapped_column(Text, nullable=False)
smtp_pass_enc: Mapped[str] = mapped_column(Text, nullable=False)
smtp_use_tls: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
status: Mapped[str] = mapped_column(MailboxStatus, nullable=False, default="active")
last_sync_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
last_sync_uid: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
sync_error_msg: Mapped[str | None] = mapped_column(Text, nullable=True)
sync_error_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
created_by: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
)
class Message(Base):
__tablename__ = "messages"
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
)
tenant_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
mailbox_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
message_id_header: Mapped[str | None] = mapped_column(Text, nullable=True)
imap_uid: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
imap_folder: Mapped[str] = mapped_column(String(255), nullable=False, default="INBOX")
direction: Mapped[str] = mapped_column(PecDirection, nullable=False)
pec_type: Mapped[str] = mapped_column(PecMsgType, nullable=False, default="posta_certificata")
state: Mapped[str] = mapped_column(PecState, nullable=False)
subject: Mapped[str | None] = mapped_column(Text, nullable=True)
from_address: Mapped[str | None] = mapped_column(String(255), nullable=True)
to_addresses: Mapped[list[str] | None] = mapped_column(ARRAY(Text), nullable=True)
cc_addresses: Mapped[list[str] | None] = mapped_column(ARRAY(Text), nullable=True)
sent_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
received_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
size_bytes: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
body_text: Mapped[str | None] = mapped_column(Text, nullable=True)
body_html: Mapped[str | None] = mapped_column(Text, nullable=True)
has_attachments: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
parent_message_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True), ForeignKey("messages.id"), nullable=True
)
is_read: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
is_starred: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
is_archived: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
archived_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
raw_eml_path: Mapped[str | None] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
)
+1
View File
@@ -0,0 +1 @@
# MinIO storage client
+74
View File
@@ -0,0 +1,74 @@
"""
Client MinIO/S3 asincrono per il worker.
Percorso EML raw: pecflow/tenants/{tenant_id}/mailboxes/{mailbox_id}/raw/{uid}.eml
Percorso allegati: pecflow/tenants/{tenant_id}/mailboxes/{mailbox_id}/attachments/{msg_id}/{filename}
"""
import io
import logging
from functools import lru_cache
from miniopy_async import Minio
from app.config import get_settings
logger = logging.getLogger(__name__)
settings = get_settings()
@lru_cache(maxsize=1)
def get_minio_client() -> Minio:
"""Restituisce l'istanza singleton del client MinIO."""
return Minio(
endpoint=settings.minio_endpoint,
access_key=settings.minio_access_key,
secret_key=settings.minio_secret_key,
secure=settings.minio_use_ssl,
)
async def upload_eml(
tenant_id: str,
mailbox_id: str,
uid: int,
eml_bytes: bytes,
) -> str:
"""
Carica un raw EML su MinIO e restituisce il percorso oggetto.
Percorso: tenants/{tenant_id}/mailboxes/{mailbox_id}/raw/{uid}.eml
"""
client = get_minio_client()
bucket = settings.minio_bucket
object_path = f"tenants/{tenant_id}/mailboxes/{mailbox_id}/raw/{uid}.eml"
try:
data_stream = io.BytesIO(eml_bytes)
await client.put_object(
bucket_name=bucket,
object_name=object_path,
data=data_stream,
length=len(eml_bytes),
content_type="message/rfc822",
)
logger.debug(f"EML caricato: s3://{bucket}/{object_path} ({len(eml_bytes)} bytes)")
return object_path
except Exception as e:
logger.error(f"Errore upload EML {object_path}: {e}")
raise
async def ensure_bucket_exists() -> None:
"""Verifica che il bucket MinIO esista, altrimenti lo crea."""
client = get_minio_client()
bucket = settings.minio_bucket
try:
found = await client.bucket_exists(bucket)
if not found:
await client.make_bucket(bucket)
logger.info(f"Bucket MinIO creato: {bucket}")
else:
logger.debug(f"Bucket MinIO esistente: {bucket}")
except Exception as e:
logger.warning(f"Impossibile verificare/creare bucket MinIO: {e}")