""" Script di test trasmissione verso Aeterna (archiviazione certificata). Questo script è STANDALONE: si connette direttamente al DB PecHub, trova i messaggi con is_pending_conservation=True, scarica i loro EML da MinIO, costruisce pacchetti SIP BagIt e li invia ad Aeterna. Utilizzo (eseguire dal server dentro il container worker o direttamente): # Sul server, dentro il container worker: docker exec -it pechub-worker-1 python /app/scripts/test_aeterna_transmission.py # Con credenziali personalizzate: AETERNA_USERNAME=xxx AETERNA_PASSWORD=yyy \ python worker/scripts/test_aeterna_transmission.py Variabili d'ambiente accettate (sovrascrivono i default): AETERNA_ENDPOINT Default: https://api.aeterna.idrainformatica.it AETERNA_USERNAME Default: matteo@idrainformatica.it AETERNA_PASSWORD Default: letto da .env AETERNA_TENANT_SLUG Default: pechub DATABASE_URL Default: letta da .env del worker Output: Per ogni messaggio: stato trasmissione, versamento_id Aeterna, latenza. Al termine: aggiorna is_conserved=True nel DB se l'ingest e' riuscito. """ from __future__ import annotations import asyncio import hashlib import io import json import os import sys import time import uuid import zipfile from datetime import UTC, datetime from pathlib import Path # ─── Setup path per importare dal worker ───────────────────────────────────── # Se eseguito da fuori il container, aggiungi il path del worker worker_dir = Path(__file__).parent.parent sys.path.insert(0, str(worker_dir)) # ─── Configurazione Aeterna ─────────────────────────────────────────────────── AETERNA_ENDPOINT = os.getenv("AETERNA_ENDPOINT", "https://api.aeterna.idrainformatica.it") AETERNA_USERNAME = os.getenv("AETERNA_USERNAME", "matteo@idrainformatica.it") AETERNA_PASSWORD = os.getenv("AETERNA_PASSWORD", "Ma212718!") AETERNA_TENANT_SLUG = os.getenv("AETERNA_TENANT_SLUG", "pechub") # ─── Funzioni helper ────────────────────────────────────────────────────────── def log(msg: str) -> None: ts = datetime.now().strftime("%H:%M:%S") print(f"[{ts}] {msg}") def log_section(title: str) -> None: print(f"\n{'='*60}") print(f" {title}") print(f"{'='*60}") def build_bagit_sip( eml_bytes: bytes, message_id: str, subject: str | None = None, from_address: str | None = None, to_addresses: list[str] | None = None, received_at: str | None = None, ) -> bytes: """Costruisce un pacchetto BagIt RFC 8493 in memoria (ZIP).""" bag_name = f"pechub-pec-{message_id}" eml_filename = f"{message_id}.eml" data_path = f"data/{eml_filename}" eml_sha256 = hashlib.sha256(eml_bytes).hexdigest() bagit_txt = "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8\n" bag_info_lines = [ "Bag-Software-Agent: PecHub Archival Module (test script)", f"Bagging-Date: {datetime.now(UTC).strftime('%Y-%m-%d')}", f"External-Identifier: {message_id}", "Source-Organization: PecHub", ] if subject: bag_info_lines.append(f"Description: {subject[:500]}") if from_address: bag_info_lines.append(f"Contact-Email: {from_address}") if to_addresses: bag_info_lines.append(f"External-Description: PEC a {', '.join(to_addresses[:3])}") if received_at: bag_info_lines.append(f"Bag-Group-Identifier: {received_at[:10]}") bag_info_txt = "\n".join(bag_info_lines) + "\n" manifest_txt = f"{eml_sha256} {data_path}\n" buf = io.BytesIO() with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: zf.writestr(f"{bag_name}/bagit.txt", bagit_txt) zf.writestr(f"{bag_name}/bag-info.txt", bag_info_txt) zf.writestr(f"{bag_name}/manifest-sha256.txt", manifest_txt) zf.writestr(f"{bag_name}/{data_path}", eml_bytes) return buf.getvalue() # ─── Client Aeterna (inline, senza dipendenze worker) ──────────────────────── class AeternaTestClient: """Client minimale per il test di trasmissione.""" def __init__(self, endpoint: str, username: str, password: str, tenant_slug: str): self.endpoint = endpoint.rstrip("/") self.username = username self.password = password self.tenant_slug = tenant_slug self._token: str | None = None self._token_expires_at: float = 0.0 async def login(self) -> str: import httpx log(f" Autenticazione su Aeterna ({self.endpoint}) ...") async with httpx.AsyncClient(timeout=30) as client: resp = await client.post( f"{self.endpoint}/api/v1/auth/login", json={ "email": self.username, "password": self.password, "tenant_slug": self.tenant_slug, }, ) if resp.status_code != 200: raise RuntimeError( f"Login fallito HTTP {resp.status_code}: {resp.text[:300]}" ) data = resp.json() self._token = data["access_token"] expires_in = int(data.get("expires_in", 3600)) self._token_expires_at = time.monotonic() + expires_in - 60 user_email = data.get("user", {}).get("email", "?") log(f" Login riuscito come: {user_email}") return self._token async def get_token(self) -> str: if self._token and time.monotonic() < self._token_expires_at: return self._token return await self.login() async def upload_sip( self, zip_bytes: bytes, zip_filename: str, title: str, description: str = "", ) -> dict: import httpx token = await self.get_token() log(f" Upload SIP '{zip_filename}' ({len(zip_bytes):,} bytes) ...") t_start = time.monotonic() async with httpx.AsyncClient(timeout=120) as client: resp = await client.post( f"{self.endpoint}/api/v1/ingest/upload", headers={"Authorization": f"Bearer {token}"}, files={"file": (zip_filename, zip_bytes, "application/zip")}, data={ "title": title[:500], "description": description[:500], "creator": "PecHub Test Script", }, ) latency_ms = int((time.monotonic() - t_start) * 1000) if resp.status_code in (200, 201, 202): data = resp.json() log(f" Upload OK in {latency_ms}ms – package_id: {data.get('package_id')}") return {"success": True, "latency_ms": latency_ms, **data} else: log(f" Upload FALLITO HTTP {resp.status_code}: {resp.text[:300]}") return { "success": False, "latency_ms": latency_ms, "error": resp.text[:300], "status_code": resp.status_code, } async def poll_status(self, package_id: str, max_polls: int = 10, interval: float = 3.0) -> dict: import httpx token = await self.get_token() log(f" Polling status package_id={package_id} (max {max_polls} tentativi) ...") for i in range(max_polls): async with httpx.AsyncClient(timeout=15) as client: resp = await client.get( f"{self.endpoint}/api/v1/ingest/{package_id}/status", headers={"Authorization": f"Bearer {token}"}, ) if resp.status_code == 200: data = resp.json() status = data.get("status", "UNKNOWN").upper() stage = data.get("pipeline_stage", "") pct = data.get("progress_pct", 0) log(f" [{i+1}/{max_polls}] status={status} stage={stage} progress={pct}%") if status in ("ACTIVE", "FAILED", "REJECTED"): return {"success": status == "ACTIVE", "final_status": status, **data} if i < max_polls - 1: await asyncio.sleep(interval) else: log(f" Polling error HTTP {resp.status_code}") break log(" Polling completato (stato non finale raggiunto, processo ancora in corso)") return {"success": None, "message": "polling esaurito"} # ─── Recupero messaggi da DB e MinIO ───────────────────────────────────────── async def get_pending_conservation_messages() -> list[dict]: """ Recupera i messaggi con is_pending_conservation=True dal DB PecHub. Restituisce una lista di dict con i campi rilevanti. """ try: import asyncpg # type: ignore[import] except ImportError: log("asyncpg non installato. Uso psycopg2 come fallback...") return await get_messages_via_env() db_url = os.getenv("DATABASE_URL", "") if not db_url: log("DATABASE_URL non impostata. Tento connessione locale...") db_url = "postgresql://pechub:pechub@localhost:5432/pechub" # asyncpg vuole postgresql:// non postgres:// e senza +asyncpg driver specifier db_url = db_url.replace("postgres://", "postgresql://") db_url = db_url.replace("postgresql+asyncpg://", "postgresql://") db_url = db_url.replace("postgresql+psycopg2://", "postgresql://") try: conn = await asyncpg.connect(db_url) rows = await conn.fetch(""" SELECT m.id, m.tenant_id, m.subject, m.from_address, m.to_addresses, m.received_at, m.raw_eml_path, m.is_pending_conservation, m.is_conserved FROM messages m WHERE m.is_pending_conservation = TRUE AND m.is_conserved = FALSE ORDER BY m.received_at DESC LIMIT 20 """) await conn.close() return [dict(row) for row in rows] except Exception as e: log(f"Errore connessione DB: {e}") return [] async def get_messages_via_env() -> list[dict]: """Fallback: usa variabili d'ambiente per costruire messaggi di test.""" log("Uso messaggi di test hardcoded (DB non disponibile)") return [] async def download_eml_from_minio(raw_eml_path: str) -> bytes | None: """ Scarica il file EML da MinIO usando il path memorizzato nel DB. """ try: from app.storage.minio_client import get_minio_client client = await get_minio_client() bucket = os.getenv("MINIO_BUCKET", "pechub") response = await asyncio.to_thread( client.get_object, bucket, raw_eml_path ) data = response.read() response.close() response.release_conn() return data except ImportError: pass except Exception as e: log(f" Errore download MinIO ({raw_eml_path}): {e}") # Fallback: prova con boto3/minio direttamente try: import minio # type: ignore[import] endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000") access_key = os.getenv("MINIO_ACCESS_KEY", "minioadmin") secret_key = os.getenv("MINIO_SECRET_KEY", "minioadmin") bucket = os.getenv("MINIO_BUCKET", "pechub") client = minio.Minio( endpoint, access_key=access_key, secret_key=secret_key, secure=endpoint.startswith("https"), ) response = client.get_object(bucket, raw_eml_path) data = response.read() response.close() response.release_conn() return data except Exception as e: log(f" Errore download MinIO (fallback): {e}") return None async def mark_message_conserved(message_id: str, versamento_id: str) -> None: """Aggiorna il messaggio nel DB come conservato.""" try: import asyncpg # type: ignore[import] except ImportError: log(f" [skip] asyncpg non disponibile: impossibile aggiornare is_conserved per {message_id}") return db_url = os.getenv("DATABASE_URL", "postgresql://pechub:pechub@localhost:5432/pechub") db_url = db_url.replace("postgres://", "postgresql://") db_url = db_url.replace("postgresql+asyncpg://", "postgresql://") db_url = db_url.replace("postgresql+psycopg2://", "postgresql://") try: conn = await asyncpg.connect(db_url) await conn.execute(""" UPDATE messages SET is_conserved = TRUE, conserved_at = NOW() WHERE id = $1 """, uuid.UUID(message_id)) await conn.close() log(f" DB aggiornato: is_conserved=TRUE per message_id={message_id}") except Exception as e: log(f" Errore aggiornamento DB per {message_id}: {e}") # ─── Main ───────────────────────────────────────────────────────────────────── async def main() -> None: log_section("TEST TRASMISSIONE AETERNA – PecHub") log(f"Endpoint: {AETERNA_ENDPOINT}") log(f"Username: {AETERNA_USERNAME}") log(f"Tenant slug: {AETERNA_TENANT_SLUG}") log(f"Timestamp: {datetime.now().isoformat()}") # 1. Connessione ad Aeterna log_section("1. AUTENTICAZIONE AETERNA") client = AeternaTestClient( endpoint=AETERNA_ENDPOINT, username=AETERNA_USERNAME, password=AETERNA_PASSWORD, tenant_slug=AETERNA_TENANT_SLUG, ) try: await client.login() except Exception as e: log(f"ERRORE FATALE: impossibile autenticarsi su Aeterna: {e}") sys.exit(1) # 2. Recupera messaggi da conservare log_section("2. RECUPERO MESSAGGI 'DA CONSERVARE'") messages = await get_pending_conservation_messages() if not messages: log("Nessun messaggio con is_pending_conservation=TRUE trovato.") log("Verificare che i messaggi siano stati marcati 'Da conservare' nell'interfaccia.") log("") log("Suggerimento: selezionare un messaggio in PecHub e usare") log("'Aggiungi a Da Conservare' per marcarlo per l'archiviazione.") sys.exit(0) log(f"Trovati {len(messages)} messaggi da conservare:") for i, m in enumerate(messages, 1): subj = (m.get("subject") or "")[:60] recv = str(m.get("received_at") or "")[:10] log(f" [{i}] id={str(m['id'])[:8]}... | data={recv} | oggetto={subj}") # 3. Trasmissione log_section("3. TRASMISSIONE A AETERNA") results = [] for i, msg in enumerate(messages, 1): msg_id = str(msg["id"]) subject = msg.get("subject") or f"PEC {msg_id[:8]}" from_addr = msg.get("from_address") to_addrs = msg.get("to_addresses") or [] received_at = str(msg.get("received_at") or "") raw_eml_path = msg.get("raw_eml_path") log(f"\nMessaggio [{i}/{len(messages)}]: {subject[:50]}") log(f" ID: {msg_id}") log(f" EML path: {raw_eml_path}") # Scarica EML eml_bytes: bytes | None = None if raw_eml_path: log(" Download EML da MinIO ...") eml_bytes = await download_eml_from_minio(raw_eml_path) if not eml_bytes: # Crea un EML di test sintetico log(" EML non disponibile. Generazione EML sintetico di test ...") eml_bytes = f"""From: {from_addr or 'test@pec.it'} To: {', '.join(to_addrs) if to_addrs else 'destinatario@pec.it'} Subject: {subject} Date: {received_at} Message-ID: <{msg_id}@pechub.test> Content-Type: text/plain; charset=UTF-8 MIME-Version: 1.0 Questo e' un messaggio PEC archiviato da PecHub. ID messaggio: {msg_id} Data archiviazione: {datetime.now(UTC).isoformat()} """.encode("utf-8") # Costruisci BagIt SIP log(" Costruzione pacchetto BagIt SIP ...") zip_bytes = build_bagit_sip( eml_bytes=eml_bytes, message_id=msg_id, subject=subject, from_address=from_addr, to_addresses=to_addrs, received_at=received_at, ) log(f" SIP costruito: {len(zip_bytes):,} bytes") # Upload su Aeterna upload_result = await client.upload_sip( zip_bytes=zip_bytes, zip_filename=f"pechub-pec-{msg_id}.zip", title=subject, description=f"Messaggio PEC ID={msg_id} | Da={from_addr} | A={', '.join(to_addrs or [])}", ) if not upload_result.get("success"): log(f" UPLOAD FALLITO: {upload_result.get('error', 'errore sconosciuto')}") results.append({ "message_id": msg_id, "subject": subject, "success": False, "error": upload_result.get("error"), }) continue package_id = upload_result.get("package_id") pid = upload_result.get("pid", "") log(f" package_id = {package_id}") log(f" pid = {pid}") # Polling status (opzionale, non bloccante per il test) log(" Attesa elaborazione pipeline (polling 5 poll x 4s) ...") status_result = await client.poll_status(package_id, max_polls=5, interval=4.0) final_status = status_result.get("final_status", "unknown") log(f" Stato finale Aeterna: {final_status}") # Aggiorna DB se accettato if final_status == "ACTIVE" or status_result.get("success") is None: # success=None significa che l'ingest e' ancora in corso ma accettato await mark_message_conserved(msg_id, package_id) results.append({ "message_id": msg_id, "subject": subject, "success": True, "package_id": package_id, "pid": pid, "final_status": final_status, "latency_ms": upload_result.get("latency_ms"), }) # 4. Riepilogo log_section("4. RIEPILOGO") ok = sum(1 for r in results if r.get("success")) log(f"Messaggi trasmessi con successo: {ok}/{len(results)}") log("") for r in results: icon = "OK" if r.get("success") else "FAIL" log(f" [{icon}] {r['subject'][:50]}") if r.get("package_id"): log(f" package_id = {r['package_id']}") log(f" pid = {r.get('pid', '-')}") log(f" status = {r.get('final_status', '-')}") log(f" latency = {r.get('latency_ms', '-')} ms") if r.get("error"): log(f" error = {r['error'][:100]}") # Salva risultati in JSON output_file = Path("/tmp/aeterna_test_results.json") output_file.write_text(json.dumps(results, indent=2, default=str)) log(f"\nRisultati salvati in: {output_file}") if ok == len(results) and results: log("\nTest completato con successo!") elif not results: log("\nNessun messaggio trasmesso.") else: log(f"\nTest parzialmente riuscito ({ok}/{len(results)} messaggi trasmessi).") if __name__ == "__main__": asyncio.run(main())