Files
PecHub/worker/scripts/test_aeterna_transmission.py
T
2026-06-18 11:24:05 +02:00

517 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Script di test trasmissione verso Aeterna (archiviazione certificata).
Questo script è STANDALONE: si connette direttamente al DB PecHub,
trova i messaggi con is_pending_conservation=True, scarica i loro EML
da MinIO, costruisce pacchetti SIP BagIt e li invia ad Aeterna.
Utilizzo (eseguire dal server dentro il container worker o direttamente):
# Sul server, dentro il container worker:
docker exec -it pechub-worker-1 python /app/scripts/test_aeterna_transmission.py
# Con credenziali personalizzate:
AETERNA_USERNAME=xxx AETERNA_PASSWORD=yyy \
python worker/scripts/test_aeterna_transmission.py
Variabili d'ambiente accettate (sovrascrivono i default):
AETERNA_ENDPOINT Default: https://api.aeterna.idrainformatica.it
AETERNA_USERNAME Default: matteo@idrainformatica.it
AETERNA_PASSWORD Default: letto da .env
AETERNA_TENANT_SLUG Default: pechub
DATABASE_URL Default: letta da .env del worker
Output:
Per ogni messaggio: stato trasmissione, versamento_id Aeterna, latenza.
Al termine: aggiorna is_conserved=True nel DB se l'ingest e' riuscito.
"""
from __future__ import annotations
import asyncio
import hashlib
import io
import json
import os
import sys
import time
import uuid
import zipfile
from datetime import UTC, datetime
from pathlib import Path
# ─── Worker import path setup ────────────────────────────────────────────────
# When the script is launched from outside the container, put the worker
# directory (the parent of scripts/) on sys.path so worker modules import.
worker_dir = Path(__file__).parent.parent
sys.path.insert(0, str(worker_dir))

# ─── Aeterna configuration ───────────────────────────────────────────────────
# Every value below can be overridden through the environment.
AETERNA_ENDPOINT = os.getenv("AETERNA_ENDPOINT", "https://api.aeterna.idrainformatica.it")
AETERNA_USERNAME = os.getenv("AETERNA_USERNAME", "matteo@idrainformatica.it")
# SECURITY: the password must come from the environment (e.g. the worker .env);
# no credential literal is kept in source control. With the variable unset the
# value is empty and the Aeterna login fails with an explicit HTTP error.
# NOTE(review): the default that used to be committed here should be treated
# as compromised and rotated.
AETERNA_PASSWORD = os.getenv("AETERNA_PASSWORD", "")
AETERNA_TENANT_SLUG = os.getenv("AETERNA_TENANT_SLUG", "pechub")
# ─── Funzioni helper ──────────────────────────────────────────────────────────
def log(msg: str) -> None:
    """Print *msg* to stdout, prefixed with the local time as ``[HH:MM:SS]``."""
    stamp = format(datetime.now(), "%H:%M:%S")
    print(f"[{stamp}] {msg}")
def log_section(title: str) -> None:
    """Print *title* as a section banner framed by two 60-character ``=`` rules.

    A blank line is emitted before the first rule.
    """
    rule = "=" * 60
    print("\n" + rule, f" {title}", rule, sep="\n")
def build_bagit_sip(
eml_bytes: bytes,
message_id: str,
subject: str | None = None,
from_address: str | None = None,
to_addresses: list[str] | None = None,
received_at: str | None = None,
) -> bytes:
"""Costruisce un pacchetto BagIt RFC 8493 in memoria (ZIP)."""
bag_name = f"pechub-pec-{message_id}"
eml_filename = f"{message_id}.eml"
data_path = f"data/{eml_filename}"
eml_sha256 = hashlib.sha256(eml_bytes).hexdigest()
bagit_txt = "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8\n"
bag_info_lines = [
"Bag-Software-Agent: PecHub Archival Module (test script)",
f"Bagging-Date: {datetime.now(UTC).strftime('%Y-%m-%d')}",
f"External-Identifier: {message_id}",
"Source-Organization: PecHub",
]
if subject:
bag_info_lines.append(f"Description: {subject[:500]}")
if from_address:
bag_info_lines.append(f"Contact-Email: {from_address}")
if to_addresses:
bag_info_lines.append(f"External-Description: PEC a {', '.join(to_addresses[:3])}")
if received_at:
bag_info_lines.append(f"Bag-Group-Identifier: {received_at[:10]}")
bag_info_txt = "\n".join(bag_info_lines) + "\n"
manifest_txt = f"{eml_sha256} {data_path}\n"
buf = io.BytesIO()
with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
zf.writestr(f"{bag_name}/bagit.txt", bagit_txt)
zf.writestr(f"{bag_name}/bag-info.txt", bag_info_txt)
zf.writestr(f"{bag_name}/manifest-sha256.txt", manifest_txt)
zf.writestr(f"{bag_name}/{data_path}", eml_bytes)
return buf.getvalue()
# ─── Client Aeterna (inline, senza dipendenze worker) ────────────────────────
class AeternaTestClient:
    """Minimal Aeterna HTTP client used by the transmission test.

    It authenticates with e-mail/password/tenant, caches the bearer token
    until shortly before it expires, uploads SIP archives and polls the
    ingest status. ``httpx`` is imported lazily inside the methods that need
    it, so the class can be defined without that package installed.
    """

    def __init__(self, endpoint: str, username: str, password: str, tenant_slug: str):
        # Trailing slashes are dropped so URL paths can be appended directly.
        self.endpoint = endpoint.rstrip("/")
        self.username = username
        self.password = password
        self.tenant_slug = tenant_slug
        self._token: str | None = None
        # Deadline on the time.monotonic() clock; 0.0 means "no valid token".
        self._token_expires_at: float = 0.0

    async def login(self) -> str:
        """Authenticate against Aeterna and cache the access token.

        Returns:
            The access token from the login response.

        Raises:
            RuntimeError: If the login endpoint does not answer HTTP 200.
        """
        import httpx

        log(f" Autenticazione su Aeterna ({self.endpoint}) ...")
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.post(
                f"{self.endpoint}/api/v1/auth/login",
                json={
                    "email": self.username,
                    "password": self.password,
                    "tenant_slug": self.tenant_slug,
                },
            )
        if resp.status_code != 200:
            raise RuntimeError(
                f"Login fallito HTTP {resp.status_code}: {resp.text[:300]}"
            )
        data = resp.json()
        self._token = data["access_token"]
        expires_in = int(data.get("expires_in", 3600))
        # Renew one minute early so a token never expires mid-request.
        self._token_expires_at = time.monotonic() + expires_in - 60
        # "user" may be missing or null in the response; both fall back to "?".
        user_email = (data.get("user") or {}).get("email", "?")
        log(f" Login riuscito come: {user_email}")
        return self._token

    async def get_token(self) -> str:
        """Return the cached token if still valid, otherwise log in again."""
        if self._token and time.monotonic() < self._token_expires_at:
            return self._token
        return await self.login()

    async def upload_sip(
        self,
        zip_bytes: bytes,
        zip_filename: str,
        title: str,
        description: str = "",
    ) -> dict:
        """Upload a SIP ZIP archive to the Aeterna ingest endpoint.

        Args:
            zip_bytes: Content of the ZIP archive.
            zip_filename: File name sent in the multipart upload.
            title: Package title (truncated to 500 characters).
            description: Package description (truncated to 500 characters).

        Returns:
            On HTTP 200/201/202: ``success=True``, ``latency_ms`` and every
            field of the JSON response. Otherwise: ``success=False``,
            ``latency_ms``, ``error`` (first 300 characters of the response
            body) and ``status_code``.
        """
        import httpx

        token = await self.get_token()
        log(f" Upload SIP '{zip_filename}' ({len(zip_bytes):,} bytes) ...")
        t_start = time.monotonic()
        async with httpx.AsyncClient(timeout=120) as client:
            resp = await client.post(
                f"{self.endpoint}/api/v1/ingest/upload",
                headers={"Authorization": f"Bearer {token}"},
                files={"file": (zip_filename, zip_bytes, "application/zip")},
                data={
                    "title": title[:500],
                    "description": description[:500],
                    "creator": "PecHub Test Script",
                },
            )
            latency_ms = int((time.monotonic() - t_start) * 1000)

        if resp.status_code in (200, 201, 202):
            data = resp.json()
            log(f" Upload OK in {latency_ms}ms package_id: {data.get('package_id')}")
            return {"success": True, "latency_ms": latency_ms, **data}

        log(f" Upload FALLITO HTTP {resp.status_code}: {resp.text[:300]}")
        return {
            "success": False,
            "latency_ms": latency_ms,
            "error": resp.text[:300],
            "status_code": resp.status_code,
        }

    async def poll_status(self, package_id: str, max_polls: int = 10, interval: float = 3.0) -> dict:
        """Poll the ingest status of *package_id* until it reaches a final state.

        Args:
            package_id: Identifier returned by the upload.
            max_polls: Maximum number of status requests.
            interval: Seconds to wait between two requests.

        Returns:
            When the status becomes ACTIVE, FAILED or REJECTED: ``success``
            (True only for ACTIVE), ``final_status`` and every field of the
            status response. When the polls run out or a status request does
            not answer HTTP 200: ``{"success": None, "message": "polling esaurito"}``.
        """
        import httpx

        token = await self.get_token()
        log(f" Polling status package_id={package_id} (max {max_polls} tentativi) ...")
        # One HTTP client is shared by all polls instead of reconnecting each time.
        async with httpx.AsyncClient(timeout=15) as client:
            for i in range(max_polls):
                resp = await client.get(
                    f"{self.endpoint}/api/v1/ingest/{package_id}/status",
                    headers={"Authorization": f"Bearer {token}"},
                )
                if resp.status_code != 200:
                    log(f" Polling error HTTP {resp.status_code}")
                    break
                data = resp.json()
                # A missing or null status is reported as UNKNOWN rather than
                # crashing on None.upper().
                status = str(data.get("status") or "UNKNOWN").upper()
                stage = data.get("pipeline_stage", "")
                pct = data.get("progress_pct", 0)
                log(f" [{i+1}/{max_polls}] status={status} stage={stage} progress={pct}%")
                if status in ("ACTIVE", "FAILED", "REJECTED"):
                    return {"success": status == "ACTIVE", "final_status": status, **data}
                # No pointless wait after the last attempt.
                if i < max_polls - 1:
                    await asyncio.sleep(interval)
        log(" Polling completato (stato non finale raggiunto, processo ancora in corso)")
        return {"success": None, "message": "polling esaurito"}
# ─── Recupero messaggi da DB e MinIO ─────────────────────────────────────────
async def get_pending_conservation_messages() -> list[dict]:
    """Fetch the messages flagged for conservation from the PecHub database.

    Selects up to 20 rows of ``messages`` with ``is_pending_conservation``
    true and ``is_conserved`` false, newest ``received_at`` first.

    Returns:
        One dict per row with the selected columns. An empty list is returned
        when the query or the connection fails (the error is logged); when
        ``asyncpg`` is not installed the result of ``get_messages_via_env()``
        is returned instead.
    """
    try:
        import asyncpg  # type: ignore[import]
    except ImportError:
        log("asyncpg non installato. Uso psycopg2 come fallback...")
        return await get_messages_via_env()

    db_url = os.getenv("DATABASE_URL", "")
    if not db_url:
        log("DATABASE_URL non impostata. Tento connessione locale...")
        db_url = "postgresql://pechub:pechub@localhost:5432/pechub"

    # asyncpg wants a plain postgresql:// URL: normalise the postgres:// alias
    # and strip SQLAlchemy-style "+driver" specifiers.
    db_url = db_url.replace("postgres://", "postgresql://")
    db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
    db_url = db_url.replace("postgresql+psycopg2://", "postgresql://")

    try:
        conn = await asyncpg.connect(db_url)
        try:
            rows = await conn.fetch("""
                SELECT
                m.id,
                m.tenant_id,
                m.subject,
                m.from_address,
                m.to_addresses,
                m.received_at,
                m.raw_eml_path,
                m.is_pending_conservation,
                m.is_conserved
                FROM messages m
                WHERE m.is_pending_conservation = TRUE
                AND m.is_conserved = FALSE
                ORDER BY m.received_at DESC
                LIMIT 20
            """)
        finally:
            # Close the connection even when the query itself fails.
            await conn.close()
        return [dict(row) for row in rows]
    except Exception as e:
        # Best effort: a database problem is reported as "no messages".
        log(f"Errore connessione DB: {e}")
        return []
async def get_messages_via_env() -> list[dict]:
    """Fallback used when the database cannot be queried.

    It logs a notice and returns an empty list; no test messages are built.
    """
    no_messages: list[dict] = []
    log("Uso messaggi di test hardcoded (DB non disponibile)")
    return no_messages
def _read_minio_object(client, bucket: str, object_name: str) -> bytes:
    """Read one object with blocking calls, always releasing the HTTP response."""
    response = client.get_object(bucket, object_name)
    try:
        return response.read()
    finally:
        response.close()
        response.release_conn()


async def download_eml_from_minio(raw_eml_path: str) -> bytes | None:
    """Download an EML file from MinIO using the object path stored in the DB.

    The worker's own MinIO client is tried first. If the worker module cannot
    be imported, or that download fails, a client is built directly from the
    ``MINIO_ENDPOINT`` / ``MINIO_ACCESS_KEY`` / ``MINIO_SECRET_KEY``
    environment variables. The bucket comes from ``MINIO_BUCKET``
    (default ``pechub``).

    Args:
        raw_eml_path: Object name inside the bucket.

    Returns:
        The object content, or ``None`` when both attempts fail.
    """
    bucket = os.getenv("MINIO_BUCKET", "pechub")

    try:
        from app.storage.minio_client import get_minio_client

        client = await get_minio_client()
        # The blocking download runs in a worker thread, off the event loop.
        return await asyncio.to_thread(_read_minio_object, client, bucket, raw_eml_path)
    except ImportError:
        # Worker package not importable (script run standalone): use the fallback.
        pass
    except Exception as e:
        log(f" Errore download MinIO ({raw_eml_path}): {e}")

    # Fallback: talk to MinIO directly with the minio package.
    try:
        import minio  # type: ignore[import]

        endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000")
        access_key = os.getenv("MINIO_ACCESS_KEY", "minioadmin")
        secret_key = os.getenv("MINIO_SECRET_KEY", "minioadmin")
        # TLS is chosen from an "https" prefix, but the client takes a bare
        # host[:port], so the scheme and any trailing slash are removed.
        secure = endpoint.startswith("https")
        host = endpoint.split("://", 1)[-1].rstrip("/")
        client = minio.Minio(
            host,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,
        )
        return await asyncio.to_thread(_read_minio_object, client, bucket, raw_eml_path)
    except Exception as e:
        log(f" Errore download MinIO (fallback): {e}")
        return None
async def mark_message_conserved(message_id: str, versamento_id: str) -> None:
    """Mark a message as conserved in the PecHub database.

    Sets ``is_conserved = TRUE`` and ``conserved_at = NOW()`` on the row of
    ``messages`` whose id is *message_id*. Failures are logged, never raised.

    Args:
        message_id: Message UUID in string form.
        versamento_id: Aeterna identifier of the deposit. It is accepted for
            the caller's convenience but is not written to the database.
    """
    try:
        import asyncpg  # type: ignore[import]
    except ImportError:
        log(f" [skip] asyncpg non disponibile: impossibile aggiornare is_conserved per {message_id}")
        return

    db_url = os.getenv("DATABASE_URL", "postgresql://pechub:pechub@localhost:5432/pechub")
    # asyncpg wants a plain postgresql:// URL without a "+driver" specifier.
    db_url = db_url.replace("postgres://", "postgresql://")
    db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
    db_url = db_url.replace("postgresql+psycopg2://", "postgresql://")

    try:
        conn = await asyncpg.connect(db_url)
        try:
            await conn.execute("""
                UPDATE messages
                SET is_conserved = TRUE,
                conserved_at = NOW()
                WHERE id = $1
            """, uuid.UUID(message_id))
        finally:
            # Close the connection even when the update fails.
            await conn.close()
        log(f" DB aggiornato: is_conserved=TRUE per message_id={message_id}")
    except Exception as e:
        log(f" Errore aggiornamento DB per {message_id}: {e}")
# ─── Main ─────────────────────────────────────────────────────────────────────
def _build_synthetic_eml(
    msg_id: str,
    subject: str,
    from_addr: str | None,
    to_addrs: list[str],
    received_at: str,
) -> bytes:
    """Build a placeholder plain-text EML for a message whose real EML is missing."""
    lines = [
        f"From: {from_addr or 'test@pec.it'}",
        f"To: {', '.join(to_addrs) if to_addrs else 'destinatario@pec.it'}",
        f"Subject: {subject}",
        f"Date: {received_at}",
        f"Message-ID: <{msg_id}@pechub.test>",
        "Content-Type: text/plain; charset=UTF-8",
        "MIME-Version: 1.0",
        # The empty line ends the header section; without it the body lines
        # would be parsed as headers.
        "",
        "Questo e' un messaggio PEC archiviato da PecHub.",
        f"ID messaggio: {msg_id}",
        f"Data archiviazione: {datetime.now(UTC).isoformat()}",
    ]
    return ("\n".join(lines) + "\n").encode("utf-8")


async def main() -> None:
    """Run the end-to-end Aeterna transmission test.

    Steps:
        1. Log in to Aeterna; exit with status 1 if that fails.
        2. Load the messages pending conservation; exit with status 0 if
           there are none.
        3. For each message: download its EML (or build a synthetic one),
           wrap it in a BagIt SIP, upload it and poll the ingest status.
           The message is marked conserved when the final status is ACTIVE
           or when polling ended without a final status.
        4. Print a summary and save the results to
           ``/tmp/aeterna_test_results.json``.
    """
    log_section("TEST TRASMISSIONE AETERNA PecHub")
    log(f"Endpoint: {AETERNA_ENDPOINT}")
    log(f"Username: {AETERNA_USERNAME}")
    log(f"Tenant slug: {AETERNA_TENANT_SLUG}")
    log(f"Timestamp: {datetime.now().isoformat()}")

    # 1. Connect to Aeterna
    log_section("1. AUTENTICAZIONE AETERNA")
    client = AeternaTestClient(
        endpoint=AETERNA_ENDPOINT,
        username=AETERNA_USERNAME,
        password=AETERNA_PASSWORD,
        tenant_slug=AETERNA_TENANT_SLUG,
    )
    try:
        await client.login()
    except Exception as e:
        log(f"ERRORE FATALE: impossibile autenticarsi su Aeterna: {e}")
        sys.exit(1)

    # 2. Load the messages to conserve
    log_section("2. RECUPERO MESSAGGI 'DA CONSERVARE'")
    messages = await get_pending_conservation_messages()
    if not messages:
        log("Nessun messaggio con is_pending_conservation=TRUE trovato.")
        log("Verificare che i messaggi siano stati marcati 'Da conservare' nell'interfaccia.")
        log("")
        log("Suggerimento: selezionare un messaggio in PecHub e usare")
        log("'Aggiungi a Da Conservare' per marcarlo per l'archiviazione.")
        sys.exit(0)

    log(f"Trovati {len(messages)} messaggi da conservare:")
    for i, m in enumerate(messages, 1):
        subj = (m.get("subject") or "")[:60]
        recv = str(m.get("received_at") or "")[:10]
        log(f" [{i}] id={str(m['id'])[:8]}... | data={recv} | oggetto={subj}")

    # 3. Transmission
    log_section("3. TRASMISSIONE A AETERNA")
    results = []
    for i, msg in enumerate(messages, 1):
        msg_id = str(msg["id"])
        subject = msg.get("subject") or f"PEC {msg_id[:8]}"
        from_addr = msg.get("from_address")
        to_addrs = msg.get("to_addresses") or []
        received_at = str(msg.get("received_at") or "")
        raw_eml_path = msg.get("raw_eml_path")

        log(f"\nMessaggio [{i}/{len(messages)}]: {subject[:50]}")
        log(f" ID: {msg_id}")
        log(f" EML path: {raw_eml_path}")

        # Download the EML
        eml_bytes: bytes | None = None
        if raw_eml_path:
            log(" Download EML da MinIO ...")
            eml_bytes = await download_eml_from_minio(raw_eml_path)
        if not eml_bytes:
            # No stored EML (or empty download): send a synthetic test EML.
            log(" EML non disponibile. Generazione EML sintetico di test ...")
            eml_bytes = _build_synthetic_eml(msg_id, subject, from_addr, to_addrs, received_at)

        # Build the BagIt SIP
        log(" Costruzione pacchetto BagIt SIP ...")
        zip_bytes = build_bagit_sip(
            eml_bytes=eml_bytes,
            message_id=msg_id,
            subject=subject,
            from_address=from_addr,
            to_addresses=to_addrs,
            received_at=received_at,
        )
        log(f" SIP costruito: {len(zip_bytes):,} bytes")

        # Upload to Aeterna
        upload_result = await client.upload_sip(
            zip_bytes=zip_bytes,
            zip_filename=f"pechub-pec-{msg_id}.zip",
            title=subject,
            description=f"Messaggio PEC ID={msg_id} | Da={from_addr} | A={', '.join(to_addrs or [])}",
        )
        if not upload_result.get("success"):
            log(f" UPLOAD FALLITO: {upload_result.get('error', 'errore sconosciuto')}")
            results.append({
                "message_id": msg_id,
                "subject": subject,
                "success": False,
                "error": upload_result.get("error"),
            })
            continue

        package_id = upload_result.get("package_id")
        pid = upload_result.get("pid", "")
        log(f" package_id = {package_id}")
        log(f" pid = {pid}")

        # Status polling (bounded, so the test never waits for long)
        log(" Attesa elaborazione pipeline (polling 5 poll x 4s) ...")
        status_result = await client.poll_status(package_id, max_polls=5, interval=4.0)
        final_status = status_result.get("final_status", "unknown")
        log(f" Stato finale Aeterna: {final_status}")

        # Update the DB when the package was accepted.
        # success=None means the ingest is still running but was accepted.
        ingest_outcome = status_result.get("success")
        if final_status == "ACTIVE" or ingest_outcome is None:
            await mark_message_conserved(msg_id, package_id)

        # A package that Aeterna explicitly FAILED/REJECTED must not be
        # reported as a success just because the upload went through.
        accepted = ingest_outcome is not False
        outcome = {
            "message_id": msg_id,
            "subject": subject,
            "success": accepted,
            "package_id": package_id,
            "pid": pid,
            "final_status": final_status,
            "latency_ms": upload_result.get("latency_ms"),
        }
        if not accepted:
            outcome["error"] = f"Stato finale Aeterna: {final_status}"
        results.append(outcome)

    # 4. Summary
    log_section("4. RIEPILOGO")
    ok = sum(1 for r in results if r.get("success"))
    log(f"Messaggi trasmessi con successo: {ok}/{len(results)}")
    log("")
    for r in results:
        icon = "OK" if r.get("success") else "FAIL"
        log(f" [{icon}] {r['subject'][:50]}")
        if r.get("package_id"):
            log(f" package_id = {r['package_id']}")
            log(f" pid = {r.get('pid', '-')}")
            log(f" status = {r.get('final_status', '-')}")
            log(f" latency = {r.get('latency_ms', '-')} ms")
        if r.get("error"):
            log(f" error = {r['error'][:100]}")

    # Save the results as JSON (default=str stringifies non-JSON values).
    output_file = Path("/tmp/aeterna_test_results.json")
    output_file.write_text(json.dumps(results, indent=2, default=str))
    log(f"\nRisultati salvati in: {output_file}")

    if ok == len(results) and results:
        log("\nTest completato con successo!")
    elif not results:
        log("\nNessun messaggio trasmesso.")
    else:
        log(f"\nTest parzialmente riuscito ({ok}/{len(results)} messaggi trasmessi).")
# Script entry point: run the async workflow only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())