GapFill Flowee

This commit is contained in:
2026-06-18 11:24:05 +02:00
parent 64442af182
commit c68daf4313
25 changed files with 2965 additions and 48 deletions
+516
View File
@@ -0,0 +1,516 @@
"""
Script di test trasmissione verso Aeterna (archiviazione certificata).
Questo script è STANDALONE: si connette direttamente al DB PecHub,
trova i messaggi con is_pending_conservation=True, scarica i loro EML
da MinIO, costruisce pacchetti SIP BagIt e li invia ad Aeterna.
Utilizzo (eseguire dal server dentro il container worker o direttamente):
# Sul server, dentro il container worker:
docker exec -it pechub-worker-1 python /app/scripts/test_aeterna_transmission.py
# Con credenziali personalizzate:
AETERNA_USERNAME=xxx AETERNA_PASSWORD=yyy \
python worker/scripts/test_aeterna_transmission.py
Variabili d'ambiente accettate (sovrascrivono i default):
AETERNA_ENDPOINT Default: https://api.aeterna.idrainformatica.it
AETERNA_USERNAME Default: matteo@idrainformatica.it
AETERNA_PASSWORD Default: letto da .env
AETERNA_TENANT_SLUG Default: pechub
DATABASE_URL Default: letta da .env del worker
Output:
Per ogni messaggio: stato trasmissione, versamento_id Aeterna, latenza.
Al termine: aggiorna is_conserved=True nel DB se l'ingest e' riuscito.
"""
from __future__ import annotations
import asyncio
import hashlib
import io
import json
import os
import sys
import time
import uuid
import zipfile
from datetime import UTC, datetime
from pathlib import Path
# ─── Path setup so modules can be imported from the worker ───────────────────
# Put the directory two levels above this file first on sys.path, so that
# worker modules resolve even when the script is run from outside the container.
worker_dir = Path(__file__).parent.parent
sys.path.insert(0, str(worker_dir))
# ─── Aeterna configuration ───────────────────────────────────────────────────
# Each setting is read from the environment variable of the same name; the
# second argument is the value used when that variable is not set.
AETERNA_ENDPOINT = os.getenv("AETERNA_ENDPOINT", "https://api.aeterna.idrainformatica.it")
AETERNA_USERNAME = os.getenv("AETERNA_USERNAME", "matteo@idrainformatica.it")
# Deliberately no built-in default: a credential must not be stored in the
# source tree. Supply AETERNA_PASSWORD through the environment (e.g. the .env
# file loaded by the container); with an empty password the login fails.
AETERNA_PASSWORD = os.getenv("AETERNA_PASSWORD", "")
AETERNA_TENANT_SLUG = os.getenv("AETERNA_TENANT_SLUG", "pechub")
# ─── Funzioni helper ──────────────────────────────────────────────────────────
def log(msg: str) -> None:
    """Print ``msg`` to stdout, prefixed with the local time as ``[HH:MM:SS]``."""
    stamp = format(datetime.now(), "%H:%M:%S")
    print(f"[{stamp}] {msg}")
def log_section(title: str) -> None:
    """Print a section banner: a blank line, then ``title`` between two 60-char rules."""
    rule = "=" * 60
    print("\n" + rule, f" {title}", rule, sep="\n")
def build_bagit_sip(
eml_bytes: bytes,
message_id: str,
subject: str | None = None,
from_address: str | None = None,
to_addresses: list[str] | None = None,
received_at: str | None = None,
) -> bytes:
"""Costruisce un pacchetto BagIt RFC 8493 in memoria (ZIP)."""
bag_name = f"pechub-pec-{message_id}"
eml_filename = f"{message_id}.eml"
data_path = f"data/{eml_filename}"
eml_sha256 = hashlib.sha256(eml_bytes).hexdigest()
bagit_txt = "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8\n"
bag_info_lines = [
"Bag-Software-Agent: PecHub Archival Module (test script)",
f"Bagging-Date: {datetime.now(UTC).strftime('%Y-%m-%d')}",
f"External-Identifier: {message_id}",
"Source-Organization: PecHub",
]
if subject:
bag_info_lines.append(f"Description: {subject[:500]}")
if from_address:
bag_info_lines.append(f"Contact-Email: {from_address}")
if to_addresses:
bag_info_lines.append(f"External-Description: PEC a {', '.join(to_addresses[:3])}")
if received_at:
bag_info_lines.append(f"Bag-Group-Identifier: {received_at[:10]}")
bag_info_txt = "\n".join(bag_info_lines) + "\n"
manifest_txt = f"{eml_sha256} {data_path}\n"
buf = io.BytesIO()
with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
zf.writestr(f"{bag_name}/bagit.txt", bagit_txt)
zf.writestr(f"{bag_name}/bag-info.txt", bag_info_txt)
zf.writestr(f"{bag_name}/manifest-sha256.txt", manifest_txt)
zf.writestr(f"{bag_name}/{data_path}", eml_bytes)
return buf.getvalue()
# ─── Client Aeterna (inline, senza dipendenze worker) ────────────────────────
class AeternaTestClient:
    """Minimal async client used by the transmission test.

    Stores the credentials, caches the bearer token returned by the login
    endpoint, and exposes upload and status-polling calls. ``httpx`` is
    imported inside each method, so the class can be defined even where
    ``httpx`` is not installed.
    """

    def __init__(self, endpoint: str, username: str, password: str, tenant_slug: str):
        # Trailing slashes are dropped because paths are appended as "/api/...".
        self.endpoint = endpoint.rstrip("/")
        self.username = username
        self.password = password
        self.tenant_slug = tenant_slug
        self._token: str | None = None
        # Deadline on the time.monotonic() clock after which the token is refreshed.
        self._token_expires_at: float = 0.0

    async def login(self) -> str:
        """Authenticate, cache the access token and return it.

        Raises:
            RuntimeError: if the login endpoint does not answer HTTP 200.
        """
        import httpx

        log(f" Autenticazione su Aeterna ({self.endpoint}) ...")
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.post(
                f"{self.endpoint}/api/v1/auth/login",
                json={
                    "email": self.username,
                    "password": self.password,
                    "tenant_slug": self.tenant_slug,
                },
            )
        if resp.status_code != 200:
            raise RuntimeError(
                f"Login fallito HTTP {resp.status_code}: {resp.text[:300]}"
            )
        data = resp.json()
        self._token = data["access_token"]
        expires_in = int(data.get("expires_in", 3600))
        # Refresh 60 seconds before the announced expiry.
        self._token_expires_at = time.monotonic() + expires_in - 60
        user_email = data.get("user", {}).get("email", "?")
        log(f" Login riuscito come: {user_email}")
        return self._token

    async def get_token(self) -> str:
        """Return the cached token while it is still valid, otherwise log in again."""
        if self._token and time.monotonic() < self._token_expires_at:
            return self._token
        return await self.login()

    async def upload_sip(
        self,
        zip_bytes: bytes,
        zip_filename: str,
        title: str,
        description: str = "",
    ) -> dict:
        """Upload one SIP archive as a multipart form.

        Returns:
            On HTTP 200/201/202: ``{"success": True, "latency_ms": ...}``
            merged with the JSON body of the response.
            Otherwise: ``{"success": False, "latency_ms", "error",
            "status_code"}``. A transport failure (timeout, connection error)
            is reported the same way with ``status_code`` set to ``None``
            instead of raising, so one bad upload does not abort the batch.
        """
        import httpx

        token = await self.get_token()
        log(f" Upload SIP '{zip_filename}' ({len(zip_bytes):,} bytes) ...")
        t_start = time.monotonic()
        try:
            async with httpx.AsyncClient(timeout=120) as client:
                resp = await client.post(
                    f"{self.endpoint}/api/v1/ingest/upload",
                    headers={"Authorization": f"Bearer {token}"},
                    files={"file": (zip_filename, zip_bytes, "application/zip")},
                    data={
                        "title": title[:500],
                        "description": description[:500],
                        "creator": "PecHub Test Script",
                    },
                )
        except httpx.HTTPError as exc:
            latency_ms = int((time.monotonic() - t_start) * 1000)
            error = f"{type(exc).__name__}: {exc}"[:300]
            log(f" Upload FALLITO (errore di rete): {error}")
            return {
                "success": False,
                "latency_ms": latency_ms,
                "error": error,
                "status_code": None,
            }
        latency_ms = int((time.monotonic() - t_start) * 1000)
        if resp.status_code in (200, 201, 202):
            data = resp.json()
            log(f" Upload OK in {latency_ms}ms package_id: {data.get('package_id')}")
            return {"success": True, "latency_ms": latency_ms, **data}
        log(f" Upload FALLITO HTTP {resp.status_code}: {resp.text[:300]}")
        return {
            "success": False,
            "latency_ms": latency_ms,
            "error": resp.text[:300],
            "status_code": resp.status_code,
        }

    async def poll_status(self, package_id: str, max_polls: int = 10, interval: float = 3.0) -> dict:
        """Poll the ingest status until it is final or the attempts run out.

        Args:
            package_id: Identifier placed in the status URL.
            max_polls: Maximum number of status requests.
            interval: Seconds slept between two requests.

        Returns:
            When the status is ACTIVE, FAILED or REJECTED:
            ``{"success": status == "ACTIVE", "final_status": status}`` merged
            with the JSON body. Otherwise (attempts exhausted, non-200 answer
            or transport error): ``{"success": None, "message": "polling esaurito"}``.
        """
        import httpx

        token = await self.get_token()
        log(f" Polling status package_id={package_id} (max {max_polls} tentativi) ...")
        status_url = f"{self.endpoint}/api/v1/ingest/{package_id}/status"
        headers = {"Authorization": f"Bearer {token}"}
        # One client is reused for every poll instead of opening one per request.
        async with httpx.AsyncClient(timeout=15) as client:
            for i in range(max_polls):
                try:
                    resp = await client.get(status_url, headers=headers)
                except httpx.HTTPError as exc:
                    log(f" Polling error: {type(exc).__name__}: {exc}")
                    break
                if resp.status_code != 200:
                    log(f" Polling error HTTP {resp.status_code}")
                    break
                data = resp.json()
                # "or" also covers an explicit JSON null, which .get() would pass through.
                status = str(data.get("status") or "UNKNOWN").upper()
                stage = data.get("pipeline_stage", "")
                pct = data.get("progress_pct", 0)
                log(f" [{i+1}/{max_polls}] status={status} stage={stage} progress={pct}%")
                if status in ("ACTIVE", "FAILED", "REJECTED"):
                    return {"success": status == "ACTIVE", "final_status": status, **data}
                # No point sleeping after the last attempt.
                if i < max_polls - 1:
                    await asyncio.sleep(interval)
        log(" Polling completato (stato non finale raggiunto, processo ancora in corso)")
        return {"success": None, "message": "polling esaurito"}
# ─── Recupero messaggi da DB e MinIO ─────────────────────────────────────────
async def get_pending_conservation_messages() -> list[dict]:
    """Fetch the messages flagged for conservation from the PecHub database.

    Selects at most 20 rows of ``messages`` with
    ``is_pending_conservation = TRUE`` and ``is_conserved = FALSE``, newest
    ``received_at`` first.

    Returns:
        One dict per row with the selected columns. An empty list when the
        connection or the query fails (the error is logged, not raised). When
        ``asyncpg`` cannot be imported, whatever ``get_messages_via_env()``
        returns.
    """
    try:
        import asyncpg  # type: ignore[import]
    except ImportError:
        log("asyncpg non installato. Uso psycopg2 come fallback...")
        return await get_messages_via_env()

    db_url = os.getenv("DATABASE_URL", "")
    if not db_url:
        log("DATABASE_URL non impostata. Tento connessione locale...")
        db_url = "postgresql://pechub:pechub@localhost:5432/pechub"
    # asyncpg wants the postgresql:// scheme, without a "+driver" specifier.
    db_url = db_url.replace("postgres://", "postgresql://")
    db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
    db_url = db_url.replace("postgresql+psycopg2://", "postgresql://")

    try:
        conn = await asyncpg.connect(db_url)
        try:
            rows = await conn.fetch("""
                SELECT
                    m.id,
                    m.tenant_id,
                    m.subject,
                    m.from_address,
                    m.to_addresses,
                    m.received_at,
                    m.raw_eml_path,
                    m.is_pending_conservation,
                    m.is_conserved
                FROM messages m
                WHERE m.is_pending_conservation = TRUE
                  AND m.is_conserved = FALSE
                ORDER BY m.received_at DESC
                LIMIT 20
            """)
        finally:
            # Close the connection even when the query itself fails.
            await conn.close()
        return [dict(row) for row in rows]
    except Exception as e:
        # Best effort: the caller treats an empty list as "nothing to send".
        log(f"Errore connessione DB: {e}")
        return []
async def get_messages_via_env() -> list[dict]:
    """Fallback for when the database cannot be queried.

    Currently a placeholder: it logs a notice and returns an empty list.
    """
    log("Uso messaggi di test hardcoded (DB non disponibile)")
    no_messages: list[dict] = []
    return no_messages
def _read_minio_object(client, bucket: str, object_name: str) -> bytes:
    """Fetch one object synchronously, always releasing the HTTP response."""
    response = client.get_object(bucket, object_name)
    try:
        return response.read()
    finally:
        response.close()
        response.release_conn()


async def download_eml_from_minio(raw_eml_path: str) -> bytes | None:
    """Download an EML object from MinIO using the path stored in the database.

    First tries the worker's own ``get_minio_client()``. If that module cannot
    be imported, or the download fails, falls back to a ``minio.Minio`` client
    configured from ``MINIO_ENDPOINT``, ``MINIO_ACCESS_KEY`` and
    ``MINIO_SECRET_KEY``. Both attempts read from the ``MINIO_BUCKET`` bucket
    (default ``pechub``).

    The blocking MinIO calls run in a worker thread so the event loop is not
    stalled, and the response is released even when reading fails.

    Returns:
        The object bytes, or ``None`` when both attempts fail (errors are
        logged, not raised).
    """
    try:
        from app.storage.minio_client import get_minio_client

        client = await get_minio_client()
        bucket = os.getenv("MINIO_BUCKET", "pechub")
        return await asyncio.to_thread(_read_minio_object, client, bucket, raw_eml_path)
    except ImportError:
        # Worker package not importable: go straight to the fallback below.
        pass
    except Exception as e:
        log(f" Errore download MinIO ({raw_eml_path}): {e}")

    # Fallback: talk to MinIO directly through the minio package.
    try:
        import minio  # type: ignore[import]

        endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000")
        access_key = os.getenv("MINIO_ACCESS_KEY", "minioadmin")
        secret_key = os.getenv("MINIO_SECRET_KEY", "minioadmin")
        bucket = os.getenv("MINIO_BUCKET", "pechub")
        client = minio.Minio(
            endpoint,
            access_key=access_key,
            secret_key=secret_key,
            # NOTE(review): TLS is enabled only when the endpoint string starts
            # with "https" — confirm the client accepts an endpoint with a scheme.
            secure=endpoint.startswith("https"),
        )
        return await asyncio.to_thread(_read_minio_object, client, bucket, raw_eml_path)
    except Exception as e:
        log(f" Errore download MinIO (fallback): {e}")
        return None
async def mark_message_conserved(message_id: str, versamento_id: str) -> None:
    """Flag a message as conserved in the database.

    Sets ``is_conserved = TRUE`` and ``conserved_at = NOW()`` on the
    ``messages`` row whose ``id`` equals ``message_id`` (parsed as a UUID).

    Args:
        message_id: Message primary key, as a UUID string.
        versamento_id: Accepted for the caller's convenience; it is not
            written anywhere by this function.

    Never raises: a missing ``asyncpg`` or any database error is only logged.
    """
    try:
        import asyncpg  # type: ignore[import]
    except ImportError:
        log(f" [skip] asyncpg non disponibile: impossibile aggiornare is_conserved per {message_id}")
        return

    db_url = os.getenv("DATABASE_URL", "postgresql://pechub:pechub@localhost:5432/pechub")
    # asyncpg wants the postgresql:// scheme, without a "+driver" specifier.
    db_url = db_url.replace("postgres://", "postgresql://")
    db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
    db_url = db_url.replace("postgresql+psycopg2://", "postgresql://")

    try:
        conn = await asyncpg.connect(db_url)
        try:
            await conn.execute("""
                UPDATE messages
                SET is_conserved = TRUE,
                    conserved_at = NOW()
                WHERE id = $1
            """, uuid.UUID(message_id))
        finally:
            # Close the connection even when the UPDATE (or the UUID parse) fails.
            await conn.close()
        log(f" DB aggiornato: is_conserved=TRUE per message_id={message_id}")
    except Exception as e:
        log(f" Errore aggiornamento DB per {message_id}: {e}")
# ─── Main ─────────────────────────────────────────────────────────────────────
def _build_synthetic_eml(
    msg_id: str,
    subject: str,
    from_addr: str | None,
    to_addrs: list[str],
    received_at: str,
) -> bytes:
    """Build a small placeholder EML for a message whose real EML is unavailable."""
    headers = [
        f"From: {from_addr or 'test@pec.it'}",
        f"To: {', '.join(to_addrs) if to_addrs else 'destinatario@pec.it'}",
        f"Subject: {subject}",
        f"Date: {received_at}",
        f"Message-ID: <{msg_id}@pechub.test>",
        "Content-Type: text/plain; charset=UTF-8",
        "MIME-Version: 1.0",
    ]
    body = [
        "Questo e' un messaggio PEC archiviato da PecHub.",
        f"ID messaggio: {msg_id}",
        f"Data archiviazione: {datetime.now(UTC).isoformat()}",
    ]
    # The empty element yields the blank line that separates headers from body;
    # without it the body lines would be parsed as malformed headers.
    return ("\n".join([*headers, "", *body]) + "\n").encode("utf-8")


async def main() -> None:
    """Run the whole transmission test.

    Steps: log in to Aeterna, load the messages pending conservation, and for
    each one download (or synthesise) its EML, wrap it in a BagIt SIP, upload
    it and poll the ingest status. A message is flagged as conserved when the
    ingest ends ACTIVE or is still in progress when polling stops. Finally a
    summary is printed and the per-message results are written to
    ``/tmp/aeterna_test_results.json``.

    Exits with status 1 when the login fails and with status 0 when there is
    nothing to transmit.
    """
    log_section("TEST TRASMISSIONE AETERNA PecHub")
    log(f"Endpoint: {AETERNA_ENDPOINT}")
    log(f"Username: {AETERNA_USERNAME}")
    log(f"Tenant slug: {AETERNA_TENANT_SLUG}")
    log(f"Timestamp: {datetime.now().isoformat()}")

    # 1. Connect to Aeterna
    log_section("1. AUTENTICAZIONE AETERNA")
    client = AeternaTestClient(
        endpoint=AETERNA_ENDPOINT,
        username=AETERNA_USERNAME,
        password=AETERNA_PASSWORD,
        tenant_slug=AETERNA_TENANT_SLUG,
    )
    try:
        await client.login()
    except Exception as e:
        log(f"ERRORE FATALE: impossibile autenticarsi su Aeterna: {e}")
        sys.exit(1)

    # 2. Load the messages to conserve
    log_section("2. RECUPERO MESSAGGI 'DA CONSERVARE'")
    messages = await get_pending_conservation_messages()
    if not messages:
        log("Nessun messaggio con is_pending_conservation=TRUE trovato.")
        log("Verificare che i messaggi siano stati marcati 'Da conservare' nell'interfaccia.")
        log("")
        log("Suggerimento: selezionare un messaggio in PecHub e usare")
        log("'Aggiungi a Da Conservare' per marcarlo per l'archiviazione.")
        sys.exit(0)
    log(f"Trovati {len(messages)} messaggi da conservare:")
    for i, m in enumerate(messages, 1):
        subj = (m.get("subject") or "")[:60]
        recv = str(m.get("received_at") or "")[:10]
        log(f" [{i}] id={str(m['id'])[:8]}... | data={recv} | oggetto={subj}")

    # 3. Transmission
    log_section("3. TRASMISSIONE A AETERNA")
    results = []
    for i, msg in enumerate(messages, 1):
        msg_id = str(msg["id"])
        subject = msg.get("subject") or f"PEC {msg_id[:8]}"
        from_addr = msg.get("from_address")
        # NOTE(review): assumes to_addresses arrives as a list of strings —
        # confirm the column type; a plain string would be joined per character.
        to_addrs = msg.get("to_addresses") or []
        received_at = str(msg.get("received_at") or "")
        raw_eml_path = msg.get("raw_eml_path")
        log(f"\nMessaggio [{i}/{len(messages)}]: {subject[:50]}")
        log(f" ID: {msg_id}")
        log(f" EML path: {raw_eml_path}")

        # Download the EML
        eml_bytes: bytes | None = None
        if raw_eml_path:
            log(" Download EML da MinIO ...")
            eml_bytes = await download_eml_from_minio(raw_eml_path)
        if not eml_bytes:
            # Fall back to a synthetic test EML
            log(" EML non disponibile. Generazione EML sintetico di test ...")
            eml_bytes = _build_synthetic_eml(msg_id, subject, from_addr, to_addrs, received_at)

        # Build the BagIt SIP
        log(" Costruzione pacchetto BagIt SIP ...")
        zip_bytes = build_bagit_sip(
            eml_bytes=eml_bytes,
            message_id=msg_id,
            subject=subject,
            from_address=from_addr,
            to_addresses=to_addrs,
            received_at=received_at,
        )
        log(f" SIP costruito: {len(zip_bytes):,} bytes")

        # Upload to Aeterna
        upload_result = await client.upload_sip(
            zip_bytes=zip_bytes,
            zip_filename=f"pechub-pec-{msg_id}.zip",
            title=subject,
            description=f"Messaggio PEC ID={msg_id} | Da={from_addr} | A={', '.join(to_addrs or [])}",
        )
        if not upload_result.get("success"):
            log(f" UPLOAD FALLITO: {upload_result.get('error', 'errore sconosciuto')}")
            results.append({
                "message_id": msg_id,
                "subject": subject,
                "success": False,
                "error": upload_result.get("error"),
            })
            continue

        package_id = upload_result.get("package_id")
        pid = upload_result.get("pid", "")
        log(f" package_id = {package_id}")
        log(f" pid = {pid}")
        if not package_id:
            # Without an identifier the ingest can be neither polled nor
            # recorded, so the message stays pending instead of being flagged.
            error = "package_id mancante nella risposta di upload"
            log(f" UPLOAD FALLITO: {error}")
            results.append({
                "message_id": msg_id,
                "subject": subject,
                "success": False,
                "error": error,
            })
            continue

        # Poll the status (bounded, so the test does not block for long)
        log(" Attesa elaborazione pipeline (polling 5 poll x 4s) ...")
        status_result = await client.poll_status(package_id, max_polls=5, interval=4.0)
        final_status = status_result.get("final_status", "unknown")
        log(f" Stato finale Aeterna: {final_status}")

        # success=None means the ingest was accepted and is still in progress.
        accepted = final_status == "ACTIVE" or status_result.get("success") is None
        if accepted:
            await mark_message_conserved(msg_id, package_id)
        outcome = {
            "message_id": msg_id,
            "subject": subject,
            # A FAILED/REJECTED ingest must not be counted as a success.
            "success": accepted,
            "package_id": package_id,
            "pid": pid,
            "final_status": final_status,
            "latency_ms": upload_result.get("latency_ms"),
        }
        if not accepted:
            outcome["error"] = f"Stato finale Aeterna: {final_status}"
        results.append(outcome)

    # 4. Summary
    log_section("4. RIEPILOGO")
    ok = sum(1 for r in results if r.get("success"))
    log(f"Messaggi trasmessi con successo: {ok}/{len(results)}")
    log("")
    for r in results:
        icon = "OK" if r.get("success") else "FAIL"
        log(f" [{icon}] {r['subject'][:50]}")
        if r.get("package_id"):
            log(f" package_id = {r['package_id']}")
            log(f" pid = {r.get('pid', '-')}")
            log(f" status = {r.get('final_status', '-')}")
            log(f" latency = {r.get('latency_ms', '-')} ms")
        if r.get("error"):
            log(f" error = {r['error'][:100]}")

    # Save the results as JSON (default=str covers UUIDs, datetimes and the like)
    output_file = Path("/tmp/aeterna_test_results.json")
    output_file.write_text(json.dumps(results, indent=2, default=str), encoding="utf-8")
    log(f"\nRisultati salvati in: {output_file}")
    if ok == len(results) and results:
        log("\nTest completato con successo!")
    elif not results:
        log("\nNessun messaggio trasmesso.")
    else:
        log(f"\nTest parzialmente riuscito ({ok}/{len(results)} messaggi trasmessi).")
# Run the async entry point only when the file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())