517 lines
19 KiB
Python
517 lines
19 KiB
Python
"""
Transmission test script for Aeterna (certified archiving).

This script is STANDALONE: it connects directly to the PecHub DB,
finds the messages with is_pending_conservation=True, downloads their EML
files from MinIO, builds BagIt SIP packages and sends them to Aeterna.

Usage (run on the server inside the worker container, or directly):

    # On the server, inside the worker container:
    docker exec -it pechub-worker-1 python /app/scripts/test_aeterna_transmission.py

    # With custom credentials:
    AETERNA_USERNAME=xxx AETERNA_PASSWORD=yyy \
        python worker/scripts/test_aeterna_transmission.py

Accepted environment variables (they override the defaults):
    AETERNA_ENDPOINT      Default: https://api.aeterna.idrainformatica.it
    AETERNA_USERNAME      Default: matteo@idrainformatica.it
    AETERNA_PASSWORD      Password of the Aeterna account (set it in the environment / .env)
    AETERNA_TENANT_SLUG   Default: pechub
    DATABASE_URL          Default: read from the worker's .env

Output:
    For each message: transmission status, Aeterna versamento_id, latency.
    At the end: sets is_conserved=True in the DB if the ingest succeeded.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import hashlib
|
||
import io
|
||
import json
|
||
import os
|
||
import sys
|
||
import time
|
||
import uuid
|
||
import zipfile
|
||
from datetime import UTC, datetime
|
||
from pathlib import Path
|
||
|
||
# ─── Setup path per importare dal worker ─────────────────────────────────────
|
||
# Se eseguito da fuori il container, aggiungi il path del worker
|
||
# Directory two levels above this file; it is put first on sys.path so that
# imports of the worker's own packages resolve when the script is run directly.
worker_dir = Path(__file__).parent.parent
sys.path[:0] = [str(worker_dir)]
|
||
|
||
# ─── Configurazione Aeterna ───────────────────────────────────────────────────
|
||
|
||
# Connection settings for Aeterna; each value can be overridden through the
# environment variable of the same name.
AETERNA_ENDPOINT = os.getenv("AETERNA_ENDPOINT", "https://api.aeterna.idrainformatica.it")
AETERNA_USERNAME = os.getenv("AETERNA_USERNAME", "matteo@idrainformatica.it")
# SECURITY: no password is embedded in the source. It must be supplied through
# the AETERNA_PASSWORD environment variable (e.g. from the worker's .env);
# with an empty password the login step fails with an explicit error.
AETERNA_PASSWORD = os.getenv("AETERNA_PASSWORD", "")
AETERNA_TENANT_SLUG = os.getenv("AETERNA_TENANT_SLUG", "pechub")
|
||
|
||
# ─── Funzioni helper ──────────────────────────────────────────────────────────
|
||
|
||
def log(msg: str) -> None:
    """Print *msg* on stdout, prefixed with the local wall-clock time as ``[HH:MM:SS]``."""
    stamp = f"{datetime.now():%H:%M:%S}"
    print("[" + stamp + "] " + f"{msg}")
|
||
|
||
|
||
def log_section(title: str) -> None:
    """Print *title* as a section banner framed by two 60-character ``=`` rules.

    A blank line is emitted before the first rule.
    """
    rule = "=" * 60
    banner = ["", rule, f" {title}", rule]
    print("\n".join(banner))
|
||
|
||
|
||
def build_bagit_sip(
|
||
eml_bytes: bytes,
|
||
message_id: str,
|
||
subject: str | None = None,
|
||
from_address: str | None = None,
|
||
to_addresses: list[str] | None = None,
|
||
received_at: str | None = None,
|
||
) -> bytes:
|
||
"""Costruisce un pacchetto BagIt RFC 8493 in memoria (ZIP)."""
|
||
bag_name = f"pechub-pec-{message_id}"
|
||
eml_filename = f"{message_id}.eml"
|
||
data_path = f"data/{eml_filename}"
|
||
eml_sha256 = hashlib.sha256(eml_bytes).hexdigest()
|
||
|
||
bagit_txt = "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8\n"
|
||
|
||
bag_info_lines = [
|
||
"Bag-Software-Agent: PecHub Archival Module (test script)",
|
||
f"Bagging-Date: {datetime.now(UTC).strftime('%Y-%m-%d')}",
|
||
f"External-Identifier: {message_id}",
|
||
"Source-Organization: PecHub",
|
||
]
|
||
if subject:
|
||
bag_info_lines.append(f"Description: {subject[:500]}")
|
||
if from_address:
|
||
bag_info_lines.append(f"Contact-Email: {from_address}")
|
||
if to_addresses:
|
||
bag_info_lines.append(f"External-Description: PEC a {', '.join(to_addresses[:3])}")
|
||
if received_at:
|
||
bag_info_lines.append(f"Bag-Group-Identifier: {received_at[:10]}")
|
||
bag_info_txt = "\n".join(bag_info_lines) + "\n"
|
||
|
||
manifest_txt = f"{eml_sha256} {data_path}\n"
|
||
|
||
buf = io.BytesIO()
|
||
with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
|
||
zf.writestr(f"{bag_name}/bagit.txt", bagit_txt)
|
||
zf.writestr(f"{bag_name}/bag-info.txt", bag_info_txt)
|
||
zf.writestr(f"{bag_name}/manifest-sha256.txt", manifest_txt)
|
||
zf.writestr(f"{bag_name}/{data_path}", eml_bytes)
|
||
|
||
return buf.getvalue()
|
||
|
||
|
||
# ─── Client Aeterna (inline, senza dipendenze worker) ────────────────────────
|
||
|
||
class AeternaTestClient:
    """Minimal Aeterna HTTP client used by the transmission test.

    It keeps the credentials, caches the bearer token returned by the login
    endpoint, and offers the two calls the script needs: SIP upload and
    ingest-status polling. ``httpx`` is imported inside each method, so the
    class can be defined even where ``httpx`` is not installed.
    """

    def __init__(self, endpoint: str, username: str, password: str, tenant_slug: str):
        """Store the connection parameters; no network traffic happens here.

        Args:
            endpoint: Base URL of the Aeterna API (trailing slashes are removed).
            username: E-mail used for the login request.
            password: Password used for the login request.
            tenant_slug: Tenant identifier sent with the login request.
        """
        self.endpoint = endpoint.rstrip("/")
        self.username = username
        self.password = password
        self.tenant_slug = tenant_slug
        self._token: str | None = None
        # time.monotonic() value after which the cached token is considered stale.
        self._token_expires_at: float = 0.0

    async def login(self) -> str:
        """Authenticate against Aeterna and cache the returned access token.

        Returns:
            The access token.

        Raises:
            RuntimeError: If the login endpoint does not answer HTTP 200.
            KeyError: If the response body has no ``access_token`` field.
        """
        import httpx

        log(f" Autenticazione su Aeterna ({self.endpoint}) ...")
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.post(
                f"{self.endpoint}/api/v1/auth/login",
                json={
                    "email": self.username,
                    "password": self.password,
                    "tenant_slug": self.tenant_slug,
                },
            )

        if resp.status_code != 200:
            raise RuntimeError(
                f"Login fallito HTTP {resp.status_code}: {resp.text[:300]}"
            )

        data = resp.json()
        self._token = data["access_token"]
        expires_in = int(data.get("expires_in", 3600))
        # Refresh 60 seconds early so a token never expires mid-request.
        self._token_expires_at = time.monotonic() + expires_in - 60
        # "user" may be absent or null in the response; both fall back to "?".
        user_email = (data.get("user") or {}).get("email", "?")
        log(f" Login riuscito come: {user_email}")
        return self._token

    async def get_token(self) -> str:
        """Return the cached token while it is still valid, otherwise log in again."""
        if self._token and time.monotonic() < self._token_expires_at:
            return self._token
        return await self.login()

    async def upload_sip(
        self,
        zip_bytes: bytes,
        zip_filename: str,
        title: str,
        description: str = "",
    ) -> dict:
        """Upload a SIP archive to the ingest endpoint as a multipart form.

        Args:
            zip_bytes: Content of the ZIP archive.
            zip_filename: File name announced in the multipart part.
            title: Title form field (truncated to 500 characters).
            description: Description form field (truncated to 500 characters).

        Returns:
            On HTTP 200/201/202: ``{"success": True, "latency_ms": ...}`` merged
            with the JSON body of the response. Otherwise a dict with
            ``success=False``, ``latency_ms``, ``error`` (first 300 characters
            of the response text) and ``status_code``.
        """
        import httpx

        token = await self.get_token()
        log(f" Upload SIP '{zip_filename}' ({len(zip_bytes):,} bytes) ...")
        t_start = time.monotonic()

        async with httpx.AsyncClient(timeout=120) as client:
            resp = await client.post(
                f"{self.endpoint}/api/v1/ingest/upload",
                headers={"Authorization": f"Bearer {token}"},
                files={"file": (zip_filename, zip_bytes, "application/zip")},
                data={
                    "title": title[:500],
                    "description": description[:500],
                    "creator": "PecHub Test Script",
                },
            )

        latency_ms = int((time.monotonic() - t_start) * 1000)

        if resp.status_code in (200, 201, 202):
            data = resp.json()
            log(f" Upload OK in {latency_ms}ms – package_id: {data.get('package_id')}")
            return {"success": True, "latency_ms": latency_ms, **data}

        log(f" Upload FALLITO HTTP {resp.status_code}: {resp.text[:300]}")
        return {
            "success": False,
            "latency_ms": latency_ms,
            "error": resp.text[:300],
            "status_code": resp.status_code,
        }

    async def poll_status(self, package_id: str, max_polls: int = 10, interval: float = 3.0) -> dict:
        """Poll the ingest status of a package until it reaches a final state.

        Args:
            package_id: Identifier returned by the upload call.
            max_polls: Maximum number of status requests.
            interval: Seconds to wait between two requests.

        Returns:
            * Final state reached (``ACTIVE``, ``FAILED``, ``REJECTED``):
              ``{"success": status == "ACTIVE", "final_status": status}``
              merged with the JSON body of the response.
            * Status endpoint answered with a non-200 code: ``success=None``
              plus ``message`` and ``status_code`` describing the error.
            * No final state within ``max_polls`` requests:
              ``{"success": None, "message": "polling esaurito"}``.
        """
        import httpx

        token = await self.get_token()
        log(f" Polling status package_id={package_id} (max {max_polls} tentativi) ...")

        status_url = f"{self.endpoint}/api/v1/ingest/{package_id}/status"
        auth_headers = {"Authorization": f"Bearer {token}"}

        # One client for the whole polling session instead of one per request.
        async with httpx.AsyncClient(timeout=15) as client:
            for i in range(max_polls):
                resp = await client.get(status_url, headers=auth_headers)

                if resp.status_code != 200:
                    # An HTTP error is not the same as "still processing":
                    # report it as such instead of as exhausted polling.
                    log(f" Polling error HTTP {resp.status_code}")
                    return {
                        "success": None,
                        "message": f"polling interrotto: HTTP {resp.status_code}",
                        "status_code": resp.status_code,
                    }

                data = resp.json()
                # A missing or null status is treated as UNKNOWN (non-final).
                status = str(data.get("status") or "UNKNOWN").upper()
                stage = data.get("pipeline_stage", "")
                pct = data.get("progress_pct", 0)
                log(f" [{i+1}/{max_polls}] status={status} stage={stage} progress={pct}%")

                if status in ("ACTIVE", "FAILED", "REJECTED"):
                    return {"success": status == "ACTIVE", "final_status": status, **data}

                # No sleep after the last attempt.
                if i < max_polls - 1:
                    await asyncio.sleep(interval)

        log(" Polling completato (stato non finale raggiunto, processo ancora in corso)")
        return {"success": None, "message": "polling esaurito"}
|
||
|
||
|
||
# ─── Recupero messaggi da DB e MinIO ─────────────────────────────────────────
|
||
|
||
async def get_pending_conservation_messages() -> list[dict]:
    """Fetch from the PecHub DB the messages still waiting to be conserved.

    Selects up to 20 rows of ``messages`` with ``is_pending_conservation``
    true and ``is_conserved`` false, most recently received first.

    The connection URL comes from ``DATABASE_URL``; when unset, a local
    default is used. SQLAlchemy-style driver suffixes are stripped because
    asyncpg only accepts a plain ``postgresql://`` URL.

    Returns:
        One dict per row with the selected columns. An empty list is returned
        when the query or the connection fails (the error is logged). When
        ``asyncpg`` is not installed, the result of ``get_messages_via_env()``
        is returned instead.
    """
    try:
        import asyncpg  # type: ignore[import]
    except ImportError:
        log("asyncpg non installato. Uso psycopg2 come fallback...")
        return await get_messages_via_env()

    db_url = os.getenv("DATABASE_URL", "")
    if not db_url:
        log("DATABASE_URL non impostata. Tento connessione locale...")
        db_url = "postgresql://pechub:pechub@localhost:5432/pechub"

    # asyncpg wants postgresql://, not postgres://, and no "+driver" specifier.
    db_url = db_url.replace("postgres://", "postgresql://")
    db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
    db_url = db_url.replace("postgresql+psycopg2://", "postgresql://")

    try:
        conn = await asyncpg.connect(db_url)
        try:
            rows = await conn.fetch("""
                SELECT
                    m.id,
                    m.tenant_id,
                    m.subject,
                    m.from_address,
                    m.to_addresses,
                    m.received_at,
                    m.raw_eml_path,
                    m.is_pending_conservation,
                    m.is_conserved
                FROM messages m
                WHERE m.is_pending_conservation = TRUE
                  AND m.is_conserved = FALSE
                ORDER BY m.received_at DESC
                LIMIT 20
            """)
        finally:
            # Close the connection even when the query itself fails.
            await conn.close()
        return [dict(row) for row in rows]
    except Exception as e:
        # Best-effort script: report the problem and continue with no messages.
        log(f"Errore connessione DB: {e}")
        return []
|
||
|
||
|
||
async def get_messages_via_env() -> list[dict]:
    """Fallback used when the database cannot be queried.

    Currently a stub: it only logs a notice and yields no messages.
    """
    no_messages: list[dict] = []
    log("Uso messaggi di test hardcoded (DB non disponibile)")
    return no_messages
|
||
|
||
|
||
def _read_minio_object(client, bucket: str, object_name: str) -> bytes:
    """Fetch one object and return its bytes, always releasing the connection."""
    response = client.get_object(bucket, object_name)
    try:
        return response.read()
    finally:
        response.close()
        response.release_conn()


async def download_eml_from_minio(raw_eml_path: str) -> bytes | None:
    """Download an EML file from MinIO using the object path stored in the DB.

    Two strategies are tried in order:

    1. the worker's own client (``app.storage.minio_client.get_minio_client``);
    2. a client built directly with the ``minio`` package from the
       ``MINIO_ENDPOINT``, ``MINIO_ACCESS_KEY`` and ``MINIO_SECRET_KEY``
       environment variables.

    The bucket is read from ``MINIO_BUCKET`` (default ``pechub``). The
    blocking MinIO calls run in a worker thread so the event loop stays free.

    Args:
        raw_eml_path: Object name of the EML inside the bucket.

    Returns:
        The object content, or ``None`` when both strategies fail (each
        failure is logged).
    """
    try:
        from app.storage.minio_client import get_minio_client

        client = await get_minio_client()
        bucket = os.getenv("MINIO_BUCKET", "pechub")
        return await asyncio.to_thread(_read_minio_object, client, bucket, raw_eml_path)
    except ImportError:
        # Worker package not importable (script run outside the container):
        # go straight to the direct client.
        pass
    except Exception as e:
        log(f" Errore download MinIO ({raw_eml_path}): {e}")

    # Fallback: talk to MinIO directly through the minio package.
    try:
        import minio  # type: ignore[import]

        endpoint = os.getenv("MINIO_ENDPOINT", "localhost:9000")
        access_key = os.getenv("MINIO_ACCESS_KEY", "minioadmin")
        secret_key = os.getenv("MINIO_SECRET_KEY", "minioadmin")
        bucket = os.getenv("MINIO_BUCKET", "pechub")

        secure = endpoint.startswith("https")
        # Minio() expects a bare "host[:port]"; strip any URL scheme (and a
        # trailing slash) that the environment variable may carry.
        for scheme in ("https://", "http://"):
            endpoint = endpoint.removeprefix(scheme)
        endpoint = endpoint.rstrip("/")

        client = minio.Minio(
            endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,
        )
        return await asyncio.to_thread(_read_minio_object, client, bucket, raw_eml_path)
    except Exception as e:
        log(f" Errore download MinIO (fallback): {e}")
        return None
|
||
|
||
|
||
async def mark_message_conserved(message_id: str, versamento_id: str) -> None:
    """Flag a message as conserved in the PecHub DB.

    Sets ``is_conserved = TRUE`` and ``conserved_at = NOW()`` on the row of
    ``messages`` whose ``id`` equals *message_id*. Failures are logged and
    never raised; when ``asyncpg`` is not installed the update is skipped.

    Args:
        message_id: UUID of the message, as a string.
        versamento_id: Aeterna identifier of the deposit. Accepted for the
            caller's convenience but not written anywhere by this function.
    """
    try:
        import asyncpg  # type: ignore[import]
    except ImportError:
        log(f" [skip] asyncpg non disponibile: impossibile aggiornare is_conserved per {message_id}")
        return

    # asyncpg only accepts a plain postgresql:// URL (no "+driver" suffix).
    db_url = os.getenv("DATABASE_URL", "postgresql://pechub:pechub@localhost:5432/pechub")
    db_url = db_url.replace("postgres://", "postgresql://")
    db_url = db_url.replace("postgresql+asyncpg://", "postgresql://")
    db_url = db_url.replace("postgresql+psycopg2://", "postgresql://")

    try:
        conn = await asyncpg.connect(db_url)
        try:
            await conn.execute("""
                UPDATE messages
                SET is_conserved = TRUE,
                    conserved_at = NOW()
                WHERE id = $1
            """, uuid.UUID(message_id))
        finally:
            # Close the connection even when the UPDATE (or the UUID parse) fails.
            await conn.close()
        log(f" DB aggiornato: is_conserved=TRUE per message_id={message_id}")
    except Exception as e:
        log(f" Errore aggiornamento DB per {message_id}: {e}")
|
||
|
||
|
||
# ─── Main ─────────────────────────────────────────────────────────────────────
|
||
|
||
def _synthetic_eml(
    msg_id: str,
    subject: str,
    from_addr: str | None,
    to_addrs: list[str],
    received_at: str,
) -> bytes:
    """Build a small placeholder EML for a message whose original is unavailable."""
    lines = [
        f"From: {from_addr or 'test@pec.it'}",
        f"To: {', '.join(to_addrs) if to_addrs else 'destinatario@pec.it'}",
        f"Subject: {subject}",
        f"Date: {received_at}",
        f"Message-ID: <{msg_id}@pechub.test>",
        "Content-Type: text/plain; charset=UTF-8",
        "MIME-Version: 1.0",
        "",
        "Questo e' un messaggio PEC archiviato da PecHub.",
        f"ID messaggio: {msg_id}",
        f"Data archiviazione: {datetime.now(UTC).isoformat()}",
        "",
    ]
    return "\n".join(lines).encode("utf-8")


async def main() -> None:
    """Run the end-to-end transmission test.

    Steps:
        1. Log in to Aeterna (exit code 1 on failure).
        2. Load the messages pending conservation (exit code 0 if none).
        3. For each message: download the EML (or synthesize a placeholder),
           build the BagIt SIP, upload it and poll the ingest status. The
           message is flagged as conserved when the ingest is ``ACTIVE`` or
           still in progress; an ingest that ends ``FAILED``/``REJECTED`` is
           recorded as a failure.
        4. Print a summary and write the results to
           ``/tmp/aeterna_test_results.json``.
    """
    log_section("TEST TRASMISSIONE AETERNA – PecHub")
    log(f"Endpoint: {AETERNA_ENDPOINT}")
    log(f"Username: {AETERNA_USERNAME}")
    log(f"Tenant slug: {AETERNA_TENANT_SLUG}")
    log(f"Timestamp: {datetime.now().isoformat()}")

    # 1. Connect to Aeterna
    log_section("1. AUTENTICAZIONE AETERNA")
    client = AeternaTestClient(
        endpoint=AETERNA_ENDPOINT,
        username=AETERNA_USERNAME,
        password=AETERNA_PASSWORD,
        tenant_slug=AETERNA_TENANT_SLUG,
    )
    try:
        await client.login()
    except Exception as e:
        log(f"ERRORE FATALE: impossibile autenticarsi su Aeterna: {e}")
        sys.exit(1)

    # 2. Load the messages to conserve
    log_section("2. RECUPERO MESSAGGI 'DA CONSERVARE'")
    messages = await get_pending_conservation_messages()

    if not messages:
        log("Nessun messaggio con is_pending_conservation=TRUE trovato.")
        log("Verificare che i messaggi siano stati marcati 'Da conservare' nell'interfaccia.")
        log("")
        log("Suggerimento: selezionare un messaggio in PecHub e usare")
        log("'Aggiungi a Da Conservare' per marcarlo per l'archiviazione.")
        sys.exit(0)

    log(f"Trovati {len(messages)} messaggi da conservare:")
    for i, m in enumerate(messages, 1):
        subj = (m.get("subject") or "")[:60]
        recv = str(m.get("received_at") or "")[:10]
        log(f" [{i}] id={str(m['id'])[:8]}... | data={recv} | oggetto={subj}")

    # 3. Transmission
    log_section("3. TRASMISSIONE A AETERNA")
    results = []

    for i, msg in enumerate(messages, 1):
        msg_id = str(msg["id"])
        subject = msg.get("subject") or f"PEC {msg_id[:8]}"
        from_addr = msg.get("from_address")
        to_addrs = msg.get("to_addresses") or []
        received_at = str(msg.get("received_at") or "")
        raw_eml_path = msg.get("raw_eml_path")

        log(f"\nMessaggio [{i}/{len(messages)}]: {subject[:50]}")
        log(f" ID: {msg_id}")
        log(f" EML path: {raw_eml_path}")

        # Download the EML
        eml_bytes: bytes | None = None
        if raw_eml_path:
            log(" Download EML da MinIO ...")
            eml_bytes = await download_eml_from_minio(raw_eml_path)

        if not eml_bytes:
            # Original not available: fall back to a synthetic test EML.
            log(" EML non disponibile. Generazione EML sintetico di test ...")
            eml_bytes = _synthetic_eml(msg_id, subject, from_addr, to_addrs, received_at)

        # Build the BagIt SIP
        log(" Costruzione pacchetto BagIt SIP ...")
        zip_bytes = build_bagit_sip(
            eml_bytes=eml_bytes,
            message_id=msg_id,
            subject=subject,
            from_address=from_addr,
            to_addresses=to_addrs,
            received_at=received_at,
        )
        log(f" SIP costruito: {len(zip_bytes):,} bytes")

        # Upload to Aeterna
        upload_result = await client.upload_sip(
            zip_bytes=zip_bytes,
            zip_filename=f"pechub-pec-{msg_id}.zip",
            title=subject,
            description=f"Messaggio PEC ID={msg_id} | Da={from_addr} | A={', '.join(to_addrs or [])}",
        )

        if not upload_result.get("success"):
            log(f" UPLOAD FALLITO: {upload_result.get('error', 'errore sconosciuto')}")
            results.append({
                "message_id": msg_id,
                "subject": subject,
                "success": False,
                "error": upload_result.get("error"),
            })
            continue

        package_id = upload_result.get("package_id")
        pid = upload_result.get("pid", "")
        log(f" package_id = {package_id}")
        log(f" pid = {pid}")

        # Status polling (optional, non-blocking for the test)
        log(" Attesa elaborazione pipeline (polling 5 poll x 4s) ...")
        status_result = await client.poll_status(package_id, max_polls=5, interval=4.0)
        final_status = status_result.get("final_status", "unknown")
        log(f" Stato finale Aeterna: {final_status}")

        # success=None means the ingest was accepted but is still running;
        # success=False means Aeterna ended the ingest as FAILED/REJECTED.
        accepted = final_status == "ACTIVE" or status_result.get("success") is None
        if accepted:
            await mark_message_conserved(msg_id, package_id)
        else:
            log(f" INGEST FALLITO: stato Aeterna {final_status}")

        outcome = {
            "message_id": msg_id,
            "subject": subject,
            # A failed/rejected ingest must not be counted as transmitted.
            "success": accepted,
            "package_id": package_id,
            "pid": pid,
            "final_status": final_status,
            "latency_ms": upload_result.get("latency_ms"),
        }
        if not accepted:
            outcome["error"] = f"ingest terminato con stato {final_status}"
        results.append(outcome)

    # 4. Summary
    log_section("4. RIEPILOGO")
    ok = sum(1 for r in results if r.get("success"))
    log(f"Messaggi trasmessi con successo: {ok}/{len(results)}")
    log("")
    for r in results:
        icon = "OK" if r.get("success") else "FAIL"
        log(f" [{icon}] {r['subject'][:50]}")
        if r.get("package_id"):
            log(f" package_id = {r['package_id']}")
            log(f" pid = {r.get('pid', '-')}")
            log(f" status = {r.get('final_status', '-')}")
            log(f" latency = {r.get('latency_ms', '-')} ms")
        if r.get("error"):
            log(f" error = {r['error'][:100]}")

    # Save the results as JSON
    output_file = Path("/tmp/aeterna_test_results.json")
    output_file.write_text(json.dumps(results, indent=2, default=str))
    log(f"\nRisultati salvati in: {output_file}")

    if ok == len(results) and results:
        log("\nTest completato con successo!")
    elif not results:
        log("\nNessun messaggio trasmesso.")
    else:
        log(f"\nTest parzialmente riuscito ({ok}/{len(results)} messaggi trasmessi).")
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: run the async workflow to completion on a fresh event loop.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)
|