From 8d0c3cbde2b9bb5e9ab44887ed96274f0603beb9 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 4 Mar 2026 10:09:32 +0100
Subject: [PATCH 01/66] DFL trustworthiness and numerical datasets

---
 nebula/addons/trustworthiness/calculation.py  | 406 ++++++++++-
 .../trustworthiness/configs/eval_metrics.json | 102 ++-
 .../configs/eval_metrics_dfl.json             | 640 +++++++++++++++++
 .../configs/factsheet_template.json           |   7 +-
 .../configs/factsheet_template_dfl.json       |  56 ++
 nebula/addons/trustworthiness/dfl_local.py    | 285 ++++++++
 nebula/addons/trustworthiness/factsheet.py    |  82 ++-
 nebula/addons/trustworthiness/graphics.py     | 122 +++-
 nebula/addons/trustworthiness/metric.py       |  70 +-
 .../trustworthiness/per_round_metrics.py      | 174 +++++
 .../addons/trustworthiness/trustworthiness.py | 246 +++++--
 nebula/addons/trustworthiness/utils.py        |  62 +-
 nebula/controller/scenarios.py                |  48 ++
 nebula/core/datasets/adultcensus/__init__.py  |   0
 .../core/datasets/adultcensus/adultcensus.py  | 242 +++++++
 .../core/datasets/breast_cancer/__init__.py   |   0
 .../datasets/breast_cancer/breast_cancer.py   | 158 +++++
 nebula/core/datasets/covtype/__init__.py      |   0
 nebula/core/datasets/covtype/covtype.py       | 220 ++++++
 nebula/core/datasets/nebuladataset.py         |   6 +
 nebula/core/models/adultcensus/__init__.py    |   0
 nebula/core/models/adultcensus/mlp.py         |  67 ++
 nebula/core/models/breast_cancer/__init__.py  |   0
 nebula/core/models/breast_cancer/mlp.py       |  55 ++
 nebula/core/models/covtype/__init__.py        |   0
 nebula/core/models/covtype/mlp.py             |  55 ++
 nebula/core/node.py                           |  27 +
 .../static/js/deployment/help-content.js      |   3 +
 nebula/frontend/static/js/deployment/main.js  |   8 +-
 .../frontend/static/js/deployment/scenario.js | 132 +++-
 .../static/js/deployment/trustworthiness.js   | 643 ++++++++++++------
 nebula/frontend/templates/deployment.html     | 509 +++++++++-----
 32 files changed, 3937 insertions(+), 488 deletions(-)
 create mode 100755 nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
 create mode 100755 nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
 create mode 100644 nebula/addons/trustworthiness/dfl_local.py
 create mode 100644 nebula/addons/trustworthiness/per_round_metrics.py
 create mode 100755 nebula/core/datasets/adultcensus/__init__.py
 create mode 100644 nebula/core/datasets/adultcensus/adultcensus.py
 create mode 100755 nebula/core/datasets/breast_cancer/__init__.py
 create mode 100644 nebula/core/datasets/breast_cancer/breast_cancer.py
 create mode 100755 nebula/core/datasets/covtype/__init__.py
 create mode 100644 nebula/core/datasets/covtype/covtype.py
 create mode 100755 nebula/core/models/adultcensus/__init__.py
 create mode 100644 nebula/core/models/adultcensus/mlp.py
 create mode 100755 nebula/core/models/breast_cancer/__init__.py
 create mode 100644 nebula/core/models/breast_cancer/mlp.py
 create mode 100755 nebula/core/models/covtype/__init__.py
 create mode 100644 nebula/core/models/covtype/mlp.py

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index db3499f5d..251fe1e5a 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -12,10 +12,12 @@
 import shap
 import torch.nn
 from art.estimators.classification import PyTorchClassifier
-from art.metrics import clever_u
+from art.metrics import clever_u, loss_sensitivity, empirical_robustness
 from codecarbon import EmissionsTracker
 from scipy.stats import variation
 from torch import nn, optim
+import torch.nn.functional as F
+import time
 
 from nebula.addons.trustworthiness.utils import read_csv
 
@@ -286,6 +288,21 @@ def get_bytes_models(models_files):
 
     return avg_model_size
 
+def get_bytes_model(model_file):
+    """
+    Returns the size in bytes of the final model file of a node.
+
+    Args:
+        model_file: Path of the model file, as accepted by os.path.getsize.
+
+    Returns:
+        int: The size of the file in bytes.
+    """
+
+    model_size = os.path.getsize(model_file)
+
+    return model_size
+
 
 def get_bytes_sent_recv(scenario_name):
     """
@@ -309,7 +326,7 @@ def get_bytes_sent_recv(scenario_name):
 
     total_upload_bytes = int(data["bytes_sent"].sum())
     total_download_bytes = int(data["bytes_recv"].sum())
-    
+
     avg_upload_bytes = total_upload_bytes / number_files
     avg_download_bytes = total_download_bytes / number_files
 
@@ -330,15 +347,46 @@ def get_avg_loss_accuracy(scenario_name):
     total_accuracy = 0
     total_loss = 0
 
+    expected_nodes = 3
+    """
+    if os.path.exists(factsheet_file):
+        with open(factsheet_file, "r") as f:
+            fs = json.load(f)
+        # normalmente client_num viene como string, lo convierto
+        expected_nodes = int(fs.get("participants", {}).get("client_num", 0) or 0)
+        logger.info(f"nodes={expected_nodes}")
+    """
+
+
     data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
 
+    logger.info(f"FIRST 5 LINES:\n{open(data_file,'r').read().splitlines()[:5]}")
+    logger.info(f"LAST 5 LINES:\n{open(data_file,'r').read().splitlines()[-5:]}")
+
     data = read_csv(data_file)
 
+    logger.info(f"shape={data.shape}")
+    logger.info(f"dtypes={data.dtypes.to_dict()}")
+    logger.info(f"accuracy sample raw={data['accuracy'].head(20).tolist()}")
+    logger.info(f"accuracy non-null={data['accuracy'].notna().sum()}")
+
     number_files = len(data)
+    logger.info(f"number_files={number_files}")
+
+    """
+    while (number_files != expected_nodes):
+        logger.info("WAIT")
+        time.sleep(5)
+        data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
+        data = read_csv(data_file)
+        number_files = len(data)
+        logger.info(f"number_files={number_files}")
+        logger.info(f"expected_nodes={expected_nodes}")
+    """
 
     total_loss = data["loss"].sum()
     total_accuracy = data["accuracy"].sum()
-    
+
     avg_loss = total_loss / number_files
     avg_accuracy = total_accuracy / number_files
     std_accuracy = statistics.stdev(data["accuracy"])
@@ -399,18 +447,46 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate):
         float: The CLEVER score.
     """
 
+
     images, _ = test_sample
-    background = images[-1]
+    input_shape = None
+
+    # Si por cualquier motivo llega sin batch, lo añadimos
+    if torch.is_tensor(images) and images.dim() >= 1 and images.shape[0] != 0:
+        pass
+    else:
+        raise ValueError("`test_sample[0]` debe ser un torch.Tensor no vacío.")
+
+    if input_shape is None:
+        if images.dim() >= 2:
+            # (B, ...) -> input_shape = (...)
+            input_shape = tuple(images.shape[1:])
+        else:
+            # (...) sin batch
+            input_shape = tuple(images.shape)
+
+    # Escogemos un "background" (aquí el último del batch, como hacías tú)
+    background = images[-1] if images.dim() >= 2 else images
+
+    # Convertir a numpy de forma segura (GPU-friendly)
+    x = background.detach().cpu().numpy()
+
+    # Asegurar batch dimension para clever_u: (1, *input_shape)
+    if tuple(x.shape) == tuple(input_shape):
+        x = x.reshape((1,) + tuple(input_shape))
+
 
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), learning_rate)
 
+
+
     # Create the ART classifier
     classifier = PyTorchClassifier(
         model=model,
         loss=criterion,
         optimizer=optimizer,
-        input_shape=(1, 28, 28),
+        input_shape=input_shape,
         nb_classes=nb_classes,
     )
 
@@ -434,6 +510,7 @@ def stop_emissions_tracking_and_save(
     role: str,
     workload: str,
     sample_size: int = 0,
+    participant_idx=None,
 ):
     """
     Stops emissions tracking object from CodeCarbon and saves relevant information to emissions.csv file.
@@ -456,6 +533,7 @@ def stop_emissions_tracking_and_save(
     else:
         df = pd.DataFrame(
             columns=[
+                "id",
                 "role",
                 "energy_grid",
                 "emissions",
@@ -470,6 +548,7 @@ def stop_emissions_tracking_and_save(
             [
                 df,
                 pd.DataFrame({
+                    "id": participant_idx,
                     "role": role,
                     "energy_grid": [energy_grid],
                     "emissions": [tracker.final_emissions_data.emissions],
@@ -491,3 +570,320 @@ def stop_emissions_tracking_and_save(
         df.to_csv(emissions_file, encoding="utf-8", index=False)
     except Exception as e:
         logger.warning(e)
+
+def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: float = 1e-12) -> float:
+    """
+    Communication efficiency = total_bytes / test_acc_avg.
+    Lower is better.
+
+    Args:
+        bytes_up: total uploaded bytes
+        bytes_down: total downloaded bytes
+        test_acc_avg: average test accuracy; used as-is (no percentage rescaling is applied)
+        eps: lower bound applied to the accuracy to avoid division by zero
+
+    Returns:
+        float: (bytes_up + bytes_down) divided by the accuracy, after raising the accuracy to eps if below it
+    """
+    total_bytes = float(bytes_up) + float(bytes_down)
+    acc = float(test_acc_avg)
+
+    # If the factsheet stores accuracy as a percentage (0-100), uncomment this:
+    # if acc > 1.0:
+    #     acc = acc / 100.0
+
+    if acc < eps:
+        acc = eps
+
+    return total_bytes / acc
+
+def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate):
+    """Return ART's loss sensitivity (as a float) for the last sample of an (inputs, labels) batch."""
+    images, labels = test_sample
+    sample = images[-1].unsqueeze(0)
+    label = labels[-1].unsqueeze(0)
+
+    label = F.one_hot(label, num_classes=nb_classes).float()
+
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), learning_rate)
+
+    # Create the ART classifier
+    classifier = PyTorchClassifier(
+        model=model,
+        loss=criterion,
+        optimizer=optimizer,
+        input_shape=sample.shape[1:],
+        nb_classes=nb_classes,
+    )
+    # detach + cpu first: Tensor.numpy() raises for CUDA tensors and for tensors that require grad
+    score = loss_sensitivity(
+        classifier,
+        sample.detach().cpu().numpy(),
+        label.detach().cpu().numpy(),
+    )
+    return float(score)
+
+def compute_adversarial_accuracy_art(
+    model,
+    test_loader,
+    nb_classes,
+    learning_rate,
+    epsilon=0.03
+):
+    """
+    Computes adversarial accuracy under an FGSM attack.
+
+    The score is the accuracy of the model on FGSM-perturbed inputs, i.e.
+    correctly classified adversarial samples divided by all samples.
+
+    Adversarial examples come from the plain-PyTorch `fgsm_attack` helper of
+    this module. No ART estimator takes part in the computation, so none is
+    built here; the loader is iterated exactly once.
+
+    Args:
+        model: torch.nn.Module to evaluate. It is put in eval mode and moved
+            to CUDA when available, otherwise to CPU.
+        test_loader: Iterable yielding (inputs, labels) batches of tensors.
+        nb_classes: Unused; kept so existing callers keep working.
+        learning_rate: Unused; kept so existing callers keep working.
+        epsilon: Perturbation size forwarded to `fgsm_attack`.
+
+    Returns:
+        float: Fraction of samples still classified correctly after the
+        attack, or 0.0 when the loader yields no samples.
+    """
+
+    # Evaluate on the GPU when one is available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.eval()
+    model.to(device)
+
+    correct = 0
+    total = 0
+
+    for images, labels in test_loader:
+        images = images.to(device)
+        labels = labels.to(device)
+
+        # Generate adversarial examples with plain FGSM
+        x_adv = fgsm_attack(model, images, labels, epsilon=epsilon)
+
+        # Predictions on the adversarial batch
+        with torch.no_grad():
+            outputs = model(x_adv)
+            preds = outputs.argmax(dim=1)
+
+        correct += (preds == labels).sum().item()
+        total += labels.size(0)
+
+    # Guard against ZeroDivisionError when no sample was seen
+    if total == 0:
+        logger.warning("Adversarial accuracy: test loader yielded no samples. Returning 0.0")
+        return 0.0
+
+    return correct / total
+
+def get_empirical_robustness_score(
+    model: object,
+    test_sample: object,
+    nb_classes: int,
+    learning_rate: float,
+    attack_name: str = "fgsm",
+    attack_params: dict | None = None,
+    max_samples: int = 32,
+) -> float:
+    """
+    Calculates the Empirical Robustness score using Adversarial Robustness Toolbox (ART).
+
+    Empirical robustness estimates the minimal relative perturbation required for a successful attack
+    on the provided samples. Higher is better (needs larger perturbation to fool the model).
+
+    Args:
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader (images, labels); the labels are ignored.
+        nb_classes (int): Number of classes.
+        learning_rate (float): LR of the Adam optimizer used to build the ART classifier wrapper.
+        attack_name (str): Attack key forwarded to ART empirical_robustness (commonly "fgsm" or "hsj").
+        attack_params (dict | None): Optional attack parameters, forwarded unchanged.
+        max_samples (int): Max number of samples from the batch to use.
+
+    Returns:
+        float: Empirical robustness score. Returns 0.0 if the score is None/NaN or any exception is raised.
+    """
+    try:
+        images, _ = test_sample
+
+        # Limit how many samples we use from the batch (keeps it lightweight)
+        batch_size: int = int(images.shape[0])
+        n: int = int(min(max_samples, batch_size))
+        x = images[:n].detach().cpu().numpy()
+
+        # Infer input shape for ART (no batch dimension)
+        input_shape = tuple(images.shape[1:])
+
+        criterion = nn.CrossEntropyLoss()
+        optimizer = optim.Adam(model.parameters(), learning_rate)
+
+        classifier = PyTorchClassifier(
+            model=model,
+            loss=criterion,
+            optimizer=optimizer,
+            input_shape=input_shape,
+            nb_classes=nb_classes,
+        )
+
+        score = empirical_robustness(
+            classifier=classifier,
+            x=x,
+            attack_name=attack_name,
+            attack_params=attack_params,
+        )
+
+        # ART may return ndarray depending on input; aggregate to scalar
+        if isinstance(score, np.ndarray):
+            score = float(np.mean(score))
+
+        if score is None or (isinstance(score, float) and math.isnan(score)):
+            return 0.0
+
+        return float(score)
+    # Best effort: any failure is logged as a warning and reported as 0.0
+    except Exception as exc:
+        logger.warning("Could not compute empirical robustness (ART). Returning 0.0")
+        logger.warning(exc)
+        return 0.0
+
+
+
+def fgsm_attack(model, images, labels, epsilon=0.03):
+    """
+    Generates adversarial examples using plain FGSM in PyTorch. Could be replaced by ART attacks once those can be used.
+    """
+    images = images.clone().detach().to(images.device)
+    labels = labels.to(images.device)
+    images.requires_grad = True
+
+    outputs = model(images)
+    loss = nn.CrossEntropyLoss()(outputs, labels)
+    model.zero_grad()
+    loss.backward()
+
+    # FGSM: x_adv = x + epsilon * sign(grad)
+    perturbation = epsilon * images.grad.sign()
+    x_adv = images + perturbation
+
+    # Clamping the values to the [0,1] range is currently disabled
+    #x_adv = torch.clamp(x_adv, 0, 1)
+    return x_adv.detach()
+
+def get_confidence_score(
+    model,
+    test_sample,
+    max_samples: int = 128,
+    use_true_label: bool = True,
+) -> float:
+    """
+    Confidence score based on softmax probabilities.
+
+    - If use_true_label=True: returns the mean of P(y_true | x).
+    - If use_true_label=False: returns the mean of the max softmax probability (MSP).
+
+    Args:
+        model (object): Model (torch.nn.Module).
+        test_sample (object): Batch from the dataloader: (x, y).
+        max_samples (int): Max number of samples from the batch to use.
+        use_true_label (bool): See above; MSP is also used when y is not a torch.Tensor.
+
+    Returns:
+        float: Confidence score in [0, 1] (or 0.0 if it fails).
+    """
+    try:
+        if not isinstance(model, torch.nn.Module):
+            logger.warning("Model is not a torch.nn.Module")
+            return 0.0
+
+        x, y = test_sample
+
+        # Trim the batch to keep the computation cheap
+        if isinstance(x, torch.Tensor):
+            x = x[:max_samples]
+        if isinstance(y, torch.Tensor):
+            y = y[:max_samples]
+
+        # Use the device the model actually lives on (CPU if it cannot be determined)
+        try:
+            device = next(model.parameters()).device
+        except Exception:
+            device = torch.device("cpu")
+
+        model.eval()
+        with torch.no_grad():
+            x = x.to(device) if isinstance(x, torch.Tensor) else x
+            out = model(x)
+
+            # In case the model returns a tuple (logits, ...)
+            logits = out[0] if isinstance(out, (tuple, list)) else out
+            probs = torch.softmax(logits, dim=1)
+
+            if use_true_label and isinstance(y, torch.Tensor):
+                # y may come as class indices [B] or one-hot [B, C]
+                if y.ndim > 1:
+                    y_idx = torch.argmax(y, dim=1)
+                else:
+                    y_idx = y
+                y_idx = y_idx.to(device)
+
+                # P(y_true|x)
+                true_probs = probs.gather(1, y_idx.view(-1, 1)).squeeze(1)
+                return float(true_probs.mean().detach().cpu().item())
+
+            # MSP: max_c P(c|x)
+            msp = probs.max(dim=1).values
+            return float(msp.mean().detach().cpu().item())
+
+    except Exception as e:
+        logger.warning("Could not compute confidence score")
+        logger.warning(e)
+        return 0.0
+
+def attack_success_rate(model, test_sample,epsilon=0.03):
+    """
+    Computes the attack success rate (ASR) of an untargeted FGSM attack.
+
+    test_sample is an (images, labels) batch. Only samples the model gets right
+    before the attack are counted; returns 0.0 when there are none.
+    """
+    # Use the model's own device (CPU fallback): the model is never moved here
+    device = next((p.device for p in model.parameters()), torch.device("cpu"))
+    model.eval()
+    images, labels = test_sample
+    images = images.to(device)
+    labels = labels.to(device)
+
+    # 1) Original predictions
+    with torch.no_grad():
+        outputs = model(images)
+        preds = outputs.argmax(dim=1)
+
+    # Only the originally correct samples are considered
+    correct_mask = preds.eq(labels)
+    num_correct = correct_mask.sum().item()
+
+    if num_correct == 0:
+        return 0.0  # avoid division by zero
+
+    # 2) Generate adversarial examples
+    x_adv = fgsm_attack(model, images, labels, epsilon=epsilon)
+
+    # 3) Adversarial predictions
+    with torch.no_grad():
+        outputs_adv = model(x_adv)
+        preds_adv = outputs_adv.argmax(dim=1)
+
+    # 4) Successful attack = correct before and wrong now
+    successful_attacks = (correct_mask & preds_adv.ne(labels)).sum().item()
+
+    asr = successful_attacks / num_correct
+
+    return asr
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index 5ab1b3427..642efb262 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -14,7 +14,72 @@
             "score_function": "get_range_score",
             "type": "true_score",
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 1
+            "weight": 0.4
+          },
+          "loss_sensitivity": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss_sensitivity"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.2
+          },
+          "adversarial_accuracy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_adv_accuracy"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "emprical_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_empirical_robustness"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "confidence_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_confidence_score"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "attack_success_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_attack_success_rate"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
           }
         }
       },
@@ -388,6 +453,26 @@
               {
                 "source": "factsheet",
                 "field_path": "performance/test_clever"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss_sensitivity"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_adv_accuracy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_empirical_robustness"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_confidence_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_attack_success_rate"
               }
             ],
             "operation": "check_properties",
@@ -554,6 +639,19 @@
       "federation_complexity": {
         "weight": 0.25,
         "metrics": {
+          "communication_efficiency": {
+            "inputs": [
+              { "source": "factsheet", "field_path": "system/total_upload_bytes" },
+              { "source": "factsheet", "field_path": "system/total_download_bytes" },
+              { "source": "factsheet", "field_path": "performance/test_acc_avg" }
+            ],
+            "operation": "comm_efficiency",
+            "type": "ranges",
+            "direction": "low",
+            "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "Descripcion de la metrica",
+            "weight": 0.1
+          },
           "number_of_training_rounds": {
             "inputs": [
               {
@@ -566,7 +664,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The total number of training rounds",
-            "weight": 0.16666666
+            "weight": 0.06666666
           },
           "avg_model_size": {
             "inputs": [
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
new file mode 100755
index 000000000..fea2f70d3
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -0,0 +1,640 @@
+{
+    "robustness": {
+      "resilience_to_attacks": {
+        "weight": 0.4,
+        "metrics": {
+          "certified_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_clever"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
+            "weight": 0.4
+          },
+          "loss_sensitivity": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss_sensitivity"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.2
+          },
+          "adversarial_accuracy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_adv_accuracy"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "emprical_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_empirical_robustness"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "confidence_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_confidence_score"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "attack_success_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_attack_success_rate"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          }
+        }
+      },
+      "algorithm_robustness": {
+        "weight": 0.4,
+        "metrics": {
+          "personalization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of personalized FL algorithm.",
+            "weight": 1.0
+          }
+        }
+      },
+      "client_reliability": {
+        "weight": 0.2,
+        "metrics": {
+          "scale": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the model.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "privacy": {
+      "technique": {
+        "weight": 0.2,
+        "metrics": {
+          "differential_privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of differential privacy.",
+            "weight": 1
+          }
+        }
+      },
+      "uncertainty": {
+        "weight": 0.6,
+        "metrics": {
+          "entropy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/entropy_local"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The measure of uncertainty in identifying a client.",
+            "weight": 1
+          }
+        }
+      },
+      "indistinguishability": {
+        "weight": 0.2,
+        "metrics": {
+          "global_privacy_risk": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_global_privacy_risk",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "fairness": {
+      "class_distribution": {
+        "weight": 1,
+        "metrics": {
+          "class_imbalance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Variation of the sample size per class.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "explainability": {
+      "interpretability": {
+        "weight": 0.4,
+        "metrics": {
+          "algorithmic_transparency": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "RandomForestClassifier": 4,
+              "KNeighborsClassifier": 3,
+              "SVC": 2,
+              "GaussianProcessClassifier": 3,
+              "DecisionTreeClassifier": 5,
+              "MLP": 1,
+              "AdaBoostClassifier": 3,
+              "GaussianNB": 3.5,
+              "QuadraticDiscriminantAnalysis": 3,
+              "LogisticRegression": 4,
+              "LinearRegression": 3.5,
+              "Sequential": 1,
+              "CNN": 1
+            },
+            "description": "Mapping of Learning techniques to the level of explainability based on on literature research and qualitative analysis of each learning technique.",
+            "weight": 0.6
+          },
+          "model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5, 10e6, 10e7, 10e8],
+            "description": "Ranges of how to map model size to a score from 1-5.",
+            "weight": 0.4
+          }
+        }
+      },
+      "post_hoc_methods": {
+        "weight": 0.6,
+        "metrics": {
+          "feature_importance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_feature_importance_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Variation of feature importance scores of all the features.",
+            "weight": 0.5
+          },
+          "visualization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of graphical capabilities to show the explainability.",
+            "weight": 0.5
+          }
+        }
+      }
+    },
+    "accountability": {
+      "factsheet_completeness": {
+        "weight": 1,
+        "metrics": {
+          "project_specs": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "project/overview"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/purpose"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/background"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Specifications of the project.",
+            "weight": 0.1
+          },
+          "participants": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Participants information.",
+            "weight": 0.1
+          },
+          "data": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/provenance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/entropy_local"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Meta data about the data.",
+            "weight": 0.2
+          },
+          "configuration": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/optimization_algorithm"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/learning_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "FL model configurations.",
+            "weight": 0.2
+          },
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_feature_importance_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_clever"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss_sensitivity"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_adv_accuracy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_empirical_robustness"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_confidence_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_attack_success_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Performance evaluation results.",
+            "weight": 0.2
+          },
+          "fairness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Fairness metrics results.",
+            "weight": 0.1
+          },
+          "system": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/time_minutes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/model_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/download_bytes"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "System usage information.",
+            "weight": 0.1
+          }
+        }
+      }
+    },
+    "architectural_soundness": {
+      "client_management": {
+        "weight": 0.5,
+        "metrics": {
+          "client_selector": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "The use of a client selector.",
+            "weight": 1
+          }
+        }
+      },
+      "optimization": {
+        "weight": 0.5,
+        "metrics": {
+          "algorithm": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_map_value",
+            "score_map": {
+              "FedAvg": 0.9509,
+              "Krum": 0.9535,
+              "TrimmedMean": 0.9595,
+              "Median": 0.9461
+            },
+            "description": "The choice of a suitable aggregation algorithm.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "sustainability": {
+      "energy_source": {
+        "weight": 0.5,
+        "metrics": {
+          "carbon_intensity_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/carbon_intensity_local"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [20, 795],
+            "description": "Carbon intensity of energy grid used by clients",
+            "weight": 1
+          }
+        }
+      },
+      "federation_complexity": {
+        "weight": 0.5,
+        "metrics": {
+          "communication_efficiency": {
+            "inputs": [
+              { "source": "factsheet", "field_path": "system/upload_bytes" },
+              { "source": "factsheet", "field_path": "system/download_bytes" },
+              { "source": "factsheet", "field_path": "performance/test_acc" }
+            ],
+            "operation": "comm_efficiency",
+            "type": "ranges",
+            "direction": "low",
+            "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "Communication efficiency computed from upload bytes, download bytes and test accuracy.",
+            "weight": 0.1
+          },
+          "number_of_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The total number of training rounds",
+            "weight": 0.06666666
+          },
+          "avg_model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "The size of the model",
+            "weight": 0.16666666
+          },
+          "client_selection_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [
+              0.1,1
+            ],
+            "description": "The selection rate of clients for each training round",
+            "weight": 0.16666666
+          },
+          "number_of_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the federation.",
+            "weight": 0.16666666
+          },
+          "local_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [1, 100],
+            "description": "The number of local training rounds.",
+            "weight": 0.16666666
+          },
+          "avg_dataset_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/local_dataset_size"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
+            "description": "The average number of training samples",
+            "weight": 0.16666666
+          }
+        }
+      }
+    }
+  }
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template.json
index eeeaa7f67..b2369d7ea 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template.json
@@ -31,7 +31,12 @@
 		"test_loss_avg": "",
 		"test_acc_avg": "",
 		"test_feature_importance_cv": "",
-		"test_clever": ""
+		"test_clever": "",
+		"test_loss_sensitivity": "",
+		"test_adv_accuracy": "",
+		"test_empirical_robustness": "",
+		"test_confidence_score": "",
+		"test_attack_success_rate": ""
 	},
 	"fairness": {
 		"test_acc_cv": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
new file mode 100755
index 000000000..e2efbce7d
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -0,0 +1,56 @@
+{
+	"project": {
+		"overview": "",
+		"purpose": "",
+		"background": ""
+	},
+	"data": {
+		"provenance": "",
+		"preprocessing": "",
+		"entropy_local": ""
+	},
+	"participants": {
+		"client_num": "",
+		"sample_client_rate": "",
+		"client_selector": "",
+		"local_dataset_size": ""
+	},
+	"configuration": {
+		"aggregation_algorithm": "",
+		"training_model": "",
+		"personalization": "",
+		"visualization": "",
+		"differential_privacy": "",
+		"dp_epsilon": "",
+		"trainable_param_num": "",
+		"total_round_num": "",
+		"learning_rate": "",
+		"local_update_steps": ""
+	},
+	"performance": {
+		"test_loss": "",
+		"test_acc": "",
+		"test_feature_importance_cv": "",
+		"test_clever": "",
+		"test_loss_sensitivity": "",
+		"test_adv_accuracy": "",
+		"test_empirical_robustness": "",
+		"test_confidence_score": "",
+		"test_attack_success_rate": ""
+	},
+	"fairness": {
+		"class_imbalance": ""
+	},
+	"system": {
+		"time_minutes": "",
+		"model_size": "",
+		"upload_bytes": "",
+		"download_bytes":""
+	},
+	"sustainability": {
+		"carbon_intensity_local": "",
+		"emissions_training_local": "",
+		"energy_consumed_local": "",
+		"emissions_communication_local": ""
+	}
+}
diff --git a/nebula/addons/trustworthiness/dfl_local.py b/nebula/addons/trustworthiness/dfl_local.py
new file mode 100644
index 000000000..ee24b3d58
--- /dev/null
+++ b/nebula/addons/trustworthiness/dfl_local.py
@@ -0,0 +1,285 @@
+# nebula/addons/trustworthiness/dfl_local.py
+import json, os, shutil
+from datetime import datetime
+from nebula.addons.trustworthiness.metric import TrustMetricManager
+import logging
+import glob
+import shutil
+from json import JSONDecodeError
+import pickle
+import numpy as np
+import pandas as pd
+import time
+
+# from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
+from nebula.core.models.mnist.mlp import MNISTModelMLP
+from nebula.core.models.mnist.cnn import MNISTModelCNN
+from nebula.core.models.covtype.mlp import CovtypeModelMLP
+from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
+from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model
+from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
+
+dirname = os.path.dirname(__file__)
+logger = logging.getLogger(__name__)
+
+def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time, end_time):
+    trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
+    os.makedirs(trust_dir, exist_ok=True)
+    # Purpose: build this participant's DFL factsheet from `data` and the files under trust_dir, then write it back as JSON.
+    # 1) Per-node factsheet file
+    factsheet_name = f"factsheet_participant_{participant_idx}.json"
+    factsheet_path = os.path.join(trust_dir, factsheet_name)
+
+    # Copy the DFL factsheet template only if this participant has no factsheet yet
+    template_path = os.path.join(dirname, "configs", "factsheet_template_dfl.json")
+    if not os.path.exists(factsheet_path):
+        shutil.copyfile(template_path, factsheet_path)
+
+    # Minimal population: fill in this node's LOCAL values here.
+    # (can be extended over time)
+    with open(factsheet_path, "r+", encoding="utf-8") as f:
+        factsheet = {}
+        factsheet = json.load(f)
+
+        # Basic pre-training fields taken from `data` (federation, dataset, etc.)
+        logging.info("DFL FactSheet: Populating factsheet with pre training metrics")
+
+        federation = data["federation"]
+        n_nodes = int(data["n_nodes"])
+        dataset = data["dataset"]
+        algorithm = data["model"]
+        aggregation_algorithm = data["agg_algorithm"]
+        n_rounds = int(data["rounds"])
+        attack = data["attack_params"]["attacks"]
+        if attack != "No Attack":
+            poisoned_node_percent = int(data["attack_params"]["poisoned_node_percent"])
+            poisoned_sample_percent = int(data["attack_params"]["poisoned_sample_percent"])
+            poisoned_noise_percent = int(data["attack_params"]["poisoned_noise_percent"])
+        else:
+            poisoned_node_percent = 0
+            poisoned_sample_percent = 0
+            poisoned_noise_percent = 0
+        with_reputation = data["reputation"]["enabled"]
+        is_dynamic_topology = False # data["is_dynamic_topology"]
+        is_dynamic_aggregation = False # data["is_dynamic_aggregation"]
+        target_aggregation = False # data["target_aggregation"]
+        # NOTE(review): both dynamic flags are hard-coded to False above, so the first two branches below can never run.
+        if attack != "No Attack" and with_reputation == True and is_dynamic_aggregation == True:
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. A reputation-based defence with a dynamic aggregation based on the aggregation algorithm {target_aggregation} is used, and the trustworthiness of the project is desired."
+
+        elif attack != "No Attack" and with_reputation == True and is_dynamic_topology == True:
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. A reputation-based defence with a dynamic topology is used, and the trustworthiness of the project is desired."
+
+        elif attack != "No Attack" and with_reputation == False:
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. No defence mechanism is used, and the trustworthiness of the project is desired."
+
+        elif attack == "No Attack":
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks against clients are used, and the trustworthiness of the project is desired."
+        # NOTE(review): with an attack configured and reputation enabled, no branch above assigns `background`, so its use below raises.
+        # Set project specifications
+        factsheet["project"]["overview"] = data["scenario_title"]
+        factsheet["project"]["purpose"] = data["scenario_description"]
+        factsheet["project"]["background"] = background
+
+        # Set data specifications
+        factsheet["data"]["provenance"] = data["dataset"]
+        factsheet["data"]["preprocessing"] = data["topology"]
+
+        # Set participants
+        factsheet["participants"]["client_num"] = data["n_nodes"] or ""
+        factsheet["participants"]["sample_client_rate"] = 1
+        factsheet["participants"]["client_selector"] = ""
+
+        # Set configuration
+        factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
+        factsheet["configuration"]["training_model"] = data["model"] or ""
+        factsheet["configuration"]["personalization"] = False
+        factsheet["configuration"]["visualization"] = True
+        factsheet["configuration"]["total_round_num"] = n_rounds
+        # NOTE(review): the differential-privacy fields are derived from poisoned_noise_percent — confirm this is intended.
+        if poisoned_noise_percent != 0:
+            factsheet["configuration"]["differential_privacy"] = True
+            factsheet["configuration"]["dp_epsilon"] = poisoned_noise_percent
+        else:
+            factsheet["configuration"]["differential_privacy"] = False
+            factsheet["configuration"]["dp_epsilon"] = ""
+
+        if dataset == "MNIST" and algorithm == "MLP":
+            model = MNISTModelMLP()
+            num_classes_temp = 10
+        elif dataset == "MNIST" and algorithm == "CNN":
+            model = MNISTModelCNN()
+            num_classes_temp = 10
+        elif dataset == "Covtype" and algorithm == "MLP":
+            model = CovtypeModelMLP()
+            num_classes_temp = 7
+        elif dataset == "AdultCensus" and algorithm == "MLP":
+            model = AdultCensusModelMLP()
+            num_classes_temp = 2
+        elif dataset == "BreastCancer" and algorithm == "MLP":
+            model = BreastCancerModelMLP()
+            num_classes_temp = 2
+        # NOTE(review): any other dataset/model pair leaves `model` and `num_classes_temp` unassigned, so the lines below would fail.
+        factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
+        factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
+        factsheet["configuration"]["local_update_steps"] = 1
+
+        files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
+
+        final_model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
+        train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_train_model.pk")
+        test_dataloader_file = os.path.join(files_dir, f"participant_{participant_idx}_test_loader.pk")
+        emissions_file = os.path.join(files_dir, f"emissions.csv")
+        # NOTE(review): pickle.load executes whatever the file encodes — confirm these log-directory files are trusted.
+        with open(train_model_file, "rb") as t_file:
+            lightning_model = pickle.load(t_file)
+
+        get_all_data_entropy(experiment_name)
+
+        data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_idx)}_class_count.json")
+
+        entropy_local = normalized_entropy_from_class_counts(data_class_count_file)
+
+        factsheet["data"]["entropy_local"] = entropy_local
+        # Reported test loss/accuracy are the last rows of this participant's per-round metrics.
+        df = load_round_metrics(experiment_name, participant_idx)
+        acc = df["accuracy"].astype(float).to_numpy()
+        loss = df["loss"].astype(float).to_numpy()
+
+        final_acc = float(acc[-1])
+        final_loss = float(loss[-1])
+
+        factsheet["performance"]["test_loss"] = float(final_loss)
+        factsheet["performance"]["test_acc"] = float(final_acc)
+
+        bytes_sent, bytes_recv = get_bytes(experiment_name, participant_idx)
+        # Same path as final_model_file computed earlier.
+        model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
+        factsheet["system"]["model_size"] = get_bytes_model(model_file)
+
+        factsheet["system"]["upload_bytes"] = int(bytes_sent)
+        factsheet["system"]["download_bytes"] = int(bytes_recv)
+
+        factsheet["system"]["time_minutes"] = get_elapsed_time(start_time, end_time)
+        # Class imbalance is get_cv over the per-class counts, capped at 1; the stored value (default 0.0) is kept if the file is missing.
+        count_class_file = os.path.join(files_dir, f"{participant_idx}_class_count.json")
+        if os.path.exists(count_class_file):
+            with open(count_class_file, "r") as fs:
+                class_distribution = json.load(fs)
+            class_samples_sizes = list(class_distribution.values())
+            class_imbalance = get_cv(list=class_samples_sizes)
+            factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance > 1 else class_imbalance
+        else:
+            factsheet["fairness"]["class_imbalance"] = factsheet["fairness"].get("class_imbalance", 0.0)
+
+        carbon_intensity_local, emissions_training_local, energy_consumed_local, sample_size = get_emissions(emissions_file, participant_idx)
+
+        factsheet["sustainability"]["carbon_intensity_local"] = carbon_intensity_local
+        factsheet["sustainability"]["emissions_training_local"] = emissions_training_local
+        factsheet["sustainability"]["energy_consumed_local"] = energy_consumed_local
+        factsheet["participants"]["local_dataset_size"] = sample_size
+
+        factsheet["sustainability"]["emissions_communication_local"] = (bytes_sent * 2.24e-10 * carbon_intensity_local)+(bytes_recv * 2.24e-10 * carbon_intensity_local)
+        # Load the pickled training model's weights into the freshly built architecture.
+        model.load_state_dict(lightning_model.state_dict())
+
+        with open(test_dataloader_file, "rb") as d_file:
+            test_dataloader = pickle.load(d_file)
+        # Most metrics below are computed on the first batch only; adversarial accuracy receives the whole loader.
+        test_sample = next(iter(test_dataloader))
+
+        lr = factsheet["configuration"]["learning_rate"]
+        value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
+        # Every score below is capped at 1 before being stored.
+        factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
+
+        value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
+
+        factsheet["performance"]["test_loss_sensitivity"] = 1 if value_loss_sensitivity > 1 else value_loss_sensitivity
+
+        value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
+
+        factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
+
+        value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes_temp, lr)
+
+        factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
+
+        value_confidence_score = get_confidence_score(model, test_sample)
+
+        factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
+        attack_success_rate
+        # NOTE(review): the bare `attack_success_rate` name above is an expression statement with no effect.
+        value_attack_success_rate = attack_success_rate(model, test_sample)
+
+        factsheet["performance"]["test_attack_success_rate"] = 1 if value_attack_success_rate > 1 else value_attack_success_rate
+
+        feature_importance = get_feature_importance_cv(model, test_sample)
+
+        factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
+        # Rewind and truncate so the updated JSON fully replaces the previous file contents.
+        f.seek(0)
+        f.truncate()
+        json.dump(factsheet, f, indent=4)
+
+def load_round_metrics(experiment_name: str, participant_idx: int):
+    files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
+    path = os.path.join(files_dir, f"round_metrics_participant_{participant_idx}.csv")
+    df = pd.read_csv(path)
+    # Returns the participant's per-round metrics CSV as a DataFrame.
+    # Ensure rows are ordered by round when that column exists
+    if "round" in df.columns:
+        df = df.sort_values("round")
+
+    # Basic cleanup: drop rows missing either loss or accuracy
+    df = df.dropna(subset=["loss", "accuracy"])
+    return df
+
+def get_bytes(experiment_name: str, participant_idx: int):
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"data_results_{participant_idx}.csv")
+    # Returns (bytes_sent, bytes_recv) for this participant from its data_results CSV.
+    data = read_csv(data_file)
+    # Keep only the rows whose "id" equals participant_idx.
+    row = data[data["id"] == participant_idx]
+    # .iloc[0] takes the first matching row and raises if there is none.
+    bytes_sent = row["bytes_sent"].iloc[0]
+    bytes_recv = row["bytes_recv"].iloc[0]
+
+    return bytes_sent, bytes_recv
+
+def get_emissions(emissions_file, participant_idx: int):
+    data = read_csv(emissions_file)
+    # Returns (energy_grid, emissions, energy_consumed, sample_size) from the first row whose "id" equals participant_idx.
+    row = data[data["id"] == participant_idx]
+    # The "energy_grid" column is what the caller stores as carbon intensity; .iloc[0] raises if no row matched.
+    avg_carbon_intensity_clients = row["energy_grid"].iloc[0]
+    emissions_training = row["emissions"].iloc[0]
+    energy_consumed = row["energy_consumed"].iloc[0]
+    sample_size = row["sample_size"].iloc[0]
+
+    return avg_carbon_intensity_clients, emissions_training, energy_consumed, sample_size
+
+def normalized_entropy_from_class_counts(count_class_file: str) -> float:
+    with open(count_class_file, "r") as f:
+        dist = json.load(f)
+    # Only the values of the loaded mapping are used (presumably per-class sample counts — confirm against the writer).
+    counts = np.array(list(dist.values()), dtype=float)
+    total = counts.sum()
+    if total <= 0:
+        return 0.0
+
+    p = counts / total
+
+    # Entropy with natural log (eps avoids log(0))
+    eps = 1e-12
+    H = -float(np.sum(p * np.log(p + eps)))
+
+    # Normalise by log of the number of classes; a single class yields 0.0
+    K = len(p)
+    if K <= 1:
+        return 0.0
+
+    H_norm = H / float(np.log(K))
+    # Numerical safety: clamp the result to [0, 1]
+    return max(0.0, min(1.0, H_norm))
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 3ffce970a..55aaa1bc2 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -7,15 +7,20 @@
 import pickle
 import numpy as np
 import pandas as pd
+import time
 
 # from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
 from nebula.core.models.mnist.mlp import MNISTModelMLP
 from nebula.core.models.mnist.cnn import MNISTModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv
+from nebula.core.models.covtype.mlp import CovtypeModelMLP
+from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
+from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
 dirname = os.path.dirname(__file__)
+logger = logging.getLogger(__name__)
 
 class Factsheet:
     def __init__(self):
@@ -112,8 +117,19 @@ def populate_factsheet_pre_train(self, data, scenario_name):
 
                     if dataset == "MNIST" and algorithm == "MLP":
                         model = MNISTModelMLP()
+                        num_classes_temp = 10
                     elif dataset == "MNIST" and algorithm == "CNN":
                         model = MNISTModelCNN()
+                        num_classes_temp = 10
+                    elif dataset == "Covtype" and algorithm == "MLP":
+                        model = CovtypeModelMLP()
+                        num_classes_temp = 7
+                    elif dataset == "AdultCensus" and algorithm == "MLP":
+                        model = AdultCensusModelMLP()
+                        num_classes_temp = 2
+                    elif dataset == "BreastCancer" and algorithm == "MLP":
+                        model = BreastCancerModelMLP()
+                        num_classes_temp = 2
                     # elif dataset == "Syscall" and algorithm == "MLP":
                     #     model = SyscallModelMLP()
                     # else:
@@ -147,6 +163,28 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
             try:
                 factsheet = json.load(f)
 
+                # Block until confirmation.csv holds a row for every expected participant
+                # (presumably one row per finished node - confirm against save_confirmation_csv).
+                expected_total = int(factsheet.get("participants", {}).get("client_num", 0) or 0)
+                logging.info(f"[Factsheet] expected_total_nodes = {expected_total}")
+
+                data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "confirmation.csv")
+                data = read_csv(data_file)
+                number_files = len(data)
+                logger.info(f"number_files={number_files}")
+
+                # `<` instead of `!=`: extra/duplicated rows, or a missing client_num
+                # (expected_total == 0), must not leave this loop spinning forever.
+                while number_files < expected_total:
+                    logger.info("WAIT")
+                    time.sleep(5)
+                    data = read_csv(data_file)
+                    number_files = len(data)
+                    logger.info(f"number_files={number_files}")
+                    logger.info(f"expected_nodes={expected_total}")
+
+
+
                 dataset = factsheet["data"]["provenance"]
                 model = factsheet["configuration"]["training_model"]
 
@@ -165,7 +203,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
                 #         dataloader = pickle.load(file)
                 #     get_entropy(i, scenario_name, dataloader)
                 #     i += 1
-                
+
                 get_all_data_entropy(scenario_name)
 
                 with open(f"{files_dir}/entropy.json", "r") as file:
@@ -198,7 +236,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
                 factsheet["fairness"]["selection_cv"] = 1
 
                 count_all_class_samples(scenario_name)
-                
+
                 with open(f"{files_dir}/count_class.json", "r") as file:
                     class_distribution = json.load(file)
 
@@ -211,13 +249,24 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
 
                 if dataset == "MNIST" and model == "MLP":
                     model = MNISTModelMLP()
+                    num_classes_temp = 10 # CAMBIAR
                 elif dataset == "MNIST" and model == "CNN":
                     model = MNISTModelCNN()
+                    num_classes_temp = 10
+                elif dataset == "Covtype" and model == "MLP":
+                    model = CovtypeModelMLP()
+                    num_classes_temp = 7
+                elif dataset == "AdultCensus" and model == "MLP":
+                    model = AdultCensusModelMLP()
+                    num_classes_temp = 2
+                elif dataset == "BreastCancer" and model == "MLP":  # compare `model` like the sibling branches (was `algorithm`)
+                    model = BreastCancerModelMLP()
+                    num_classes_temp = 2
                 # elif dataset == "Syscall" and model == "MLP":
                 #     model = SyscallModelMLP()
                 # else:
                 #     model = CIFAR10ModelCNN()
-                
+
                 model.load_state_dict(lightning_model.state_dict())
 
                 with open(test_dataloader_file, "rb") as file:
@@ -226,10 +275,31 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
                 test_sample = next(iter(test_dataloader))
 
                 lr = factsheet["configuration"]["learning_rate"]
-                value_clever = get_clever_score(model, test_sample, 10, lr)
+                value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
 
                 factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
 
+                # Additional test-time metrics; each value is capped at 1 before being stored.
+                value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
+
+                factsheet["performance"]["test_loss_sensitivity"] = 1 if value_loss_sensitivity > 1 else value_loss_sensitivity
+
+                value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
+
+                factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
+
+                value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes_temp, lr)
+
+                factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
+
+                value_confidence_score = get_confidence_score(model, test_sample)
+
+                factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
+
+                value_attack_success_rate = attack_success_rate(model, test_sample)
+
+                factsheet["performance"]["test_attack_success_rate"] = 1 if value_attack_success_rate > 1 else value_attack_success_rate
+
                 feature_importance = get_feature_importance_cv(model, test_sample)
 
                 factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
@@ -278,4 +348,4 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
 
             except JSONDecodeError as e:
                 logging.info(f"{factsheet_file} is invalid")
-                logging.error(e)
\ No newline at end of file
+                logging.error(e)
diff --git a/nebula/addons/trustworthiness/graphics.py b/nebula/addons/trustworthiness/graphics.py
index 9233db756..03239ac72 100644
--- a/nebula/addons/trustworthiness/graphics.py
+++ b/nebula/addons/trustworthiness/graphics.py
@@ -19,13 +19,17 @@ class Graphics():
     def __init__(
         self,
         scenario_start_time,
-        scenario_name
+        scenario_name,
+        participant_id=None,
     ):
         self.scenario_start_time = scenario_start_time
         self.scenario_name = scenario_name
         log_dir = os.path.join(os.environ["NEBULA_LOGS_DIR"], scenario_name)
-        self.nebulalogger = NebulaTensorBoardLogger(scenario_start_time, f"{log_dir}", name="metrics", version=f"trust", log_graph=True)
-        
+        # Participants get their own TensorBoard version ("trust_<id>"); without an id the shared "trust" one is used.
+        # `is None` is the idiomatic identity test for None (PEP 8), replacing `== None`.
+        version = "trust" if participant_id is None else f"trust_{participant_id}"
+        self.nebulalogger = NebulaTensorBoardLogger(scenario_start_time, f"{log_dir}", name="metrics", version=version, log_graph=True)
+
     def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
         filtered_df = df[df['Pillar'] == pillar].copy()
 
@@ -36,7 +40,7 @@ def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
         filtered_df.loc[:, 'Notion'] = filtered_df['Notion'].apply(lambda x: str(x).title())
 
         unique_notion_count = filtered_df['Notion'].nunique()
-        palette = [color] * unique_notion_count 
+        palette = [color] * unique_notion_count
 
         plt.figure(figsize=figsize)
         ax = sns.barplot(data=filtered_df, x='Metric', y='Metric Score', hue='Notion', palette=palette, dodge=False)
@@ -50,12 +54,12 @@ def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
             notion = row['Notion']
             notion_score = row['Notion Score']
             metric_score = row['Metric Score']
-            
+
             if notion not in notion_scores:
                 metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
                 start_pos = x_positions[i]
                 end_pos = x_positions[i + len(metrics_for_notion) - 1]
-                
+
                 notion_x_pos = (start_pos + end_pos) / 2
                 ax.axhline(notion_score, ls='--', color='black', lw=0.5, xmin=start_pos/len(x_positions), xmax=(end_pos+1)/len(x_positions))
                 ax.text(notion_x_pos, notion_score + 0.01, f"{notion_score:.2f}", ha='center', va='bottom', fontsize=10, color='black')  # Color negro
@@ -70,15 +74,15 @@ def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
                 metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
                 start_pos = x_positions[i]
                 end_pos = x_positions[i + len(metrics_for_notion) - 1]
-                
+
                 notion_x_pos = (start_pos + end_pos) / 2
-                
-                ax.text(notion_x_pos, notion_y_pos, notion, ha='center', va='center', fontsize=10, color='black') 
-                
-                seen_notions.add(notion)  
+
+                ax.text(notion_x_pos, notion_y_pos, notion, ha='center', va='center', fontsize=10, color='black')
+
+                seen_notions.add(notion)
 
         for i, v in enumerate(filtered_df['Metric Score']):
-            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10, color='black') 
+            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10, color='black')
 
         plt.xlabel('Metrics and notions', labelpad=35)
         plt.ylabel('Score')
@@ -87,7 +91,7 @@ def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
         ax.legend_.remove()
 
         plt.tight_layout()
-        
+
         self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/Pillar/{pillar}")
         plt.close()
 
@@ -179,4 +183,94 @@ def graphics(self):
         ax.set_xticklabels(name_labels, rotation=45)
 
         self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/AllPillars")
-        plt.close()
\ No newline at end of file
+        plt.close()
+
+    def graphics_dfl(self, participant_id):
+        """Plot one participant's trust results and log the figures to TensorBoard.
+
+        Reads ``nebula_trust_results_<participant_id>.json`` from the scenario's
+        ``trustworthiness`` log directory, logs one bar chart per pillar, then a
+        summary chart with every pillar score plus the overall trust score.
+
+        Args:
+            participant_id: Identifier used in the results file name, the tick
+                labels and the TensorBoard tag of the summary figure.
+
+        Raises:
+            KeyError: If ``NEBULA_LOGS_DIR`` is unset or an expected pillar is missing.
+        """
+        results_file = os.path.join(os.environ["NEBULA_LOGS_DIR"], self.scenario_name, "trustworthiness", f"nebula_trust_results_{participant_id}.json")
+        with open(results_file, 'r') as f:
+            results = json.load(f)
+
+        # Flatten pillar -> notion -> metric into one row per metric.
+        rows = []
+        for pillar in results["pillars"]:
+            for pillar_name, pillar_body in pillar.items():
+                if "notions" not in pillar_body:
+                    continue
+                for notion in pillar_body["notions"]:
+                    for notion_name, notion_body in notion.items():
+                        notion_score = notion_body["score"]
+                        for metric in notion_body["metrics"]:
+                            for metric_name, metric_body in metric.items():
+                                rows.append((pillar_name, notion_name, notion_score, metric_name, metric_body["score"]))
+
+        df = pd.DataFrame(rows, columns=["Pillar", "Notion", "Notion Score", "Metric", "Metric Score"])
+
+        # One bar chart per pillar; the optional third argument is the y position of the notion labels.
+        self.__log_figure(df, 'robustness', "#F8D3DF")
+        self.__log_figure(df, "privacy", "#DA8D8B", -0.2)
+        self.__log_figure(df, "fairness", "#DDDDDD")
+        self.__log_figure(df, "explainability", "#FCEFC3")
+        self.__log_figure(df, "accountability", "#8FAADC", -0.3)
+        self.__log_figure(df, "architectural_soundness", "#DBB9FA", -0.3)
+        self.__log_figure(df, "sustainability", "#BBFDAF", -0.5, figsize=(12,8))
+
+        categories = [
+            "robustness",
+            "privacy",
+            "fairness",
+            "explainability",
+            "accountability",
+            "architectural_soundness",
+            "sustainability"
+        ]
+
+        # Look pillar scores up by name so the chart does not depend on the order
+        # in which the pillars were written to the results file.
+        pillars_by_name = {name: body for entry in results["pillars"] for name, body in entry.items()}
+        scores = [pillars_by_name[category]["score"] for category in categories]
+
+        trust_score = results["trust_score"]
+        categories.append("trust_score")
+        scores.append(trust_score)
+
+        palette = ["#F8D3DF", "#DA8D8B", "#DDDDDD", "#FCEFC3", "#8FAADC", "#DBB9FA", "#BBFDAF", "#BF9000"]
+
+        # Summary chart: the seven pillar scores followed by the overall trust score.
+        plt.figure(figsize=(10, 8))
+        ax = sns.barplot(x=categories, y=scores, palette=palette, hue=categories, legend=False)
+        ax.set_xlabel("Pillar")
+        ax.set_ylabel("Score")
+        ax.set_title("Pillars and trust scores")
+
+        for i, v in enumerate(scores):
+            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10)
+
+        name_labels = [
+            f"Robustness_{participant_id}",
+            f"Privacy_{participant_id}",
+            f"Fairness_{participant_id}",
+            f"Explainability_{participant_id}",
+            f"Accountability_{participant_id}",
+            f"Architectural Soundness_{participant_id}",
+            f"Sustainability_{participant_id}",
+            f"Trust Score_{participant_id}"
+        ]
+
+        ax.set_xticks(range(len(categories)))
+        ax.set_xticklabels(name_labels, rotation=45)
+
+        self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/AllPillars_{participant_id}")
+        plt.close()
diff --git a/nebula/addons/trustworthiness/metric.py b/nebula/addons/trustworthiness/metric.py
index 0952576b3..da62568b7 100755
--- a/nebula/addons/trustworthiness/metric.py
+++ b/nebula/addons/trustworthiness/metric.py
@@ -16,11 +16,17 @@ class TrustMetricManager:
     Manager class to help store the output directory and handle calls from the FL framework.
     """
 
-    def __init__(self, scenario_start_time):
-        self.factsheet_file_nm = "factsheet.json"
-        self.eval_metrics_file_nm = "eval_metrics.json"
-        self.nebula_trust_results_nm = "nebula_trust_results.json"
-        self.scenario_start_time = scenario_start_time
+    def __init__(self, scenario_start_time, federation=None, participant=None):
+        """Pick per-participant file names for "DFL", the shared ones otherwise (`federation` defaults to None)."""
+        self.scenario_start_time = scenario_start_time
+        if federation == "DFL":
+            self.factsheet_file_nm = f"factsheet_participant_{participant}.json"
+            self.eval_metrics_file_nm = "eval_metrics_dfl.json"
+            self.nebula_trust_results_nm = f"nebula_trust_results_{participant}.json"
+        else:
+            self.factsheet_file_nm = "factsheet.json"
+            self.eval_metrics_file_nm = "eval_metrics.json"
+            self.nebula_trust_results_nm = "nebula_trust_results.json"
 
     def evaluate(self, experiment_name, weights, use_weights=False):
         """
@@ -64,6 +70,58 @@ def evaluate(self, experiment_name, weights, use_weights=False):
             final_score = round(final_score, 2)
             result_json["trust_score"] = final_score
             write_results_json(results_file, result_json)
-            
+
             graphics = Graphics(self.scenario_start_time, scenario_name)
             graphics.graphics()
+
+    def evaluate_participant(self, experiment_name, weights, participant_id, use_weights=False):
+        """
+        Evaluates the trustworthiness score of a single participant.
+
+        Args:
+            experiment_name (str): Scenario name; used to locate the trustworthiness log directory.
+            weights (dict): Pillar weights keyed by pillar name; each is divided by 100 before use.
+            participant_id: Participant whose factsheet is scored.
+            use_weights (bool): True to turn on the weights in the metric config file, default to False.
+
+        Writes the per-participant results JSON and logs the figures; returns
+        early (after logging an error) when the factsheet or metric config is missing.
+        """
+        scenario_name = experiment_name
+        trust_dir = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness")
+        factsheet_file = os.path.join(trust_dir, self.factsheet_file_nm)
+        metrics_cfg_file = os.path.join(dirname, "configs", self.eval_metrics_file_nm)
+        results_file = os.path.join(trust_dir, self.nebula_trust_results_nm)
+
+        for required_file in (factsheet_file, metrics_cfg_file):
+            if not os.path.exists(required_file):
+                logger.error(f"{required_file} is missing! Please check documentation.")
+                return
+
+        # Read both inputs up front so the files are closed before scoring starts.
+        with open(factsheet_file, "r") as f, open(metrics_cfg_file, "r") as m:
+            factsheet = json.load(f)
+            raw_metrics_cfg = m.read()
+
+        # The config refers to its input document as "factsheet"; rename those
+        # references so they resolve to this participant's document below.
+        # NOTE(review): this is a plain substring replacement, so any other text containing
+        # "factsheet" (e.g. a "factsheet_completeness" key) is renamed too - confirm intended.
+        doc_name = f"factsheet_participant_{participant_id}"
+        metrics_cfg = json.loads(raw_metrics_cfg.replace("factsheet", doc_name))
+        input_docs = {doc_name: factsheet}
+
+        # Trust score = weighted sum of the pillar scores.
+        result_json = {"trust_score": 0, "pillars": []}
+        final_score = 0
+        for key, value in metrics_cfg.items():
+            pillar = TrustPillar(key, value, input_docs, use_weights)
+            score, result = pillar.evaluate()
+            # Weights are given on a 0-100 scale, hence the division.
+            final_score += (weights.get(key) / 100) * score
+            result_json["pillars"].append(result)
+        result_json["trust_score"] = round(final_score, 2)
+        write_results_json(results_file, result_json)
+
+        graphics = Graphics(self.scenario_start_time, scenario_name, participant_id)
+        graphics.graphics_dfl(participant_id)
diff --git a/nebula/addons/trustworthiness/per_round_metrics.py b/nebula/addons/trustworthiness/per_round_metrics.py
new file mode 100644
index 000000000..086167065
--- /dev/null
+++ b/nebula/addons/trustworthiness/per_round_metrics.py
@@ -0,0 +1,174 @@
+# nebula/addons/trustworthiness/per_round_metrics.py
+from __future__ import annotations
+
+import asyncio
+import copy
+import csv
+import os
+from dataclasses import dataclass, field
+from typing import Any, Optional, Tuple
+
+import torch
+
+from nebula.addons.functions import print_msg_box
+from nebula.addons.trustworthiness.calculation import get_feature_importance_cv
+
+
+def _safe_get_round(engine) -> int:
+    """Return the trainer's current round, or -1 when the engine has no trainer."""
+    node_trainer = getattr(engine, "trainer", None)
+    if node_trainer is not None:
+        # Prefer get_round(); fall back to a `round` attribute if the call fails.
+        try:
+            return int(node_trainer.get_round())
+        except Exception:
+            return int(getattr(node_trainer, "round", -1))
+    return -1
+
+
+def _get_local_test_loader(engine):
+    """Best-effort lookup of the engine's test dataloader; None when unavailable.
+
+    A non-empty list/tuple from ``test_dataloader()`` is reduced to its first item
+    (the original note says Nebula normally returns [local_loader, global_loader])."""
+    datamodule = getattr(getattr(engine, "trainer", None), "datamodule", None)
+    if datamodule is None:
+        return None
+    try:
+        datamodule.setup(stage="test")
+    except Exception:
+        pass  # setup failures are ignored; test_dataloader() below is still attempted
+    try:
+        loaders = datamodule.test_dataloader()
+        if isinstance(loaders, (list, tuple)) and len(loaders) > 0:
+            return loaders[0]
+        return loaders
+    except Exception:
+        return None
+
+
+def _build_test_sample_min_bs(test_loader, min_bs: int = 10) -> Optional[Tuple[Any, Any]]:
+    """
+    Return an (x, y) batch holding at least ``min_bs`` samples when possible.
+
+    The first batch is returned as-is if it is already large enough; otherwise
+    further batches are concatenated along dim 0 until ``min_bs`` is reached or
+    the loader stops yielding usable batches. Returns None if there is no
+    loader or the first batch is not an (x, y) pair of tensors.
+    """
+    if test_loader is None:
+        return None
+
+    def _as_tensor_pair(candidate):
+        # A usable batch is a tuple/list whose first two items are tensors.
+        if not isinstance(candidate, (tuple, list)) or len(candidate) < 2:
+            return None
+        first, second = candidate[0], candidate[1]
+        if isinstance(first, torch.Tensor) and isinstance(second, torch.Tensor):
+            return first, second
+        return None
+
+    try:
+        batches = iter(test_loader)
+        first_batch = next(batches)
+    except Exception:
+        return None
+    head = _as_tensor_pair(first_batch)
+    if head is None:
+        return None
+    collected = head[0].size(0)
+    if collected >= min_bs:
+        return head
+    inputs, targets = [head[0]], [head[1]]
+    while collected < min_bs:
+        try:
+            extra = next(batches)
+        except Exception:
+            break
+        pair = _as_tensor_pair(extra)
+        if pair is None:
+            break
+        inputs.append(pair[0])
+        targets.append(pair[1])
+        collected += pair[0].size(0)
+    return (torch.cat(inputs, dim=0), torch.cat(targets, dim=0))
+
+
+@dataclass
+class PerRoundTrustMetrics:
+    """Record per-round loss/accuracy plus an accuracy-stability score for one node.
+
+    ``setup`` prepares the CSV file; ``on_test_metrics`` appends one row per call
+    and optionally prints a summary box.
+    """
+
+    experiment_name: str
+    participant_idx: int
+    trust_dir: str
+    role_label: str
+
+    # Output switches
+    enable_print: bool = True
+    enable_csv: bool = True
+
+    fi_every_n_rounds: int = 1  # not read by this class yet (feature importance is never computed here)
+
+    # Internal state
+    _csv_path: str = field(init=False)
+    _prev_acc: Optional[float] = field(default=None, init=False)
+    _test_loader: Any = field(default=None, init=False)
+    _lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False)
+
+    # One name per value written by on_test_metrics; header and rows must stay in sync.
+    _CSV_COLUMNS = ("round", "participant", "role", "loss", "accuracy", "tw_stability", "tw_feature_importance_cv")
+
+    async def setup(self, engine) -> None:
+        """Create the trust directory and CSV header (if needed) and cache the test loader."""
+        os.makedirs(self.trust_dir, exist_ok=True)
+        self._csv_path = os.path.join(
+            self.trust_dir, f"round_metrics_participant_{self.participant_idx}.csv"
+        )
+
+        if self.enable_csv and not os.path.exists(self._csv_path):
+            with open(self._csv_path, "w", newline="") as f:
+                csv.writer(f).writerow(self._CSV_COLUMNS)
+
+        self._test_loader = _get_local_test_loader(engine)
+
+    async def on_test_metrics(self, engine, loss: float, acc: float) -> None:
+        """Append this round's metrics to the CSV and, if enabled, print them."""
+        async with self._lock:
+            round_id = _safe_get_round(engine)
+            # Simple per-round metric: 1 - |accuracy change since the previous call|, clamped to [0, 1].
+            if self._prev_acc is None:
+                tw_stability = 1.0
+            else:
+                tw_stability = max(0.0, min(1.0, 1.0 - abs(acc - self._prev_acc)))
+            self._prev_acc = acc
+
+            # Placeholder: feature-importance CV is not computed yet, so the column stays empty.
+            fi_cv: Optional[float] = None
+
+            if self.enable_csv:
+                with open(self._csv_path, "a", newline="") as f:
+                    csv.writer(f).writerow([
+                        round_id,
+                        self.participant_idx,
+                        self.role_label,
+                        float(loss),
+                        float(acc),
+                        float(tw_stability),
+                        None if fi_cv is None else float(fi_cv),
+                    ])
+
+            if self.enable_print:
+                print_msg_box(
+                    msg=(
+                        f"Round: {round_id}\n"
+                        f"Loss: {loss:.4f}\n"
+                        f"Accuracy: {acc:.4f}\n"
+                        f"TW/Stability: {tw_stability:.4f}\n"
+                    ),
+                    indent=2,
+                    title=f"Trustworthiness (per-round) | {self.role_label} | Participant: {self.participant_idx}",
+                )
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 1eaa17c6a..ab860816f 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -8,8 +8,14 @@
 from nebula.core.engine import Engine
 import pickle
 from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save
-from nebula.addons.trustworthiness.utils import save_results_csv
+from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv
 from codecarbon import EmissionsTracker
+from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
+from datetime import datetime
+from nebula.addons.trustworthiness.factsheet import Factsheet
+from nebula.addons.trustworthiness.metric import TrustMetricManager
+from nebula.addons.trustworthiness.dfl_local import compute_trust_local_dfl
+import json, os
 
 """                                                     ##############################
                                                         #       TRUST WORKLOADS      #
@@ -23,23 +29,23 @@ class TrustWorkload(ABC):
     @abstractmethod
     async def init(self, experiment_name):
         raise NotImplementedError
-    
+
     @abstractmethod
     def get_workload(self) -> str:
         raise NotImplementedError
-    
+
     @abstractmethod
     def get_sample_size(self) -> float:
         raise NotImplementedError
-    
+
     abstractmethod
     def get_metrics(self) -> tuple[float, float]:
         raise NotImplementedError
-    
+
     @abstractmethod
     async def finish_experiment_role_pre_actions(self):
         raise NotImplementedError
-    
+
     @abstractmethod
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
         raise NotImplementedError
@@ -55,14 +61,29 @@ def __init__(self, engine, idx, trust_files_route):
         self._current_loss = None
         self._current_accuracy = None
         self._experiment_name = ""
-        
+        self._per_round = None
+        self._start_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        self._end_time = None
+
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
         await EventManager.get_instance().subscribe_node_event(RoundEndEvent, self._process_round_end_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
         await self._create_pk_files(experiment_name)
-            
+
+        self._per_round = PerRoundTrustMetrics(
+            experiment_name=experiment_name,
+            participant_idx=self._idx,
+            trust_dir=self._trust_files_route,
+            role_label="TRAINER",
+            enable_print=True,
+            enable_csv=True,
+            fi_every_n_rounds=1,  # raise to 5/10 to reduce cost
+        )
+        await self._per_round.setup(self._engine)
+
+
     async def _create_pk_files(self, experiment_name):
         # Save data to local files to calculate the trustworthyness
         train_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_train_loader.pk"
@@ -71,54 +92,97 @@ async def _create_pk_files(self, experiment_name):
         train_loader = self._engine.trainer.datamodule.train_dataloader()
         self._engine.trainer.datamodule.setup(stage="test")
         test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
-        
+
         with open(train_loader_filename, 'wb') as f:
             pickle.dump(train_loader, f)
             f.close()
         with open(test_loader_filename, 'wb') as f:
             pickle.dump(test_loader, f)
             f.close()
-        
+
     def get_workload(self):
         return self._workload
-    
+
     def get_sample_size(self):
         return self._sample_size
-    
+
     def get_metrics(self):
         return (self._current_loss, self._current_accuracy)
-    
+
     async def finish_experiment_role_pre_actions(self):
         with open(self._train_loader_file, 'rb') as file:
             train_loader = pickle.load(file)
         self._sample_size = len(train_loader)
-        
+
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
-        pass
-       
-    async def _process_round_end_event(self, ree: RoundEndEvent):        
+        """Compute and plot this node's own trust results when the federation is "DFL".
+
+        Other federation values (e.g. "SDFL") are a no-op for this role.
+
+        Args:
+            trust_config (dict): Trust settings; "federation" selects the behaviour and
+                the whole dict is forwarded to compute_trust_local_dfl.
+            experiment_name (str): Scenario name used to locate scenario.json.
+        """
+        federation = trust_config.get("federation")
+        # Nothing to do for this role outside DFL.
+        if federation != "DFL":
+            return
+
+        self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
+        with open(data_file_path, 'r') as data_file:
+            data = json.load(data_file)
+
+        # Pillar weights are stored in scenario.json under "<pillar>_pillar";
+        # the remaining weights are stored under their own name.
+        pillar_names = (
+            "robustness", "privacy", "fairness", "explainability",
+            "accountability", "architectural_soundness", "sustainability",
+        )
+
+        plain_names = (
+            "resilience_to_attacks", "algorithm_robustness", "client_reliability",
+            "technique", "uncertainty", "indistinguishability",
+            "class_distribution", "interpretability", "post_hoc_methods",
+            "factsheet_completeness", "client_management", "optimization",
+            "energy_source", "federation_complexity",
+        )
+        weights = {name: float(data[f"{name}_pillar"]) for name in pillar_names}
+        weights.update({name: float(data[name]) for name in plain_names})
+
+        # Per the original note, each node generates its own factsheet_participant_<idx>.json
+        # here (inside compute_trust_local_dfl - not visible from this file) before it is scored.
+        compute_trust_local_dfl(experiment_name, self._idx, trust_config, self._start_time, self._end_time)
+
+        trust_metric_manager = TrustMetricManager(self._start_time, federation, self._idx)
+        trust_metric_manager.evaluate_participant(experiment_name, weights, self._idx, use_weights=True)
+
+    async def _process_round_end_event(self, ree: RoundEndEvent):
         scenario_name = self._engine.config.participant["scenario_args"]["name"]
         train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"
         # Save the train model in trustworthy dir
         with open(train_model, 'wb') as f:
             pickle.dump(self._engine.trainer.model, f)
-    
+
     async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         cur_loss, cur_acc = await tme.get_event_data()
         if cur_loss and cur_acc:
             self._current_loss, self._current_accuracy = cur_loss, cur_acc
-            
-    async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):        
+
+        if self._per_round is not None and cur_loss is not None and cur_acc is not None:  # float(None) would raise TypeError
+            await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
+
+    async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
-        
+
+
         # Save model in trustworthy dir
         with open(model_file, 'wb') as f:
             pickle.dump(self._engine.trainer.model, f)
-        
-        
-    
+
+
 class TrustWorkloadServer(TrustWorkload):
-    
+
     def __init__(self, engine: Engine, idx, trust_files_route):
         self._workload = 'aggregation'
         self._sample_size = 0
@@ -129,39 +193,54 @@ def __init__(self, engine: Engine, idx, trust_files_route):
         self._engine: Engine = engine
         self._end_time = None
         self._experiment_name = ""
-        
+        self._idx = idx
+        self._trust_files_route = trust_files_route
+        self._per_round = None
+
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
-        await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)   
+        await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
-        
+
+        self._per_round = PerRoundTrustMetrics(
+            experiment_name=experiment_name,
+            participant_idx=self._idx,
+            trust_dir=self._trust_files_route,
+            role_label="SERVER",
+            enable_print=True,
+            enable_csv=True,
+            fi_every_n_rounds=1,
+        )
+        await self._per_round.setup(self._engine)
+
+
     def get_workload(self):
         return self._workload
-    
+
     def get_sample_size(self):
         return self._sample_size
-    
+
     def get_metrics(self):
         return (self._current_loss, self._current_accuracy)
-    
+
     async def finish_experiment_role_pre_actions(self):
         pass
-    
+
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
         from datetime  import datetime
         self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
         await self._generate_factsheet(trust_config, experiment_name)
-        
+
     async def _generate_factsheet(self, trust_config, experiment_name):
         from nebula.addons.trustworthiness.factsheet import Factsheet
         from nebula.addons.trustworthiness.metric import TrustMetricManager
         import json
         import os
-        
+
         factsheet = Factsheet()
         factsheet.populate_factsheet_pre_train(trust_config, experiment_name)
         factsheet.populate_factsheet_post_train(experiment_name, self._start_time, self._end_time)
-        
+
         data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
         with open(data_file_path, 'r') as data_file:
             data = json.load(data_file)
@@ -192,18 +271,22 @@ async def _generate_factsheet(self, trust_config, experiment_name):
                 "hardware_efficiency": float(data["hardware_efficiency"]),
                 "federation_complexity": float(data["federation_complexity"])
             }
+            federation = trust_config.get("federation")
 
-            trust_metric_manager = TrustMetricManager(self._start_time)
+            trust_metric_manager = TrustMetricManager(self._start_time, federation)
             trust_metric_manager.evaluate(experiment_name, weights, use_weights=True)
-    
+
     async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         cur_loss, cur_acc = await tme.get_event_data()
         if cur_loss and cur_acc:
             self._current_loss, self._current_accuracy = cur_loss, cur_acc
 
-    async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):        
+        if self._per_round is not None and cur_loss is not None and cur_acc is not None:  # float(None) raises TypeError
+            await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
+
+    async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
-        
+
         # Save model in trustworthy dir
         with open(model_file, 'wb') as f:
             pickle.dump(self._engine.trainer.model, f)
@@ -228,55 +311,106 @@ def __init__(self, engine: Engine, config: Config):
         self._emissions_file = 'emissions.csv'
         self._role: Role = engine.rb.get_role()
         self._idx = self._config.participant["device_args"]["idx"]
-        self._trust_workload: TrustWorkload = self._factory_trust_workload(self._role, self._engine, self._idx, self._trust_dir_files)       
-        
+        self._trust_workload: TrustWorkload = self._factory_trust_workload(self._role, self._engine, self._idx, self._trust_dir_files)
+
         # EmissionsTracker from codecarbon to measure the emissions during the aggregation step in the server
         self._tracker= EmissionsTracker(tracking_mode='process', log_level='error', save_to_file=False)
-        
+
     @property
     def tw(self):
         """TrustWorkload depending on the node Role"""
         return self._trust_workload
-    
+
     async def start(self):
         await self._create_trustworthiness_directory()
         await self.tw.init(self._experiment_name)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finish_event)
         self._tracker.start()
-        
+
     async def _create_trustworthiness_directory(self):
         import os
         trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self._experiment_name, "trustworthiness")
         # Create a directory to save files to calcutate trust
         os.makedirs(trust_dir, exist_ok=True)
         os.chmod(trust_dir, 0o777)
-        
+
     async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         from nebula.addons.trustworthiness.utils import save_class_count_per_participant
         class_counter = self._engine.trainer.datamodule.get_samples_per_label()
         save_class_count_per_participant(self._experiment_name, class_counter, self._idx)
-        
+
         await self.tw.finish_experiment_role_pre_actions()
-        
+
         last_loss, last_accuracy = self.tw.get_metrics()
-        
+
         # Get bytes send/received from reporter
         bytes_sent = self._engine.reporter.acc_bytes_sent
         bytes_recv = self._engine.reporter.acc_bytes_recv
-        
+
         # Get TrustWorkload info
         workload = self.tw.get_workload()
         sample_size = self.tw.get_sample_size()
-        
+
         # Last operations
         save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_loss, last_accuracy)
-        stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, self._emissions_file, self._role.value, workload, sample_size)
-        
+        stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, self._emissions_file, self._role.value, workload, sample_size, self._idx)
+        save_confirmation_csv(self._experiment_name, self._idx)
+        """
+        federation = self._trust_config.get("federation")  # "CFL" or "DFL"
+
+        if federation == "DFL":
+            data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), self._experiment_name, "scenario.json")
+            with open(data_file_path, 'r') as data_file:
+                data = json.load(data_file)
+
+                weights = {
+                    "robustness": float(data["robustness_pillar"]),
+                    "resilience_to_attacks": float(data["resilience_to_attacks"]),
+                    "algorithm_robustness": float(data["algorithm_robustness"]),
+                    "client_reliability": float(data["client_reliability"]),
+                    "privacy": float(data["privacy_pillar"]),
+                    "technique": float(data["technique"]),
+                    "uncertainty": float(data["uncertainty"]),
+                    "indistinguishability": float(data["indistinguishability"]),
+                    "fairness": float(data["fairness_pillar"]),
+                    "selection_fairness": float(data["selection_fairness"]),
+                    "performance_fairness": float(data["performance_fairness"]),
+                    "class_distribution": float(data["class_distribution"]),
+                    "explainability": float(data["explainability_pillar"]),
+                    "interpretability": float(data["interpretability"]),
+                    "post_hoc_methods": float(data["post_hoc_methods"]),
+                    "accountability": float(data["accountability_pillar"]),
+                    "factsheet_completeness":  float(data["factsheet_completeness"]),
+                    "architectural_soundness": float(data["architectural_soundness_pillar"]),
+                    "client_management": float(data["client_management"]),
+                    "optimization": float(data["optimization"]),
+                    "sustainability": float(data["sustainability_pillar"]),
+                    "energy_source": float(data["energy_source"]),
+                    "hardware_efficiency": float(data["hardware_efficiency"]),
+                    "federation_complexity": float(data["federation_complexity"])
+                }
+            # 1) compute the weights (same as the server already did, reading scenario.json)
+            # 2) each node generates factsheet_participant_<idx>.json + results_participant_<idx>.json
+            compute_trust_local_dfl(self._experiment_name, self._idx, self._trust_config, weights)
+
+            # and RETURN without touching the CFL path
+            return
+
+        # If NOT DFL => CFL (or whatever is in use) continues EXACTLY as before
+
+        elif federation == "SDFL":
+            #SDFL
+            return
+        """
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
-        
-    def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_route) -> TrustWorkload:  
+
+    def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_route) -> TrustWorkload:
         trust_workloads = {
-            Role.TRAINER: TrustWorkloadTrainer, 
+            Role.TRAINER: TrustWorkloadTrainer,
+            Role.AGGREGATOR: TrustWorkloadTrainer,
+            Role.PROXY: TrustWorkloadTrainer,
+            Role.IDLE: TrustWorkloadTrainer,
+            Role.TRAINER_AGGREGATOR: TrustWorkloadTrainer,
             Role.SERVER: TrustWorkloadServer
         }
         trust_workload = trust_workloads.get(role)
@@ -284,5 +418,3 @@ def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_r
             return trust_workload(engine, idx, trust_files_route)
         else:
             raise TrustWorkloadException(f"Trustworthiness workload for role {role} not defined")
-    
-    
\ No newline at end of file
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index e081fcafd..b4597c41f 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -59,7 +59,7 @@ def count_class_samples(scenario_name, dataloaders_files, class_counter: Counter
 
     result = {}
     dataloaders = []
-    
+
     if class_counter:
         result = {hashids.encode(int(class_id)): count for class_id, count in class_counter.items()}
     else:
@@ -81,7 +81,7 @@ def count_class_samples(scenario_name, dataloaders_files, class_counter: Counter
         name_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "count_class.json")
     except:
         name_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "count_class.json")
-        
+
     with open(name_file, "w") as f:
         json.dump(result, f)
 
@@ -90,7 +90,7 @@ def get_all_data_entropy(experiment_name):
     participant_id = 0
     data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
     entropy_per_participant = {}
-    
+
     while True:
         data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
 
@@ -109,12 +109,12 @@ def get_all_data_entropy(experiment_name):
 
         entropy_per_participant[str(participant_id)] = round(entropy_value, 6)
         participant_id += 1
-        
+
     name_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'),experiment_name, "trustworthiness", "entropy.json")
 
     with open(name_file, "w") as f:
         json.dump(entropy_per_participant, f, indent=2)
-       
+
 def get_entropy(client_id, scenario_name, dataloader):
     """
     Get the entropy of each client in the scenario.
@@ -129,7 +129,7 @@ def get_entropy(client_id, scenario_name, dataloader):
     client_entropy = {}
 
     name_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "entropy.json")
-        
+
     if os.path.exists(name_file):
         logging.info(f"entropy fiel already exists.. loading.")
         with open(name_file, "r") as f:
@@ -274,18 +274,64 @@ def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: i
         data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
     except:
         data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "data_results.csv")
-        
+
     if exists(data_results_file):
         df = pd.read_csv(data_results_file)
     else:
         df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"])
-        
+
     try:
         # Add new entry to DataFrame
         new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
                                     'loss': [loss]})
         df = pd.concat([df, new_data], ignore_index=True)
+        logger.info(f"new_data={new_data}")
+
+        df.to_csv(data_results_file, encoding='utf-8', index=False)
+
+    except Exception as e:
+        logger.warning(e)
+
+    try:
+        data_results_id_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"data_results_{id}.csv")
+    except TypeError:  # NEBULA_LOGS_DIR unset: os.path.join(None, ...) raises TypeError
+        data_results_id_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", f"data_results_{id}.csv")
+
+    if exists(data_results_id_file):
+        df = pd.read_csv(data_results_id_file)
+    else:
+        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"])
+
+    try:
+        # Add new entry to DataFrame
+        new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
+                                    'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
+                                    'loss': [loss]})
+        df = pd.concat([df, new_data], ignore_index=True)
+        logger.info(f"new_data={new_data}")
+
+        df.to_csv(data_results_id_file, encoding='utf-8', index=False)
+
+    except Exception as e:
+        logger.warning(e)
+
+def save_confirmation_csv(scenario_name: str, id: int):
+    try:
+        data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "confirmation.csv")
+    except TypeError:  # NEBULA_LOGS_DIR unset: os.path.join(None, ...) raises TypeError
+        data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "confirmation.csv")
+
+    if exists(data_results_file):
+        df = pd.read_csv(data_results_file)
+    else:
+        df = pd.DataFrame(columns=["id", "OK"])
+
+    try:
+        # Add new entry to DataFrame
+        new_data = pd.DataFrame({'id': [id], 'OK': ["OK"]})
+        df = pd.concat([df, new_data], ignore_index=True)
+        logger.info(f"new_data={new_data}")
 
         df.to_csv(data_results_file, encoding='utf-8', index=False)
 
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index bbfa8996c..e91e23e0e 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -23,6 +23,9 @@
 from nebula.core.datasets.cifar100.cifar100 import CIFAR100Dataset
 from nebula.core.datasets.emnist.emnist import EMNISTDataset
 from nebula.core.datasets.fashionmnist.fashionmnist import FashionMNISTDataset
+from nebula.core.datasets.covtype.covtype import CovtypeDataset
+from nebula.core.datasets.adultcensus.adultcensus import AdultCensusDataset
+from nebula.core.datasets.breast_cancer.breast_cancer import BreastCancerDataset
 from nebula.core.datasets.mnist.mnist import MNISTDataset
 from nebula.core.utils.certificate import generate_ca_certificate, generate_certificate
 from nebula.utils import DockerUtils, FileUtils
@@ -988,9 +991,15 @@ async def load_configurations_and_start_nodes(
         if additional_participants:
             self.n_nodes += len(additional_participants)
 
+
+
         # Splitting dataset
         dataset_name = self.scenario.dataset
         dataset = None
+
+
+        logging.info(f"[DEBUG] dataset_name received: {dataset_name!r}")
+
         if dataset_name == "MNIST":
             dataset = MNISTDataset(
                 num_classes=10,
@@ -1011,6 +1020,36 @@ async def load_configurations_and_start_nodes(
                 seed=42,
                 config_dir=self.config_dir,
             )
+        elif dataset_name == "Covtype":
+            dataset = CovtypeDataset(
+                num_classes=7,
+                partitions_number=self.n_nodes,
+                iid=self.scenario.iid,
+                partition=self.scenario.partition_selection,
+                partition_parameter=self.scenario.partition_parameter,
+                seed=42,
+                config_dir=self.config_dir,
+            )
+        elif dataset_name == "AdultCensus":
+            dataset = AdultCensusDataset(
+                num_classes=2,
+                partitions_number=self.n_nodes,
+                iid=self.scenario.iid,
+                partition=self.scenario.partition_selection,
+                partition_parameter=self.scenario.partition_parameter,
+                seed=42,
+                config_dir=self.config_dir,
+            )
+        elif dataset_name == "BreastCancer":
+            dataset = BreastCancerDataset(
+                num_classes=2,
+                partitions_number=self.n_nodes,
+                iid=self.scenario.iid,
+                partition=self.scenario.partition_selection,
+                partition_parameter=self.scenario.partition_parameter,
+                seed=42,
+                config_dir=self.config_dir,
+            )
         elif dataset_name == "EMNIST":
             dataset = EMNISTDataset(
                 num_classes=47,
@@ -1046,6 +1085,15 @@ async def load_configurations_and_start_nodes(
 
         logging.info(f"Splitting {dataset_name} dataset...")
         dataset.initialize_dataset()
+        logging.info(
+            f"[DEBUG] train_set is None? {dataset.train_set is None} | "
+            f"test_set is None? {dataset.test_set is None}"
+        )
+
+        if dataset.train_set is not None and hasattr(dataset.train_set, "data"):  # runs for every dataset: label by name, read .shape defensively
+            logging.info(f"[DEBUG] {dataset_name} train_set.data.shape = {getattr(dataset.train_set.data, 'shape', None)}")
+        else:
+            logging.info(f"[DEBUG] {dataset_name} train_set has no .data yet (or train_set is None)")
         logging.info(f"Splitting {dataset_name} dataset... Done")
 
         if self.scenario.deployment in ["docker", "process", "physical"]:
diff --git a/nebula/core/datasets/adultcensus/__init__.py b/nebula/core/datasets/adultcensus/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/datasets/adultcensus/adultcensus.py b/nebula/core/datasets/adultcensus/adultcensus.py
new file mode 100644
index 000000000..f85d472e9
--- /dev/null
+++ b/nebula/core/datasets/adultcensus/adultcensus.py
@@ -0,0 +1,242 @@
+# nebula/core/datasets/adultcensus/adultcensus.py
+
+import os
+from typing import Tuple, Any
+
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+
+
+class AdultCensusTorchDataset(Dataset):
+    """
+    Torch Dataset wrapper for Adult Census Income dataset (tabular, already numeric).
+    x: float32 tensor (n_features,)
+    y: long scalar {0,1} where 1 means >50K
+    """
+    def __init__(self, x: np.ndarray, y: np.ndarray):
+        if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
+            raise ValueError("x and y must be numpy arrays")
+
+        if x.ndim != 2:
+            raise ValueError(f"x must be 2D (n_samples, n_features). Got shape={x.shape}")
+
+        y_arr: np.ndarray = np.asarray(y).reshape(-1)
+        if x.shape[0] != y_arr.shape[0]:
+            raise ValueError(f"x and y must have same number of samples. Got {x.shape[0]} != {y_arr.shape[0]}")
+
+        self.x: np.ndarray = x.astype(np.float32, copy=False)
+        self.y: np.ndarray = y_arr.astype(np.int64, copy=False)
+
+        # Nebula conventions
+        self.data: np.ndarray = self.x
+        self.targets: np.ndarray = self.y
+        self.classes: list[str] = ["<=50K", ">50K"]
+
+    def __len__(self) -> int:
+        return int(self.y.shape[0])
+
+    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        x_i: torch.Tensor = torch.from_numpy(self.x[idx])
+        y_i: torch.Tensor = torch.tensor(int(self.y[idx]), dtype=torch.long)
+        return x_i, y_i
+
+
+class AdultCensusPartitionHandler(NebulaPartitionHandler):
+    """
+    Partition handler for tabular data.
+    """
+    def __init__(self, file_path: str, prefix: str, config: Any, empty: bool = False):
+        super().__init__(file_path, prefix, config, empty)
+        self.transform = None  # no torchvision transforms for tabular
+
+    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        data, target = super().__getitem__(idx)
+
+        # Some Nebula handlers may wrap data in tuples
+        if isinstance(data, tuple):
+            data = data[0]
+
+        if isinstance(data, torch.Tensor):
+            x: torch.Tensor = data.to(dtype=torch.float32)
+        else:
+            x = torch.tensor(np.asarray(data), dtype=torch.float32)
+
+        if isinstance(target, torch.Tensor):
+            y: torch.Tensor = target.to(dtype=torch.long)
+        else:
+            y = torch.tensor(int(target), dtype=torch.long)
+
+        if self.target_transform is not None:
+            y = self.target_transform(y)
+
+        return x, y
+
+
+class AdultCensusDataset(NebulaDataset):
+    """
+    Adult Census Income dataset integration for Nebula.
+
+    - 2 classes: <=50K vs >50K
+    - mixed categorical + numerical -> numeric via preprocessing (impute + OHE + scale)
+    - deterministic stratified train/test split
+    """
+    def __init__(
+        self,
+        num_classes: int = 2,
+        partitions_number: int = 1,
+        batch_size: int = 32,
+        num_workers: int = 4,
+        iid: bool = True,
+        partition: str = "dirichlet",
+        partition_parameter: float = 0.5,
+        seed: int = 42,
+        config_dir: str | None = None,
+        test_size: float = 0.2,
+    ):
+        super().__init__(
+            num_classes=num_classes,
+            partitions_number=partitions_number,
+            batch_size=batch_size,
+            num_workers=num_workers,
+            iid=iid,
+            partition=partition,
+            partition_parameter=partition_parameter,
+            seed=seed,
+            config_dir=config_dir,
+        )
+        self.test_size: float = float(test_size)
+
+    def initialize_dataset(self) -> None:
+        if self.train_set is None or self.test_set is None:
+            self.train_set, self.test_set = self.load_adult_census_dataset()
+
+        self.data_partitioning(plot=True)
+
+    @staticmethod
+    def _make_ohe_dense():
+        """
+        scikit-learn compatibility:
+        - older: OneHotEncoder(..., sparse=False)
+        - newer: OneHotEncoder(..., sparse_output=False)
+        """
+        from sklearn.preprocessing import OneHotEncoder
+
+        try:
+            return OneHotEncoder(handle_unknown="ignore", sparse_output=False)
+        except TypeError:
+            return OneHotEncoder(handle_unknown="ignore", sparse=False)
+
+    def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensusTorchDataset]:
+        """
+        Loads Adult dataset from OpenML and preprocesses to all-numeric features.
+
+        Steps:
+          1) fetch_openml(data_id=1590, as_frame=True)
+          2) y = (target == '>50K').astype(int)
+          3) replace '?' with NA for missing values
+          4) ColumnTransformer:
+              - numeric: median impute + StandardScaler
+              - categorical: most_frequent impute + OneHotEncoder(dense)
+          5) train/test split (stratified), fit preprocessing only on train (avoid leakage)
+        """
+        data_dir: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+        os.makedirs(data_dir, exist_ok=True)
+
+        try:
+            import pandas as pd
+            from sklearn.datasets import fetch_openml
+            from sklearn.model_selection import train_test_split
+            from sklearn.compose import ColumnTransformer, make_column_selector as selector
+            from sklearn.pipeline import Pipeline
+            from sklearn.impute import SimpleImputer
+            from sklearn.preprocessing import StandardScaler
+        except ImportError as e:  # only a missing package maps to the install hint below
+            raise ImportError(
+                "AdultCensusDataset requires pandas + scikit-learn. Install them (e.g., pip install pandas scikit-learn)."
+            ) from e
+
+        # 1) Load from OpenML
+        bunch = fetch_openml(data_id=1590, as_frame=True, data_home=data_dir)
+        X_df = bunch.data.copy()
+        y_raw = bunch.target
+
+        # 2) Target -> {0,1}
+        # Normalize spaces to avoid variants like ' >50K'
+        y_str = y_raw.astype(str).str.strip()
+        y: np.ndarray = (y_str == ">50K").astype(np.int64).to_numpy()
+
+        # 3) Replace '?' markers with NA (UCI Adult uses '?' for missing categorical values)
+        X_df = X_df.replace(r"^\s*\?\s*$", pd.NA, regex=True)
+
+        # 4) Preprocess
+        numeric_selector = selector(dtype_exclude=["object", "category", "string"])
+        categorical_selector = selector(dtype_include=["object", "category", "string"])
+
+        numeric_transformer = Pipeline(
+            steps=[
+                ("impute", SimpleImputer(strategy="median")),
+                ("scaler", StandardScaler(with_mean=True, with_std=True)),
+            ]
+        )
+
+        categorical_transformer = Pipeline(
+            steps=[
+                ("impute", SimpleImputer(strategy="most_frequent")),
+                ("ohe", self._make_ohe_dense()),
+            ]
+        )
+
+        preprocessor = ColumnTransformer(
+            transformers=[
+                ("num", numeric_transformer, numeric_selector),
+                ("cat", categorical_transformer, categorical_selector),
+            ],
+            remainder="drop",
+        )
+
+        # 5) Split then fit on train
+        X_train_df, X_test_df, y_train, y_test = train_test_split(
+            X_df,
+            y,
+            test_size=self.test_size,
+            random_state=self.seed,
+            shuffle=True,
+            stratify=y,
+        )
+
+        X_train = preprocessor.fit_transform(X_train_df)
+        X_test = preprocessor.transform(X_test_df)
+
+        # In case some sklearn path returns sparse matrices, densify safely
+        if hasattr(X_train, "toarray"):
+            X_train = X_train.toarray()
+        if hasattr(X_test, "toarray"):
+            X_test = X_test.toarray()
+
+        X_train_np: np.ndarray = np.asarray(X_train, dtype=np.float32)
+        import logging
+        logging.getLogger().info(f"[AdultCensus] X_train shape = {X_train_np.shape}")
+        logging.getLogger().info(f"[AdultCensus] INPUT_DIM (post-OHE) = {int(X_train_np.shape[1])}")
+        X_test_np: np.ndarray = np.asarray(X_test, dtype=np.float32)
+
+        train_ds = AdultCensusTorchDataset(X_train_np, np.asarray(y_train, dtype=np.int64))
+        test_ds = AdultCensusTorchDataset(X_test_np, np.asarray(y_test, dtype=np.int64))
+
+        return train_ds, test_ds
+
+    def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
+        if partition == "dirichlet":
+            return self.dirichlet_partition(dataset, alpha=partition_parameter)
+        if partition == "percent":
+            return self.percentage_partition(dataset, percentage=partition_parameter)
+        raise ValueError(f"Partition {partition} is not supported for Non-IID map")
+
+    def generate_iid_map(self, dataset, partition: str = "balancediid", partition_parameter: float = 2):
+        if partition == "balancediid":
+            return self.balanced_iid_partition(dataset)
+        if partition == "unbalancediid":
+            return self.unbalanced_iid_partition(dataset, imbalance_factor=partition_parameter)
+        raise ValueError(f"Partition {partition} is not supported for IID map")
diff --git a/nebula/core/datasets/breast_cancer/__init__.py b/nebula/core/datasets/breast_cancer/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/datasets/breast_cancer/breast_cancer.py b/nebula/core/datasets/breast_cancer/breast_cancer.py
new file mode 100644
index 000000000..9181c1422
--- /dev/null
+++ b/nebula/core/datasets/breast_cancer/breast_cancer.py
@@ -0,0 +1,158 @@
+import os
+from typing import Tuple, Any
+
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+
+
+class BreastCancerTorchDataset(Dataset):
+    """
+    Torch Dataset wrapper for the sklearn breast cancer dataset (tabular).
+    Each item is (x, y): x is a float32 tensor holding one row of features and
+    y is a 0-dim long tensor. `classes` is fixed to ["0", "1"].
+    """
+    def __init__(self, x: np.ndarray, y: np.ndarray):
+        if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
+            raise ValueError("x and y must be numpy arrays")
+
+        if x.ndim != 2:
+            raise ValueError(f"x must be 2D (n_samples, n_features). Got shape={x.shape}")
+
+        y = np.asarray(y).reshape(-1)  # flatten labels to 1D before comparing lengths
+        if x.shape[0] != y.shape[0]:
+            raise ValueError(f"x and y must have same number of samples. Got {x.shape[0]} != {y.shape[0]}")
+
+        self.x = x.astype(np.float32, copy=False)
+        self.y = y.astype(np.int64, copy=False)
+
+        # Aliases following Nebula conventions (some utilities expect these names)
+        self.data = self.x
+        self.targets = self.y
+        self.classes = ["0", "1"]
+
+    def __len__(self) -> int:
+        return int(self.y.shape[0])
+
+    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        x_i = torch.from_numpy(self.x[idx])
+        y_i = torch.tensor(self.y[idx], dtype=torch.long)
+        return x_i, y_i
+
+
+class BreastCancerPartitionHandler(NebulaPartitionHandler):
+    """
+    Partition handler for tabular data: items are returned as (float32 features, long target) tensors.
+    """
+    def __init__(self, file_path: str, prefix: str, config: Any, empty: bool = False):
+        super().__init__(file_path, prefix, config, empty)
+        self.transform = None  # no torchvision transforms for tabular
+
+    def __getitem__(self, idx: int):
+        data, target = super().__getitem__(idx)
+
+        if isinstance(data, tuple):
+            data = data[0]  # keep only the first element when the sample comes as a tuple
+
+        if isinstance(data, torch.Tensor):
+            x = data.to(dtype=torch.float32)
+        else:
+            x = torch.tensor(np.asarray(data), dtype=torch.float32)
+
+        if isinstance(target, torch.Tensor):
+            y = target.to(dtype=torch.long)
+        else:
+            y = torch.tensor(int(target), dtype=torch.long)
+
+        if self.target_transform is not None:
+            y = self.target_transform(y)  # applied after the cast to torch.long
+
+        return x, y
+
+
+class BreastCancerDataset(NebulaDataset):
+    """
+    Breast Cancer Wisconsin (Diagnostic) dataset integration for Nebula.
+
+    - 2 classes
+    - tabular features (30)
+    - deterministic stratified train/test split (seeded with `seed`, test fraction `test_size`)
+    """
+    def __init__(
+        self,
+        num_classes: int = 2,
+        partitions_number: int = 1,
+        batch_size: int = 32,
+        num_workers: int = 4,
+        iid: bool = True,
+        partition: str = "dirichlet",
+        partition_parameter: float = 0.5,
+        seed: int = 42,
+        config_dir: str | None = None,
+        test_size: float = 0.2,
+    ):
+        super().__init__(
+            num_classes=num_classes,
+            partitions_number=partitions_number,
+            batch_size=batch_size,
+            num_workers=num_workers,
+            iid=iid,
+            partition=partition,
+            partition_parameter=partition_parameter,
+            seed=seed,
+            config_dir=config_dir,
+        )
+        self.test_size = float(test_size)
+
+    def initialize_dataset(self):
+        if self.train_set is None or self.test_set is None:  # load only when a set is still missing
+            self.train_set, self.test_set = self.load_breast_cancer_dataset()
+
+        self.data_partitioning(plot=True)
+
+    def load_breast_cancer_dataset(self):
+        # Local cache directory (load_breast_cancer does not download anything, but we follow the same pattern)
+        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+        os.makedirs(data_dir, exist_ok=True)
+
+        try:
+            from sklearn.datasets import load_breast_cancer
+            from sklearn.model_selection import train_test_split
+        except Exception as e:
+            raise ImportError(
+                "BreastCancerDataset requires scikit-learn. Install it (e.g., pip install scikit-learn)."
+            ) from e
+
+        ds = load_breast_cancer()
+        x = np.asarray(ds.data)
+        y = np.asarray(ds.target).reshape(-1)  # already 0/1
+
+        x_train, x_test, y_train, y_test = train_test_split(
+            x,
+            y,
+            test_size=self.test_size,
+            random_state=self.seed,
+            shuffle=True,
+            stratify=y,
+        )
+
+        train_ds = BreastCancerTorchDataset(x_train, y_train)
+        test_ds = BreastCancerTorchDataset(x_test, y_test)
+
+        return train_ds, test_ds
+
+    def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
+        if partition == "dirichlet":
+            return self.dirichlet_partition(dataset, alpha=partition_parameter)
+        if partition == "percent":
+            return self.percentage_partition(dataset, percentage=partition_parameter)
+        raise ValueError(f"Partition {partition} is not supported for Non-IID map")
+
+    def generate_iid_map(self, dataset, partition: str = "balancediid", partition_parameter: float = 2):
+        if partition == "balancediid":
+            return self.balanced_iid_partition(dataset)
+        if partition == "unbalancediid":
+            return self.unbalanced_iid_partition(dataset, imbalance_factor=partition_parameter)
+        raise ValueError(f"Partition {partition} is not supported for IID map")
diff --git a/nebula/core/datasets/covtype/__init__.py b/nebula/core/datasets/covtype/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/datasets/covtype/covtype.py b/nebula/core/datasets/covtype/covtype.py
new file mode 100644
index 000000000..f46a6b289
--- /dev/null
+++ b/nebula/core/datasets/covtype/covtype.py
@@ -0,0 +1,220 @@
+# nebula/core/datasets/covtype/covtype.py
+
+import os
+from typing import Tuple, Any
+
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+
+
+class CovtypeTorchDataset(Dataset):
+    """
+    Simple torch Dataset wrapper for tabular Covtype data.
+
+    Items are (x, y): x is a torch.float32 tensor holding one row of features,
+    y is a 0-dim torch.long tensor. `classes` is inferred from the largest
+    label (labels are assumed to be 0-based); an empty dataset has no classes.
+    """
+    def __init__(self, x: np.ndarray, y: np.ndarray):
+        if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
+            raise ValueError("x and y must be numpy arrays")
+
+        if x.ndim != 2:
+            raise ValueError(f"x must be 2D (n_samples, n_features). Got shape={x.shape}")
+        if y.ndim != 1:
+            y = y.reshape(-1)
+
+        if x.shape[0] != y.shape[0]:
+            raise ValueError(f"x and y must have same number of samples. Got {x.shape[0]} != {y.shape[0]}")
+
+        self.x = x.astype(np.float32, copy=False)
+        self.y = y.astype(np.int64, copy=False)
+
+        self.data = self.x
+        self.targets = self.y
+        # max() raises on a zero-size array, so an empty dataset is given zero classes instead.
+        n_classes = int(self.targets.max()) + 1 if self.targets.size else 0
+        self.classes = [str(i) for i in range(n_classes)]
+
+    def __len__(self) -> int:
+        return int(self.y.shape[0])
+
+    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        x_i = torch.from_numpy(self.x[idx])
+        y_i = torch.tensor(self.y[idx], dtype=torch.long)
+        return x_i, y_i
+
+
+class CovtypePartitionHandler(NebulaPartitionHandler):
+    """
+    Partition handler for tabular datasets.
+
+    NebulaPartitionHandler provides (data, target) from the partition storage.
+    For images, we usually convert to PIL and apply torchvision transforms.
+    Here we convert features to float32 torch tensors and targets to long.
+    """
+    def __init__(self, file_path: str, prefix: str, config: Any, empty: bool = False):
+        super().__init__(file_path, prefix, config, empty)
+
+        # For tabular data we typically don't apply torchvision transforms.
+        # If you later want normalization here, do it explicitly and carefully
+        # (train stats vs test stats, per-partition stats, etc.).
+        self.transform = None
+
+    def __getitem__(self, idx: int):
+        data, target = super().__getitem__(idx)
+
+        # Defensive: depending on how NebulaPartitionHandler stores/returns,
+        # "data" might be list/tuple/np.ndarray. Convert whatever we get to a float32 tensor.
+        if isinstance(data, tuple):
+            # Some vision datasets store (img, meta). For tabular we ignore extras.
+            data = data[0]
+
+        if isinstance(data, torch.Tensor):
+            x = data.to(dtype=torch.float32)
+        else:
+            x = torch.tensor(np.asarray(data), dtype=torch.float32)
+
+        # Cast the target to torch.long (the label range is not validated here)
+        if isinstance(target, torch.Tensor):
+            y = target.to(dtype=torch.long)
+        else:
+            y = torch.tensor(int(target), dtype=torch.long)
+
+        if self.target_transform is not None:
+            y = self.target_transform(y)
+
+        return x, y
+
+
+class CovtypeDataset(NebulaDataset):
+    """
+    Covtype (Forest CoverType) dataset integration for Nebula.
+
+    Notes:
+    - Covtype has 7 classes.
+    - Features are tabular (54 features in the classic version).
+    - Stratified train/test split with a fixed seed, optionally subsampled to train_limit / test_limit.
+
+    Requirements:
+    - scikit-learn must be installed (for fetch_covtype + train_test_split).
+    """
+    def __init__(
+        self,
+        num_classes: int = 7,
+        partitions_number: int = 1,
+        batch_size: int = 32,
+        num_workers: int = 4,
+        iid: bool = True,
+        partition: str = "dirichlet",
+        partition_parameter: float = 0.5,
+        seed: int = 42,
+        config_dir: str | None = None,
+        test_size: float = 0.2,
+        train_limit: int | None = 40000,
+        test_limit: int | None = 5000,
+    ):
+        super().__init__(
+            num_classes=num_classes,
+            partitions_number=partitions_number,
+            batch_size=batch_size,
+            num_workers=num_workers,
+            iid=iid,
+            partition=partition,
+            partition_parameter=partition_parameter,
+            seed=seed,
+            config_dir=config_dir,
+        )
+        self.test_size = float(test_size)
+        self.train_limit = train_limit
+        self.test_limit = test_limit
+
+    def initialize_dataset(self):
+        if self.train_set is None or self.test_set is None:
+            self.train_set, self.test_set = self.load_covtype_dataset()
+
+        self.data_partitioning(plot=True)
+
+    def load_covtype_dataset(self):
+        """
+        Loads Covtype via sklearn, performs a deterministic train/test split,
+        and wraps into torch Datasets.
+        """
+        # Local cache directory for sklearn dataset downloads
+        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+        os.makedirs(data_dir, exist_ok=True)
+
+        try:
+            from sklearn.datasets import fetch_covtype
+            from sklearn.model_selection import train_test_split
+        except Exception as e:
+            raise ImportError(
+                "CovtypeDataset requires scikit-learn. Install it (e.g., pip install scikit-learn)."
+            ) from e
+
+        cov = fetch_covtype(data_home=data_dir, download_if_missing=True)
+
+        x = cov.data
+        y = cov.target  # commonly 1..7 in sklearn
+
+        # Map labels to 0..6 (CrossEntropyLoss convention).
+        # Only shift when the minimum label is 1, so labels that are already 0-based are left untouched.
+        y = np.asarray(y).reshape(-1)
+        if y.min() == 1:
+            y = y - 1
+
+        # Full-size stratified train/test split
+        x_train, x_test, y_train, y_test = train_test_split(
+            x, y,
+            test_size=self.test_size,
+            random_state=self.seed,
+            shuffle=True,
+            stratify=y,
+        )
+
+        # Stratified, deterministic subsampling down to train_limit / test_limit
+        if self.train_limit is not None and len(y_train) > self.train_limit:
+            x_train, _, y_train, _ = train_test_split(
+                x_train, y_train,
+                train_size=self.train_limit,
+                random_state=self.seed,
+                shuffle=True,
+                stratify=y_train,
+            )
+
+        if self.test_limit is not None and len(y_test) > self.test_limit:
+            x_test, _, y_test, _ = train_test_split(
+                x_test, y_test,
+                train_size=self.test_limit,
+                random_state=self.seed,
+                shuffle=True,
+                stratify=y_test,
+            )
+
+        train_ds = CovtypeTorchDataset(x_train, y_train)
+        test_ds = CovtypeTorchDataset(x_test, y_test)
+
+        return train_ds, test_ds
+
+    def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
+        if partition == "dirichlet":
+            partitions_map = self.dirichlet_partition(dataset, alpha=partition_parameter)
+        elif partition == "percent":
+            partitions_map = self.percentage_partition(dataset, percentage=partition_parameter)
+        else:
+            raise ValueError(f"Partition {partition} is not supported for Non-IID map")
+
+        return partitions_map
+
+    def generate_iid_map(self, dataset, partition: str = "balancediid", partition_parameter: float = 2):
+        if partition == "balancediid":
+            partitions_map = self.balanced_iid_partition(dataset)
+        elif partition == "unbalancediid":
+            partitions_map = self.unbalanced_iid_partition(dataset, imbalance_factor=partition_parameter)
+        else:
+            raise ValueError(f"Partition {partition} is not supported for IID map")
+
+        return partitions_map
diff --git a/nebula/core/datasets/nebuladataset.py b/nebula/core/datasets/nebuladataset.py
index 0c2e03d8a..e42657989 100755
--- a/nebula/core/datasets/nebuladataset.py
+++ b/nebula/core/datasets/nebuladataset.py
@@ -1285,11 +1285,17 @@ def factory_nebuladataset(dataset, **config) -> NebulaDataset:
     from nebula.core.datasets.cifar100.cifar100 import CIFAR100Dataset
     from nebula.core.datasets.emnist.emnist import EMNISTDataset
     from nebula.core.datasets.fashionmnist.fashionmnist import FashionMNISTDataset
+    from nebula.core.datasets.covtype.covtype import CovtypeDataset
+    from nebula.core.datasets.adultcensus.adultcensus import AdultCensusDataset
+    from nebula.core.datasets.breast_cancer.breast_cancer import BreastCancerDataset
     from nebula.core.datasets.mnist.mnist import MNISTDataset
 
     options = {
         "MNIST": MNISTDataset,
         "FashionMNIST": FashionMNISTDataset,
+        "Covtype": CovtypeDataset,
+        "AdultCensus": AdultCensusDataset,
+        "BreastCancer": BreastCancerDataset,
         "EMNIST": EMNISTDataset,
         "CIFAR10": CIFAR10Dataset,
         "CIFAR100": CIFAR100Dataset,
diff --git a/nebula/core/models/adultcensus/__init__.py b/nebula/core/models/adultcensus/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/models/adultcensus/mlp.py b/nebula/core/models/adultcensus/mlp.py
new file mode 100644
index 000000000..b2f33eacb
--- /dev/null
+++ b/nebula/core/models/adultcensus/mlp.py
@@ -0,0 +1,67 @@
+# nebula/core/models/adultcensus/mlp.py
+
+import torch
+
+from nebula.core.models.nebulamodel import NebulaModel
+
+
+class AdultCensusModelMLP(NebulaModel):
+    """
+    Simple MLP for Adult Census (tabular).
+    - input_dim MUST match the number of features after preprocessing (OneHot + scaling).
+    - num_classes = 2 (<=50K vs >50K)
+    """
+    def __init__(
+        self,
+        input_dim: int = 105,
+        num_classes: int = 2,
+        learning_rate: float = 1e-3,
+        metrics=None,
+        confusion_matrix=None,
+        seed=None,
+        hidden1: int = 256,
+        hidden2: int = 128,
+        dropout: float = 0.0,
+    ):
+        # NebulaModel expects something like input_channels first; for tabular we pass input_dim there.
+        super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        # Adam hyperparameters, consumed by configure_optimizers().
+        self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
+
+        self.example_input_array = torch.rand(1, int(input_dim))
+        self.learning_rate = float(learning_rate)
+        self.criterion = torch.nn.CrossEntropyLoss()
+
+        self.l1 = torch.nn.Linear(int(input_dim), int(hidden1))
+        self.l2 = torch.nn.Linear(int(hidden1), int(hidden2))
+        self.l3 = torch.nn.Linear(int(hidden2), int(num_classes))
+
+        self.dropout = torch.nn.Dropout(float(dropout)) if float(dropout) > 0.0 else None
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Expected: (batch, input_dim). Sometimes: (batch, 1, input_dim)
+        if x.dim() == 3 and x.size(1) == 1:
+            x = x.squeeze(1)
+
+        x = self.l1(x)
+        x = torch.relu(x)
+        if self.dropout is not None:
+            x = self.dropout(x)
+
+        x = self.l2(x)
+        x = torch.relu(x)
+        if self.dropout is not None:
+            x = self.dropout(x)
+
+        x = self.l3(x)
+        return x
+
+    def configure_optimizers(self):
+        betas = (self.config["beta1"], self.config["beta2"])  # use the declared Adam settings
+        return torch.optim.Adam(self.parameters(), lr=self.learning_rate, betas=betas, amsgrad=self.config["amsgrad"])
+
+    def get_learning_rate(self) -> float:
+        return float(self.learning_rate)
+
+    def count_parameters(self) -> int:
+        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
diff --git a/nebula/core/models/breast_cancer/__init__.py b/nebula/core/models/breast_cancer/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/models/breast_cancer/mlp.py b/nebula/core/models/breast_cancer/mlp.py
new file mode 100644
index 000000000..e84d099f2
--- /dev/null
+++ b/nebula/core/models/breast_cancer/mlp.py
@@ -0,0 +1,55 @@
+# nebula/core/models/breast_cancer/mlp.py
+
+import torch
+
+from nebula.core.models.nebulamodel import NebulaModel
+
+
+class BreastCancerModelMLP(NebulaModel):
+    """Three-layer MLP (input_dim -> 256 -> 128 -> num_classes) for the tabular Breast Cancer dataset."""
+    def __init__(
+        self,
+        input_dim=30,
+        num_classes=2,
+        learning_rate=1e-3,
+        metrics=None,
+        confusion_matrix=None,
+        seed=None,
+    ):
+        # NOTE: NebulaModel is designed around images (its first argument is input_channels);
+        # for tabular data we pass input_dim in that position to keep the signature.
+        super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
+
+        # Adam hyperparameters, consumed by configure_optimizers().
+        self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
+
+        self.example_input_array = torch.rand(1, input_dim)
+        self.learning_rate = learning_rate
+        self.criterion = torch.nn.CrossEntropyLoss()
+
+        self.l1 = torch.nn.Linear(input_dim, 256)
+        self.l2 = torch.nn.Linear(256, 128)
+        self.l3 = torch.nn.Linear(128, num_classes)
+
+    def forward(self, x):
+        """Return class logits for x of shape (batch, input_dim)."""
+        # Loaders may add an extra dimension, giving (batch, 1, input_dim); drop it.
+        if x.dim() == 3 and x.size(1) == 1:
+            x = x.squeeze(1)
+
+        x = self.l1(x)
+        x = torch.relu(x)
+        x = self.l2(x)
+        x = torch.relu(x)
+        x = self.l3(x)
+        return x
+
+    def configure_optimizers(self):
+        betas = (self.config["beta1"], self.config["beta2"])  # use the declared Adam settings
+        return torch.optim.Adam(self.parameters(), lr=self.learning_rate, betas=betas, amsgrad=self.config["amsgrad"])
+
+    def get_learning_rate(self) -> float:
+        return float(self.learning_rate)
+
+    def count_parameters(self) -> int:
+        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
diff --git a/nebula/core/models/covtype/__init__.py b/nebula/core/models/covtype/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/models/covtype/mlp.py b/nebula/core/models/covtype/mlp.py
new file mode 100644
index 000000000..0399caa43
--- /dev/null
+++ b/nebula/core/models/covtype/mlp.py
@@ -0,0 +1,55 @@
+# nebula/core/models/covtype/mlp.py
+
+import torch
+
+from nebula.core.models.nebulamodel import NebulaModel
+
+
+class CovtypeModelMLP(NebulaModel):
+    """Three-layer MLP (input_dim -> 256 -> 128 -> num_classes) for the tabular Covtype dataset."""
+    def __init__(
+        self,
+        input_dim=54,
+        num_classes=7,
+        learning_rate=1e-3,
+        metrics=None,
+        confusion_matrix=None,
+        seed=None,
+    ):
+        # NOTE: NebulaModel is designed around images (its first argument is input_channels);
+        # for tabular data we pass input_dim in that position to keep the signature.
+        super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
+
+        # Adam hyperparameters, consumed by configure_optimizers().
+        self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
+
+        self.example_input_array = torch.rand(1, input_dim)
+        self.learning_rate = learning_rate
+        self.criterion = torch.nn.CrossEntropyLoss()
+
+        self.l1 = torch.nn.Linear(input_dim, 256)
+        self.l2 = torch.nn.Linear(256, 128)
+        self.l3 = torch.nn.Linear(128, num_classes)
+
+    def forward(self, x):
+        """Return class logits for x of shape (batch, input_dim)."""
+        # Loaders may add an extra dimension, giving (batch, 1, input_dim); drop it.
+        if x.dim() == 3 and x.size(1) == 1:
+            x = x.squeeze(1)
+
+        x = self.l1(x)
+        x = torch.relu(x)
+        x = self.l2(x)
+        x = torch.relu(x)
+        x = self.l3(x)
+        return x
+
+    def configure_optimizers(self):
+        betas = (self.config["beta1"], self.config["beta2"])  # use the declared Adam settings
+        return torch.optim.Adam(self.parameters(), lr=self.learning_rate, betas=betas, amsgrad=self.config["amsgrad"])
+
+    def get_learning_rate(self) -> float:
+        return float(self.learning_rate)
+
+    def count_parameters(self) -> int:
+        return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
diff --git a/nebula/core/node.py b/nebula/core/node.py
index 86a73cc2a..c5f80843e 100755
--- a/nebula/core/node.py
+++ b/nebula/core/node.py
@@ -25,6 +25,9 @@
 from nebula.core.datasets.datamodule import DataModule
 from nebula.core.datasets.emnist.emnist import EMNISTPartitionHandler
 from nebula.core.datasets.fashionmnist.fashionmnist import FashionMNISTPartitionHandler
+from nebula.core.datasets.covtype.covtype import CovtypePartitionHandler
+from nebula.core.datasets.adultcensus.adultcensus import AdultCensusPartitionHandler
+from nebula.core.datasets.breast_cancer.breast_cancer import BreastCancerPartitionHandler
 from nebula.core.datasets.mnist.mnist import MNISTPartitionHandler
 from nebula.core.datasets.nebuladataset import NebulaPartition
 from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
@@ -38,6 +41,9 @@
 from nebula.core.models.emnist.mlp import EMNISTModelMLP
 from nebula.core.models.fashionmnist.cnn import FashionMNISTModelCNN
 from nebula.core.models.fashionmnist.mlp import FashionMNISTModelMLP
+from nebula.core.models.covtype.mlp import CovtypeModelMLP
+from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
+from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
 from nebula.core.models.mnist.cnn import MNISTModelCNN
 from nebula.core.models.mnist.mlp import MNISTModelMLP
 from nebula.core.engine import Engine
@@ -112,6 +118,27 @@ async def main(config: Config):
             model = FashionMNISTModelCNN()
         else:
             raise ValueError(f"Model {model} not supported for dataset {dataset_name}")
+    elif dataset_name == "Covtype":
+        batch_size = 32
+        handler = CovtypePartitionHandler
+        if model_name == "MLP":
+            model = CovtypeModelMLP()
+        else:
+            raise ValueError(f"Model {model} not supported for dataset {dataset_name}")
+    elif dataset_name == "AdultCensus":
+        batch_size = 32
+        handler = AdultCensusPartitionHandler
+        if model_name == "MLP":
+            model = AdultCensusModelMLP()
+        else:
+            raise ValueError(f"Model {model} not supported for dataset {dataset_name}")
+    elif dataset_name == "BreastCancer":
+        batch_size = 32
+        handler = BreastCancerPartitionHandler
+        if model_name == "MLP":
+            model = BreastCancerModelMLP()
+        else:
+            raise ValueError(f"Model {model} not supported for dataset {dataset_name}")
     elif dataset_name == "EMNIST":
         batch_size = 32
         handler = EMNISTPartitionHandler
diff --git a/nebula/frontend/static/js/deployment/help-content.js b/nebula/frontend/static/js/deployment/help-content.js
index 673cae881..111d4e4a8 100644
--- a/nebula/frontend/static/js/deployment/help-content.js
+++ b/nebula/frontend/static/js/deployment/help-content.js
@@ -61,6 +61,9 @@ const HelpContent = (function() {
             <li>MNIST: The MNIST dataset</li>
             <li>FashionMNIST: The FashionMNIST dataset</li>
             <li>CIFAR10: The CIFAR10 dataset</li>
+            <li>Covtype: The Covtype dataset</li>
+            <li>AdultCensus: The AdultCensus dataset</li>
+            <li>BreastCancer: The BreastCancer dataset</li>
         </ul>
     </div>`;
 
diff --git a/nebula/frontend/static/js/deployment/main.js b/nebula/frontend/static/js/deployment/main.js
index 3ec18a8ba..43b546f35 100644
--- a/nebula/frontend/static/js/deployment/main.js
+++ b/nebula/frontend/static/js/deployment/main.js
@@ -210,7 +210,7 @@ const DeploymentManager = (function() {
         datasetSelect.innerHTML = "";
 
         // Add dataset options
-        const datasets = ['MNIST', 'FashionMNIST', 'EMNIST', 'CIFAR10', 'CIFAR100'];
+        const datasets = ['MNIST', 'FashionMNIST', 'EMNIST', 'CIFAR10', 'CIFAR100', 'Covtype', 'AdultCensus', 'BreastCancer'];
         datasets.forEach(dataset => {
             const option = document.createElement("option");
             option.value = dataset;
@@ -251,6 +251,12 @@ const DeploymentManager = (function() {
                 return ['CNN', 'ResNet9', 'fastermobilenet', 'simplemobilenet', 'CNNv2', 'CNNv3'];
             case 'cifar100':
                 return ['CNN'];
+            case 'covtype':
+                return ['MLP'];
+            case 'adultcensus':
+                return ['MLP'];
+            case 'breast_cancer':
+                return ['MLP'];
             default:
                 return ['MLP', 'CNN'];
         }
diff --git a/nebula/frontend/static/js/deployment/scenario.js b/nebula/frontend/static/js/deployment/scenario.js
index feb5c978b..553c5211c 100644
--- a/nebula/frontend/static/js/deployment/scenario.js
+++ b/nebula/frontend/static/js/deployment/scenario.js
@@ -100,31 +100,115 @@ const ScenarioManager = (function () {
             sar_training: window.SaManager.getSaConfig().sar_training || false,
             sar_training_policy: window.SaManager.getSaConfig().sar_training_policy || "Broad-Propagation Strategy",
             random_topology_probability: document.getElementById("random-probability").value || 0.5,
+            // --- Trustworthiness (separate element IDs for CFL/DFL) ---
             with_trustworthiness: document.getElementById("TrustworthinessSwitch").checked ? true : false,
-            robustness_pillar: document.getElementById("robustness-pillar").value,
-            resilience_to_attacks: document.getElementById("robustness-notion-1").value,
-            algorithm_robustness: document.getElementById("robustness-notion-2").value,
-            client_reliability: document.getElementById("robustness-notion-3").value,
-            privacy_pillar: document.getElementById("privacy-pillar").value,
-            technique: document.getElementById("privacy-notion-1").value,
-            uncertainty: document.getElementById("privacy-notion-2").value,
-            indistinguishability: document.getElementById("privacy-notion-3").value,
-            fairness_pillar: document.getElementById("fairness-pillar").value,
-            selection_fairness: document.getElementById("fairness-notion-1").value,
-            performance_fairness: document.getElementById("fairness-notion-2").value,
-            class_distribution: document.getElementById("fairness-notion-3").value,
-            explainability_pillar: document.getElementById("explainability-pillar").value,
-            interpretability: document.getElementById("explainability-notion-1").value,
-            post_hoc_methods: document.getElementById("explainability-notion-2").value,
-            accountability_pillar: document.getElementById("accountability-pillar").value,
-            factsheet_completeness: document.getElementById("accountability-notion-1").value,
-            architectural_soundness_pillar: document.getElementById("architectural-soundness-pillar").value,
-            client_management: document.getElementById("architectural-soundness-notion-1").value,
-            optimization: document.getElementById("architectural-soundness-notion-2").value,
-            sustainability_pillar: document.getElementById("sustainability-pillar").value,
-            energy_source: document.getElementById("sustainability-notion-1").value,
-            hardware_efficiency: document.getElementById("sustainability-notion-2").value,
-            federation_complexity: document.getElementById("sustainability-notion-3").value,
+
+            // When disabled, send zeroed weights (factsheet_completeness stays at 100) to keep the schema
+            ...(document.getElementById("TrustworthinessSwitch").checked
+                ? (() => {
+                    const federationType = document.getElementById("federationArchitecture").value;
+                    const useDFL = (federationType === "DFL" || federationType === "SDFL");
+
+                    if (useDFL) {
+                        // DFL (ADJUST if your DFL setup uses different notions)
+                        return {
+                            robustness_pillar: document.getElementById("dfl-robustness-pillar")?.value || "0",
+                            resilience_to_attacks: document.getElementById("dfl-robustness-notion-1")?.value || "0",
+                            algorithm_robustness: document.getElementById("dfl-robustness-notion-2")?.value || "0",
+                            client_reliability: document.getElementById("dfl-robustness-notion-3")?.value || "0",
+
+                            privacy_pillar: document.getElementById("dfl-privacy-pillar")?.value || "0",
+                            technique: document.getElementById("dfl-privacy-notion-1")?.value || "0",
+                            uncertainty: document.getElementById("dfl-privacy-notion-2")?.value || "0",
+                            indistinguishability: document.getElementById("dfl-privacy-notion-3")?.value || "0",
+
+                            fairness_pillar: document.getElementById("dfl-fairness-pillar")?.value || "0",
+                            // In DFL, typically only class_distribution (notion-3) is stored
+                            selection_fairness: "0",
+                            performance_fairness: "0",
+                            class_distribution: document.getElementById("dfl-fairness-notion-3")?.value || "0",
+
+                            explainability_pillar: document.getElementById("dfl-explainability-pillar")?.value || "0",
+                            interpretability: document.getElementById("dfl-explainability-notion-1")?.value || "0",
+                            post_hoc_methods: document.getElementById("dfl-explainability-notion-2")?.value || "0",
+
+                            accountability_pillar: document.getElementById("dfl-accountability-pillar")?.value || "0",
+                            factsheet_completeness: document.getElementById("dfl-accountability-notion-1")?.value || "100",
+
+                            architectural_soundness_pillar: document.getElementById("dfl-architectural-soundness-pillar")?.value || "0",
+                            client_management: document.getElementById("dfl-architectural-soundness-notion-1")?.value || "0",
+                            optimization: document.getElementById("dfl-architectural-soundness-notion-2")?.value || "0",
+
+                            sustainability_pillar: document.getElementById("dfl-sustainability-pillar")?.value || "0",
+                            energy_source: document.getElementById("dfl-sustainability-notion-1")?.value || "0",
+                            // If hardware_efficiency does not exist in DFL, leave it at 0
+                            hardware_efficiency: "0",
+                            // In DFL, federation_complexity maps to notion-3 (if that applies to your setup)
+                            federation_complexity: document.getElementById("dfl-sustainability-notion-3")?.value || "0",
+                        };
+                    }
+
+                    // CFL
+                    return {
+                        robustness_pillar: document.getElementById("cfl-robustness-pillar")?.value || "0",
+                        resilience_to_attacks: document.getElementById("cfl-robustness-notion-1")?.value || "0",
+                        algorithm_robustness: document.getElementById("cfl-robustness-notion-2")?.value || "0",
+                        client_reliability: document.getElementById("cfl-robustness-notion-3")?.value || "0",
+
+                        privacy_pillar: document.getElementById("cfl-privacy-pillar")?.value || "0",
+                        technique: document.getElementById("cfl-privacy-notion-1")?.value || "0",
+                        uncertainty: document.getElementById("cfl-privacy-notion-2")?.value || "0",
+                        indistinguishability: document.getElementById("cfl-privacy-notion-3")?.value || "0",
+
+                        fairness_pillar: document.getElementById("cfl-fairness-pillar")?.value || "0",
+                        selection_fairness: document.getElementById("cfl-fairness-notion-1")?.value || "0",
+                        performance_fairness: document.getElementById("cfl-fairness-notion-2")?.value || "0",
+                        class_distribution: document.getElementById("cfl-fairness-notion-3")?.value || "0",
+
+                        explainability_pillar: document.getElementById("cfl-explainability-pillar")?.value || "0",
+                        interpretability: document.getElementById("cfl-explainability-notion-1")?.value || "0",
+                        post_hoc_methods: document.getElementById("cfl-explainability-notion-2")?.value || "0",
+
+                        accountability_pillar: document.getElementById("cfl-accountability-pillar")?.value || "0",
+                        factsheet_completeness: document.getElementById("cfl-accountability-notion-1")?.value || "100",
+
+                        architectural_soundness_pillar: document.getElementById("cfl-architectural-soundness-pillar")?.value || "0",
+                        client_management: document.getElementById("cfl-architectural-soundness-notion-1")?.value || "0",
+                        optimization: document.getElementById("cfl-architectural-soundness-notion-2")?.value || "0",
+
+                        sustainability_pillar: document.getElementById("cfl-sustainability-pillar")?.value || "0",
+                        energy_source: document.getElementById("cfl-sustainability-notion-1")?.value || "0",
+                        hardware_efficiency: document.getElementById("cfl-sustainability-notion-2")?.value || "0",
+                        federation_complexity: document.getElementById("cfl-sustainability-notion-3")?.value || "0",
+                    };
+                })()
+                : {
+                    robustness_pillar: "0",
+                    resilience_to_attacks: "0",
+                    algorithm_robustness: "0",
+                    client_reliability: "0",
+                    privacy_pillar: "0",
+                    technique: "0",
+                    uncertainty: "0",
+                    indistinguishability: "0",
+                    fairness_pillar: "0",
+                    selection_fairness: "0",
+                    performance_fairness: "0",
+                    class_distribution: "0",
+                    explainability_pillar: "0",
+                    interpretability: "0",
+                    post_hoc_methods: "0",
+                    accountability_pillar: "0",
+                    factsheet_completeness: "100",
+                    architectural_soundness_pillar: "0",
+                    client_management: "0",
+                    optimization: "0",
+                    sustainability_pillar: "0",
+                    energy_source: "0",
+                    hardware_efficiency: "0",
+                    federation_complexity: "0",
+                }),
+            // --- /Trustworthiness ---
             network_subnet: "172.20.0.0/16",
             network_gateway: "172.20.0.1",
             additional_participants: window.MobilityManager.getMobilityConfig().additionalParticipants || [],
diff --git a/nebula/frontend/static/js/deployment/trustworthiness.js b/nebula/frontend/static/js/deployment/trustworthiness.js
index 7ba4d3f43..ec6ad92b0 100644
--- a/nebula/frontend/static/js/deployment/trustworthiness.js
+++ b/nebula/frontend/static/js/deployment/trustworthiness.js
@@ -2,268 +2,501 @@
 const TrustworthinessManager = (function() {
     function initializeTrustworthinessSystem() {
         setupTrustworthinessSwitch();
+        setupTrustworthinessFederationSwitch();
         setupWeightValidation();
     }
- 
+
+    function isDFL() {
+        const ft = document.getElementById("federationArchitecture")?.value || "CFL";
+        return (ft === "DFL" || ft === "SDFL");
+    }
+
+    function showTrustworthinessWeightsBlock() {
+        const cflBlock = document.getElementById("tw-cfl");
+        const dflBlock = document.getElementById("tw-dfl");
+        if (!cflBlock || !dflBlock) return;
+
+        const use = isDFL();
+        cflBlock.style.display = use ? "none" : "block";
+        dflBlock.style.display = use ? "block" : "none";
+    }
+
     function setupTrustworthinessSwitch() {
-        document.getElementById("TrustworthinessSwitch").addEventListener("change", function() {
+        const sw = document.getElementById("TrustworthinessSwitch");
+        if (!sw) return;
+
+        sw.addEventListener("change", function() {
             const trustworthinessOptionsDiv = document.getElementById("trustworthiness-options");
-            
-            if(this.checked){
-                document.getElementById("federationArchitecture").value = "CFL";
-                document.getElementById("federationArchitecture").dispatchEvent(new Event('change'));
-                document.getElementById("federationArchitecture").disabled = true;
-                trustworthinessOptionsDiv.style.display = "block"
+            if (!trustworthinessOptionsDiv) return;
+
+            if (this.checked) {
+                trustworthinessOptionsDiv.style.display = "block";
+                showTrustworthinessWeightsBlock();
+                validateWeights();
             } else {
-                document.getElementById("federationArchitecture").disabled = false;
                 trustworthinessOptionsDiv.style.display = "none";
             }
         });
     }
- 
+
+    function setupTrustworthinessFederationSwitch() {
+        const fed = document.getElementById("federationArchitecture");
+        if (!fed) return;
+
+        fed.addEventListener("change", function() {
+            const trustworthinessOptionsDiv = document.getElementById("trustworthiness-options");
+            if (trustworthinessOptionsDiv?.style.display === "block") {
+                showTrustworthinessWeightsBlock();
+                validateWeights();
+            }
+        });
+    }
+
     function setupWeightValidation() {
-        const pillarIds = [
-            "robustness-pillar",
-            "privacy-pillar",
-            "fairness-pillar",
-            "explainability-pillar",
-            "accountability-pillar",
-            "architectural-soundness-pillar",
-            "sustainability-pillar"
+        // CFL element IDs
+        const cflPillarIds = [
+            "cfl-robustness-pillar",
+            "cfl-privacy-pillar",
+            "cfl-fairness-pillar",
+            "cfl-explainability-pillar",
+            "cfl-accountability-pillar",
+            "cfl-architectural-soundness-pillar",
+            "cfl-sustainability-pillar"
+        ];
+        const cflNotionIds = [
+            "cfl-robustness-notion-1",
+            "cfl-robustness-notion-2",
+            "cfl-robustness-notion-3",
+            "cfl-privacy-notion-1",
+            "cfl-privacy-notion-2",
+            "cfl-privacy-notion-3",
+            "cfl-fairness-notion-1",
+            "cfl-fairness-notion-2",
+            "cfl-fairness-notion-3",
+            "cfl-explainability-notion-1",
+            "cfl-explainability-notion-2",
+            "cfl-accountability-notion-1",
+            "cfl-architectural-soundness-notion-1",
+            "cfl-architectural-soundness-notion-2",
+            "cfl-sustainability-notion-1",
+            "cfl-sustainability-notion-2",
+            "cfl-sustainability-notion-3"
         ];
-        const notionIds = [
-            "robustness-notion-1",
-            "robustness-notion-2",
-            "robustness-notion-3",
-            "privacy-notion-1",
-            "privacy-notion-2",
-            "privacy-notion-3",
-            "fairness-notion-1",
-            "fairness-notion-2",
-            "fairness-notion-3",
-            "explainability-notion-1",
-            "explainability-notion-2",
-            "architectural-soundness-notion-1",
-            "architectural-soundness-notion-2",
-            "sustainability-notion-1",
-            "sustainability-notion-2",
-            "sustainability-notion-3"
+
+        // DFL element IDs (ADJUST if your DFL setup uses different notions)
+        const dflPillarIds = [
+            "dfl-robustness-pillar",
+            "dfl-privacy-pillar",
+            "dfl-fairness-pillar",
+            "dfl-explainability-pillar",
+            "dfl-accountability-pillar",
+            "dfl-architectural-soundness-pillar",
+            "dfl-sustainability-pillar"
         ];
- 
-        pillarIds.concat(notionIds).forEach(id => {
+        const dflNotionIds = [
+            "dfl-robustness-notion-1",
+            "dfl-robustness-notion-2",
+            "dfl-robustness-notion-3",
+            "dfl-privacy-notion-1",
+            "dfl-privacy-notion-2",
+            "dfl-privacy-notion-3",
+            // Reduced DFL fairness set:
+            "dfl-fairness-notion-3",
+            "dfl-explainability-notion-1",
+            "dfl-explainability-notion-2",
+            "dfl-accountability-notion-1",
+            "dfl-architectural-soundness-notion-1",
+            "dfl-architectural-soundness-notion-2",
+            // Reduced DFL sustainability set:
+            "dfl-sustainability-notion-1",
+            "dfl-sustainability-notion-3"
+        ];
+
+        cflPillarIds.concat(cflNotionIds, dflPillarIds, dflNotionIds).forEach(id => {
             const input = document.getElementById(id);
-            if (input) {
-                input.addEventListener("input", validateWeights);
-            }
+            if (input) input.addEventListener("input", validateWeights);
         });
     }
- 
+
     function validateWeights() {
-        const robustnessPercent = parseFloat(document.getElementById("robustness-pillar").value) || 0;
-        const privacyPercent = parseFloat(document.getElementById("privacy-pillar").value) || 0;
-        const fairnessPercent = parseFloat(document.getElementById("fairness-pillar").value) || 0;
-        const explainabilityPercent = parseFloat(document.getElementById("explainability-pillar").value) || 0;
-        const accountabilityPercent = parseFloat(document.getElementById("accountability-pillar").value) || 0;
-        const architecturalSoundnessPercent = parseFloat(document.getElementById("architectural-soundness-pillar").value) || 0;
-        const sustainabilityPercent = parseFloat(document.getElementById("sustainability-pillar").value) || 0;
- 
-        const robustnessNotion1 = parseFloat(document.getElementById("robustness-notion-1").value) || 0;
-        const robustnessNotion2 = parseFloat(document.getElementById("robustness-notion-2").value) || 0;
-        const robustnessNotion3 = parseFloat(document.getElementById("robustness-notion-3").value) || 0;
-        const privacyNotion1 = parseFloat(document.getElementById("privacy-notion-1").value) || 0;
-        const privacyNotion2 = parseFloat(document.getElementById("privacy-notion-2").value) || 0;
-        const privacyNotion3 = parseFloat(document.getElementById("privacy-notion-3").value) || 0;
-        const fairnessNotion1 = parseFloat(document.getElementById("fairness-notion-1").value) || 0;
-        const fairnessNotion2 = parseFloat(document.getElementById("fairness-notion-2").value) || 0;
-        const fairnessNotion3 = parseFloat(document.getElementById("fairness-notion-3").value) || 0;
-        const explainabilityNotion1 = parseFloat(document.getElementById("explainability-notion-1").value) || 0;
-        const explainabilityNotion2 = parseFloat(document.getElementById("explainability-notion-2").value) || 0;
-        const architecturalSoundnessNotion1 = parseFloat(document.getElementById("architectural-soundness-notion-1").value) || 0;
-        const architecturalSoundnessNotion2 = parseFloat(document.getElementById("architectural-soundness-notion-2").value) || 0;
-        const sustainabilityNotion1 = parseFloat(document.getElementById("sustainability-notion-1").value) || 0;
-        const sustainabilityNotion2 = parseFloat(document.getElementById("sustainability-notion-2").value) || 0;
-        const sustainabilityNotion3 = parseFloat(document.getElementById("sustainability-notion-3").value) || 0;
- 
+        if (isDFL()) {
+            return validateWeightsDFL();
+        }
+        return validateWeightsCFL();
+    }
+
+    function validateWeightsCFL() {
+        const robustnessPercent = parseFloat(document.getElementById("cfl-robustness-pillar").value) || 0;
+        const privacyPercent = parseFloat(document.getElementById("cfl-privacy-pillar").value) || 0;
+        const fairnessPercent = parseFloat(document.getElementById("cfl-fairness-pillar").value) || 0;
+        const explainabilityPercent = parseFloat(document.getElementById("cfl-explainability-pillar").value) || 0;
+        const accountabilityPercent = parseFloat(document.getElementById("cfl-accountability-pillar").value) || 0;
+        const architecturalSoundnessPercent = parseFloat(document.getElementById("cfl-architectural-soundness-pillar").value) || 0;
+        const sustainabilityPercent = parseFloat(document.getElementById("cfl-sustainability-pillar").value) || 0;
+
+        const robustnessNotion1 = parseFloat(document.getElementById("cfl-robustness-notion-1").value) || 0;
+        const robustnessNotion2 = parseFloat(document.getElementById("cfl-robustness-notion-2").value) || 0;
+        const robustnessNotion3 = parseFloat(document.getElementById("cfl-robustness-notion-3").value) || 0;
+
+        const privacyNotion1 = parseFloat(document.getElementById("cfl-privacy-notion-1").value) || 0;
+        const privacyNotion2 = parseFloat(document.getElementById("cfl-privacy-notion-2").value) || 0;
+        const privacyNotion3 = parseFloat(document.getElementById("cfl-privacy-notion-3").value) || 0;
+
+        const fairnessNotion1 = parseFloat(document.getElementById("cfl-fairness-notion-1").value) || 0;
+        const fairnessNotion2 = parseFloat(document.getElementById("cfl-fairness-notion-2").value) || 0;
+        const fairnessNotion3 = parseFloat(document.getElementById("cfl-fairness-notion-3").value) || 0;
+
+        const explainabilityNotion1 = parseFloat(document.getElementById("cfl-explainability-notion-1").value) || 0;
+        const explainabilityNotion2 = parseFloat(document.getElementById("cfl-explainability-notion-2").value) || 0;
+
+        const architecturalSoundnessNotion1 = parseFloat(document.getElementById("cfl-architectural-soundness-notion-1").value) || 0;
+        const architecturalSoundnessNotion2 = parseFloat(document.getElementById("cfl-architectural-soundness-notion-2").value) || 0;
+
+        const sustainabilityNotion1 = parseFloat(document.getElementById("cfl-sustainability-notion-1").value) || 0;
+        const sustainabilityNotion2 = parseFloat(document.getElementById("cfl-sustainability-notion-2").value) || 0;
+        const sustainabilityNotion3 = parseFloat(document.getElementById("cfl-sustainability-notion-3").value) || 0;
+
         const totalPillar =
-            robustnessPercent +
-            privacyPercent +
-            fairnessPercent +
-            explainabilityPercent +
-            accountabilityPercent +
-            architecturalSoundnessPercent +
-            sustainabilityPercent;
- 
+            robustnessPercent + privacyPercent + fairnessPercent + explainabilityPercent +
+            accountabilityPercent + architecturalSoundnessPercent + sustainabilityPercent;
+
         const totalRobustnessNotion = robustnessNotion1 + robustnessNotion2 + robustnessNotion3;
         const totalPrivacyNotion = privacyNotion1 + privacyNotion2 + privacyNotion3;
         const totalFairnessNotion = fairnessNotion1 + fairnessNotion2 + fairnessNotion3;
         const totalExplainabilityNotion = explainabilityNotion1 + explainabilityNotion2;
         const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
         const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion2 + sustainabilityNotion3;
- 
-        if (totalPillar !== 100) {
-            return "[Trustworthiness] Check pillars weights";
-        }
-        if (totalRobustnessNotion !== 100) {
-            return "[Trustworthiness] Check robustness notions weights";
-        }
-        if (totalPrivacyNotion !== 100) {
-            return "[Trustworthiness] Check privacy notions weights";
-        }
-        if (totalFairnessNotion !== 100) {
-            return "[Trustworthiness] Check fairness notions weights";
-        }
-        if (totalExplainabilityNotion !== 100) {
-            return "[Trustworthiness] Check explainability notions weights";
-        }
-        if (totalArchitecturalSoundnessNotion !== 100) {
-            return "[Trustworthiness] Check architectural soundness notions weights";
-        }
-        if (totalSustainabilityNotion !== 100) {
-            return "[Trustworthiness] Check sustainability notions weights";
-        }
+
+        if (totalPillar !== 100) return "[Trustworthiness] Check pillars weights";
+        if (totalRobustnessNotion !== 100) return "[Trustworthiness] Check robustness notions weights";
+        if (totalPrivacyNotion !== 100) return "[Trustworthiness] Check privacy notions weights";
+        if (totalFairnessNotion !== 100) return "[Trustworthiness] Check fairness notions weights";
+        if (totalExplainabilityNotion !== 100) return "[Trustworthiness] Check explainability notions weights";
+        if (totalArchitecturalSoundnessNotion !== 100) return "[Trustworthiness] Check architectural soundness notions weights";
+        if (totalSustainabilityNotion !== 100) return "[Trustworthiness] Check sustainability notions weights";
     }
- 
+
+    function validateWeightsDFL() {
+        const robustnessPercent = parseFloat(document.getElementById("dfl-robustness-pillar").value) || 0;
+        const privacyPercent = parseFloat(document.getElementById("dfl-privacy-pillar").value) || 0;
+        const fairnessPercent = parseFloat(document.getElementById("dfl-fairness-pillar").value) || 0;
+        const explainabilityPercent = parseFloat(document.getElementById("dfl-explainability-pillar").value) || 0;
+        const accountabilityPercent = parseFloat(document.getElementById("dfl-accountability-pillar").value) || 0;
+        const architecturalSoundnessPercent = parseFloat(document.getElementById("dfl-architectural-soundness-pillar").value) || 0;
+        const sustainabilityPercent = parseFloat(document.getElementById("dfl-sustainability-pillar").value) || 0;
+
+        const robustnessNotion1 = parseFloat(document.getElementById("dfl-robustness-notion-1").value) || 0;
+        const robustnessNotion2 = parseFloat(document.getElementById("dfl-robustness-notion-2").value) || 0;
+        const robustnessNotion3 = parseFloat(document.getElementById("dfl-robustness-notion-3").value) || 0;
+
+        const privacyNotion1 = parseFloat(document.getElementById("dfl-privacy-notion-1").value) || 0;
+        const privacyNotion2 = parseFloat(document.getElementById("dfl-privacy-notion-2").value) || 0;
+        const privacyNotion3 = parseFloat(document.getElementById("dfl-privacy-notion-3").value) || 0;
+
+        // Reduced DFL fairness set (ADJUST if needed)
+        const fairnessNotion3 = parseFloat(document.getElementById("dfl-fairness-notion-3").value) || 0;
+
+        const explainabilityNotion1 = parseFloat(document.getElementById("dfl-explainability-notion-1").value) || 0;
+        const explainabilityNotion2 = parseFloat(document.getElementById("dfl-explainability-notion-2").value) || 0;
+
+        const architecturalSoundnessNotion1 = parseFloat(document.getElementById("dfl-architectural-soundness-notion-1").value) || 0;
+        const architecturalSoundnessNotion2 = parseFloat(document.getElementById("dfl-architectural-soundness-notion-2").value) || 0;
+
+        // Reduced DFL sustainability set (ADJUST if needed)
+        const sustainabilityNotion1 = parseFloat(document.getElementById("dfl-sustainability-notion-1").value) || 0;
+        const sustainabilityNotion3 = parseFloat(document.getElementById("dfl-sustainability-notion-3").value) || 0;
+
+        const totalPillar =
+            robustnessPercent + privacyPercent + fairnessPercent + explainabilityPercent +
+            accountabilityPercent + architecturalSoundnessPercent + sustainabilityPercent;
+
+        const totalRobustnessNotion = robustnessNotion1 + robustnessNotion2 + robustnessNotion3;
+        const totalPrivacyNotion = privacyNotion1 + privacyNotion2 + privacyNotion3;
+        const totalFairnessNotion = fairnessNotion3;
+        const totalExplainabilityNotion = explainabilityNotion1 + explainabilityNotion2;
+        const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
+        const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion3;
+
+        if (totalPillar !== 100) return "[Trustworthiness] Check pillars weights";
+        if (totalRobustnessNotion !== 100) return "[Trustworthiness] Check robustness notions weights";
+        if (totalPrivacyNotion !== 100) return "[Trustworthiness] Check privacy notions weights";
+        if (totalFairnessNotion !== 100) return "[Trustworthiness] Check fairness notions weights";
+        if (totalExplainabilityNotion !== 100) return "[Trustworthiness] Check explainability notions weights";
+        if (totalArchitecturalSoundnessNotion !== 100) return "[Trustworthiness] Check architectural soundness notions weights";
+        if (totalSustainabilityNotion !== 100) return "[Trustworthiness] Check sustainability notions weights";
+    }
+
     function getTrustworthinessConfig() {
         const enabled = document.getElementById("trustworthiness-options").style.display === "block";
         const federationArchitecture = document.getElementById("federationArchitecture").value;
- 
+
+        if (isDFL()) return getTrustworthinessConfigDFL(enabled, federationArchitecture);
+        return getTrustworthinessConfigCFL(enabled, federationArchitecture);
+    }
+
+    function getTrustworthinessConfigCFL(enabled, federationArchitecture) {
         const pillars = {
-            robustness: parseFloat(document.getElementById("robustness-pillar").value) || 0,
-            privacy: parseFloat(document.getElementById("privacy-pillar").value) || 0,
-            fairness: parseFloat(document.getElementById("fairness-pillar").value) || 0,
-            explainability: parseFloat(document.getElementById("explainability-pillar").value) || 0,
-            accountability: parseFloat(document.getElementById("accountability-pillar").value) || 0,
-            architecturalSoundness: parseFloat(document.getElementById("architectural-soundness-pillar").value) || 0,
-            sustainability: parseFloat(document.getElementById("sustainability-pillar").value) || 0
+            robustness: parseFloat(document.getElementById("cfl-robustness-pillar").value) || 0,
+            privacy: parseFloat(document.getElementById("cfl-privacy-pillar").value) || 0,
+            fairness: parseFloat(document.getElementById("cfl-fairness-pillar").value) || 0,
+            explainability: parseFloat(document.getElementById("cfl-explainability-pillar").value) || 0,
+            accountability: parseFloat(document.getElementById("cfl-accountability-pillar").value) || 0,
+            architecturalSoundness: parseFloat(document.getElementById("cfl-architectural-soundness-pillar").value) || 0,
+            sustainability: parseFloat(document.getElementById("cfl-sustainability-pillar").value) || 0
         };
- 
+
         const notions = {
             robustness: [
-                parseFloat(document.getElementById("robustness-notion-1").value) || 0,
-                parseFloat(document.getElementById("robustness-notion-2").value) || 0,
-                parseFloat(document.getElementById("robustness-notion-3").value) || 0
+                parseFloat(document.getElementById("cfl-robustness-notion-1").value) || 0,
+                parseFloat(document.getElementById("cfl-robustness-notion-2").value) || 0,
+                parseFloat(document.getElementById("cfl-robustness-notion-3").value) || 0
             ],
             privacy: [
-                parseFloat(document.getElementById("privacy-notion-1").value) || 0,
-                parseFloat(document.getElementById("privacy-notion-2").value) || 0,
-                parseFloat(document.getElementById("privacy-notion-3").value) || 0
+                parseFloat(document.getElementById("cfl-privacy-notion-1").value) || 0,
+                parseFloat(document.getElementById("cfl-privacy-notion-2").value) || 0,
+                parseFloat(document.getElementById("cfl-privacy-notion-3").value) || 0
             ],
             fairness: [
-                parseFloat(document.getElementById("fairness-notion-1").value) || 0,
-                parseFloat(document.getElementById("fairness-notion-2").value) || 0,
-                parseFloat(document.getElementById("fairness-notion-3").value) || 0
+                parseFloat(document.getElementById("cfl-fairness-notion-1").value) || 0,
+                parseFloat(document.getElementById("cfl-fairness-notion-2").value) || 0,
+                parseFloat(document.getElementById("cfl-fairness-notion-3").value) || 0
             ],
             explainability: [
-                parseFloat(document.getElementById("explainability-notion-1").value) || 0,
-                parseFloat(document.getElementById("explainability-notion-2").value) || 0
+                parseFloat(document.getElementById("cfl-explainability-notion-1").value) || 0,
+                parseFloat(document.getElementById("cfl-explainability-notion-2").value) || 0
+            ],
+            accountability: [
+                parseFloat(document.getElementById("cfl-accountability-notion-1")?.value) || 100
             ],
             architecturalSoundness: [
-                parseFloat(document.getElementById("architectural-soundness-notion-1").value) || 0,
-                parseFloat(document.getElementById("architectural-soundness-notion-2").value) || 0
+                parseFloat(document.getElementById("cfl-architectural-soundness-notion-1").value) || 0,
+                parseFloat(document.getElementById("cfl-architectural-soundness-notion-2").value) || 0
             ],
             sustainability: [
-                parseFloat(document.getElementById("sustainability-notion-1").value) || 0,
-                parseFloat(document.getElementById("sustainability-notion-2").value) || 0,
-                parseFloat(document.getElementById("sustainability-notion-3").value) || 0
+                parseFloat(document.getElementById("cfl-sustainability-notion-1").value) || 0,
+                parseFloat(document.getElementById("cfl-sustainability-notion-2").value) || 0,
+                parseFloat(document.getElementById("cfl-sustainability-notion-3").value) || 0
             ]
         };
- 
-        return {
-            enabled,
-            federationArchitecture,
-            pillars,
-            notions
+
+        return { enabled, federationArchitecture, pillars, notions };
+    }
+
+    function getTrustworthinessConfigDFL(enabled, federationArchitecture) { // Build { enabled, federationArchitecture, pillars, notions } from the dfl-* weight inputs.
+        const pillars = { // pillar weights; an empty or non-numeric input falls back to 0
+            robustness: parseFloat(document.getElementById("dfl-robustness-pillar").value) || 0,
+            privacy: parseFloat(document.getElementById("dfl-privacy-pillar").value) || 0,
+            fairness: parseFloat(document.getElementById("dfl-fairness-pillar").value) || 0,
+            explainability: parseFloat(document.getElementById("dfl-explainability-pillar").value) || 0,
+            accountability: parseFloat(document.getElementById("dfl-accountability-pillar").value) || 0,
+            architecturalSoundness: parseFloat(document.getElementById("dfl-architectural-soundness-pillar").value) || 0,
+            sustainability: parseFloat(document.getElementById("dfl-sustainability-pillar").value) || 0
+        };
+
+        const notions = { // per-pillar notion weights, listed in input order
+            robustness: [
+                parseFloat(document.getElementById("dfl-robustness-notion-1").value) || 0,
+                parseFloat(document.getElementById("dfl-robustness-notion-2").value) || 0,
+                parseFloat(document.getElementById("dfl-robustness-notion-3").value) || 0
+            ],
+            privacy: [
+                parseFloat(document.getElementById("dfl-privacy-notion-1").value) || 0,
+                parseFloat(document.getElementById("dfl-privacy-notion-2").value) || 0,
+                parseFloat(document.getElementById("dfl-privacy-notion-3").value) || 0
+            ],
+            // Reduced DFL fairness set: only the notion-3 input is read (adjust if needed)
+            fairness: [
+                parseFloat(document.getElementById("dfl-fairness-notion-3").value) || 0
+            ],
+            explainability: [
+                parseFloat(document.getElementById("dfl-explainability-notion-1").value) || 0,
+                parseFloat(document.getElementById("dfl-explainability-notion-2").value) || 0
+            ],
+            accountability: [ // optional input (?.): a missing element, or an empty/zero value, yields 100
+                parseFloat(document.getElementById("dfl-accountability-notion-1")?.value) || 100
+            ],
+            architecturalSoundness: [
+                parseFloat(document.getElementById("dfl-architectural-soundness-notion-1").value) || 0,
+                parseFloat(document.getElementById("dfl-architectural-soundness-notion-2").value) || 0
+            ],
+            // Reduced DFL sustainability set: notion-1 and notion-3 inputs only, no notion-2 (adjust if needed)
+            sustainability: [
+                parseFloat(document.getElementById("dfl-sustainability-notion-1").value) || 0,
+                parseFloat(document.getElementById("dfl-sustainability-notion-3").value) || 0
+            ]
         };
+
+        return { enabled, federationArchitecture, pillars, notions };
     }
- 
+
     function setTrustworthinessConfig(config) {
         if (!config) return;
- 
-        // Set pillar weights
+
+        if (isDFL()) setTrustworthinessConfigDFL(config); // populate the dfl-* inputs when isDFL() is true
+        else setTrustworthinessConfigCFL(config); // otherwise populate the cfl-* inputs
+
+        validateWeights(); // re-run weight validation now that the inputs have changed
+    }
+
+    function setTrustworthinessConfigCFL(config) { // Restore saved weights into the cfl-* inputs; config.pillars and config.notions are each optional.
         if (config.pillars) {
-            document.getElementById("robustness-pillar").value = config.pillars.robustness || 0;
-            document.getElementById("privacy-pillar").value = config.pillars.privacy || 0;
-            document.getElementById("fairness-pillar").value = config.pillars.fairness || 0;
-            document.getElementById("explainability-pillar").value = config.pillars.explainability || 0;
-            document.getElementById("accountability-pillar").value = config.pillars.accountability || 0;
-            document.getElementById("architectural-soundness-pillar").value = config.pillars.architecturalSoundness || 0;
-            document.getElementById("sustainability-pillar").value = config.pillars.sustainability || 0;
+            document.getElementById("cfl-robustness-pillar").value = config.pillars.robustness || 0;
+            document.getElementById("cfl-privacy-pillar").value = config.pillars.privacy || 0;
+            document.getElementById("cfl-fairness-pillar").value = config.pillars.fairness || 0;
+            document.getElementById("cfl-explainability-pillar").value = config.pillars.explainability || 0;
+            document.getElementById("cfl-accountability-pillar").value = config.pillars.accountability || 0;
+            document.getElementById("cfl-architectural-soundness-pillar").value = config.pillars.architecturalSoundness || 0;
+            document.getElementById("cfl-sustainability-pillar").value = config.pillars.sustainability || 0;
         }
- 
-        // Set notion weights
+
         if (config.notions) {
-            const rNotions = config.notions.robustness || [0, 0, 0];
-            document.getElementById("robustness-notion-1").value = rNotions[0];
-            document.getElementById("robustness-notion-2").value = rNotions[1];
-            document.getElementById("robustness-notion-3").value = rNotions[2];
- 
-            const pNotions = config.notions.privacy || [0, 0, 0];
-            document.getElementById("privacy-notion-1").value = pNotions[0];
-            document.getElementById("privacy-notion-2").value = pNotions[1];
-            document.getElementById("privacy-notion-3").value = pNotions[2];
- 
-            const fNotions = config.notions.fairness || [0, 0, 0];
-            document.getElementById("fairness-notion-1").value = fNotions[0];
-            document.getElementById("fairness-notion-2").value = fNotions[1];
-            document.getElementById("fairness-notion-3").value = fNotions[2];
- 
-            const eNotions = config.notions.explainability || [0, 0];
-            document.getElementById("explainability-notion-1").value = eNotions[0];
-            document.getElementById("explainability-notion-2").value = eNotions[1];
- 
-            const aNotions = config.notions.architecturalSoundness || [0, 0];
-            document.getElementById("architectural-soundness-notion-1").value = aNotions[0];
-            document.getElementById("architectural-soundness-notion-2").value = aNotions[1];
- 
-            const sNotions = config.notions.sustainability || [0, 0, 0];
-            document.getElementById("sustainability-notion-1").value = sNotions[0];
-            document.getElementById("sustainability-notion-2").value = sNotions[1];
-            document.getElementById("sustainability-notion-3").value = sNotions[2];
+            const r = config.notions.robustness || [0, 0, 0]; // a missing notion array defaults to zeros
+            document.getElementById("cfl-robustness-notion-1").value = r[0];
+            document.getElementById("cfl-robustness-notion-2").value = r[1];
+            document.getElementById("cfl-robustness-notion-3").value = r[2];
+
+            const p = config.notions.privacy || [0, 0, 0];
+            document.getElementById("cfl-privacy-notion-1").value = p[0];
+            document.getElementById("cfl-privacy-notion-2").value = p[1];
+            document.getElementById("cfl-privacy-notion-3").value = p[2];
+
+            const f = config.notions.fairness || [0, 0, 0];
+            document.getElementById("cfl-fairness-notion-1").value = f[0];
+            document.getElementById("cfl-fairness-notion-2").value = f[1];
+            document.getElementById("cfl-fairness-notion-3").value = f[2];
+
+            const e = config.notions.explainability || [0, 0];
+            document.getElementById("cfl-explainability-notion-1").value = e[0];
+            document.getElementById("cfl-explainability-notion-2").value = e[1];
+
+            const a = config.notions.architecturalSoundness || [0, 0]; // NOTE(review): config.notions.accountability is not written back to any input here
+            document.getElementById("cfl-architectural-soundness-notion-1").value = a[0];
+            document.getElementById("cfl-architectural-soundness-notion-2").value = a[1];
+
+            const s = config.notions.sustainability || [0, 0, 0];
+            document.getElementById("cfl-sustainability-notion-1").value = s[0];
+            document.getElementById("cfl-sustainability-notion-2").value = s[1];
+            document.getElementById("cfl-sustainability-notion-3").value = s[2];
         }
- 
-        // Perform a weight validation check to update any warnings if needed
-        validateWeights();
     }
- 
+
+    function setTrustworthinessConfigDFL(config) { // Restore saved weights into the dfl-* inputs; config.pillars and config.notions are each optional.
+        if (config.pillars) {
+            document.getElementById("dfl-robustness-pillar").value = config.pillars.robustness || 0;
+            document.getElementById("dfl-privacy-pillar").value = config.pillars.privacy || 0;
+            document.getElementById("dfl-fairness-pillar").value = config.pillars.fairness || 0;
+            document.getElementById("dfl-explainability-pillar").value = config.pillars.explainability || 0;
+            document.getElementById("dfl-accountability-pillar").value = config.pillars.accountability || 0;
+            document.getElementById("dfl-architectural-soundness-pillar").value = config.pillars.architecturalSoundness || 0;
+            document.getElementById("dfl-sustainability-pillar").value = config.pillars.sustainability || 0;
+        }
+
+        if (config.notions) {
+            const r = config.notions.robustness || [0, 0, 0]; // a missing notion array defaults to zeros
+            document.getElementById("dfl-robustness-notion-1").value = r[0];
+            document.getElementById("dfl-robustness-notion-2").value = r[1];
+            document.getElementById("dfl-robustness-notion-3").value = r[2];
+
+            const p = config.notions.privacy || [0, 0, 0];
+            document.getElementById("dfl-privacy-notion-1").value = p[0];
+            document.getElementById("dfl-privacy-notion-2").value = p[1];
+            document.getElementById("dfl-privacy-notion-3").value = p[2];
+
+            // Reduced DFL fairness set: the first array entry is written to the notion-3 input (adjust if needed)
+            const f = config.notions.fairness || [0];
+            document.getElementById("dfl-fairness-notion-3").value = f[0];
+
+            const e = config.notions.explainability || [0, 0];
+            document.getElementById("dfl-explainability-notion-1").value = e[0];
+            document.getElementById("dfl-explainability-notion-2").value = e[1];
+
+            const a = config.notions.architecturalSoundness || [0, 0]; // NOTE(review): config.notions.accountability is not written back to any input here
+            document.getElementById("dfl-architectural-soundness-notion-1").value = a[0];
+            document.getElementById("dfl-architectural-soundness-notion-2").value = a[1];
+
+            // Reduced DFL sustainability set: entries [0] and [1] go to the notion-1 and notion-3 inputs (adjust if needed)
+            const s = config.notions.sustainability || [0, 0];
+            document.getElementById("dfl-sustainability-notion-1").value = s[0];
+            document.getElementById("dfl-sustainability-notion-3").value = s[1];
+        }
+    }
+
     function resetTrustworthinessConfig() {
         const trustworthinessOptionsDiv = document.getElementById("trustworthiness-options");
         const fedArchElement = document.getElementById("federationArchitecture");
- 
-        // Hide options and re-enable federationArchitecture
+
         trustworthinessOptionsDiv.style.display = "none";
         fedArchElement.disabled = false;
- 
-        // Reset pillars to 0
-        document.getElementById("robustness-pillar").value = "0";
-        document.getElementById("privacy-pillar").value = "0";
-        document.getElementById("fairness-pillar").value = "0";
-        document.getElementById("explainability-pillar").value = "0";
-        document.getElementById("accountability-pillar").value = "0";
-        document.getElementById("architectural-soundness-pillar").value = "0";
-        document.getElementById("sustainability-pillar").value = "0";
- 
-        // Reset notions to 0
-        document.getElementById("robustness-notion-1").value = "0";
-        document.getElementById("robustness-notion-2").value = "0";
-        document.getElementById("robustness-notion-3").value = "0";
-        document.getElementById("privacy-notion-1").value = "0";
-        document.getElementById("privacy-notion-2").value = "0";
-        document.getElementById("privacy-notion-3").value = "0";
-        document.getElementById("fairness-notion-1").value = "0";
-        document.getElementById("fairness-notion-2").value = "0";
-        document.getElementById("fairness-notion-3").value = "0";
-        document.getElementById("explainability-notion-1").value = "0";
-        document.getElementById("explainability-notion-2").value = "0";
-        document.getElementById("architectural-soundness-notion-1").value = "0";
-        document.getElementById("architectural-soundness-notion-2").value = "0";
-        document.getElementById("sustainability-notion-1").value = "0";
-        document.getElementById("sustainability-notion-2").value = "0";
-        document.getElementById("sustainability-notion-3").value = "0";
- 
-        // Re-validate weights after reset
+
+        if (isDFL()) resetTrustworthinessConfigDFL(); // zero the dfl-* inputs when isDFL() is true
+        else resetTrustworthinessConfigCFL(); // otherwise zero the cfl-* inputs; the other set is left untouched
+
         validateWeights();
     }
- 
+
+    function resetTrustworthinessConfigCFL() { // Set every cfl-* pillar and notion input listed below to "0".
+        document.getElementById("cfl-robustness-pillar").value = "0";
+        document.getElementById("cfl-privacy-pillar").value = "0";
+        document.getElementById("cfl-fairness-pillar").value = "0";
+        document.getElementById("cfl-explainability-pillar").value = "0";
+        document.getElementById("cfl-accountability-pillar").value = "0";
+        document.getElementById("cfl-architectural-soundness-pillar").value = "0";
+        document.getElementById("cfl-sustainability-pillar").value = "0";
+
+        document.getElementById("cfl-robustness-notion-1").value = "0";
+        document.getElementById("cfl-robustness-notion-2").value = "0";
+        document.getElementById("cfl-robustness-notion-3").value = "0";
+
+        document.getElementById("cfl-privacy-notion-1").value = "0";
+        document.getElementById("cfl-privacy-notion-2").value = "0";
+        document.getElementById("cfl-privacy-notion-3").value = "0";
+
+        document.getElementById("cfl-fairness-notion-1").value = "0";
+        document.getElementById("cfl-fairness-notion-2").value = "0";
+        document.getElementById("cfl-fairness-notion-3").value = "0";
+
+        document.getElementById("cfl-explainability-notion-1").value = "0";
+        document.getElementById("cfl-explainability-notion-2").value = "0";
+
+        document.getElementById("cfl-architectural-soundness-notion-1").value = "0"; // NOTE(review): no accountability notion input is reset in this function
+        document.getElementById("cfl-architectural-soundness-notion-2").value = "0";
+
+        document.getElementById("cfl-sustainability-notion-1").value = "0";
+        document.getElementById("cfl-sustainability-notion-2").value = "0";
+        document.getElementById("cfl-sustainability-notion-3").value = "0";
+    }
+
+    function resetTrustworthinessConfigDFL() { // Set every dfl-* pillar and notion input listed below to "0".
+        document.getElementById("dfl-robustness-pillar").value = "0";
+        document.getElementById("dfl-privacy-pillar").value = "0";
+        document.getElementById("dfl-fairness-pillar").value = "0";
+        document.getElementById("dfl-explainability-pillar").value = "0";
+        document.getElementById("dfl-accountability-pillar").value = "0";
+        document.getElementById("dfl-architectural-soundness-pillar").value = "0";
+        document.getElementById("dfl-sustainability-pillar").value = "0";
+
+        document.getElementById("dfl-robustness-notion-1").value = "0";
+        document.getElementById("dfl-robustness-notion-2").value = "0";
+        document.getElementById("dfl-robustness-notion-3").value = "0";
+
+        document.getElementById("dfl-privacy-notion-1").value = "0";
+        document.getElementById("dfl-privacy-notion-2").value = "0";
+        document.getElementById("dfl-privacy-notion-3").value = "0";
+
+        // Reduced DFL fairness set: only the notion-3 input is reset (adjust if needed)
+        document.getElementById("dfl-fairness-notion-3").value = "0";
+
+        document.getElementById("dfl-explainability-notion-1").value = "0";
+        document.getElementById("dfl-explainability-notion-2").value = "0";
+
+        document.getElementById("dfl-architectural-soundness-notion-1").value = "0"; // NOTE(review): no accountability notion input is reset in this function
+        document.getElementById("dfl-architectural-soundness-notion-2").value = "0";
+
+        // Reduced DFL sustainability set: only the notion-1 and notion-3 inputs are reset (adjust if needed)
+        document.getElementById("dfl-sustainability-notion-1").value = "0";
+        document.getElementById("dfl-sustainability-notion-3").value = "0";
+    }
+
     return {
         initializeTrustworthinessSystem,
         getTrustworthinessConfig,
@@ -271,5 +504,5 @@ const TrustworthinessManager = (function() {
         resetTrustworthinessConfig
     };
 })();
- 
-export default TrustworthinessManager;
\ No newline at end of file
+
+export default TrustworthinessManager;
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 8572403ff..18d2f2e42 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -143,6 +143,18 @@ <h5>Dataset <i class="fa fa-database"></i>
                             <input type="checkbox" id="mod-fmnist" value='FashionMNIST'
                                 style="margin-right: 10px; margin-left: 10px;">FashionMNIST
                         </label>
+                        <label style="display: inline-block; align-items: center; margin-bottom: 10px;">
+                            <input type="checkbox" id="mod-covtype" value='Covtype'
+                                style="margin-right: 10px; margin-left: 10px;">Covtype
+                        </label>
+                        <label style="display: inline-block; align-items: center; margin-bottom: 10px;">
+                            <input type="checkbox" id="mod-adultcensus" value='AdultCensus'
+                                style="margin-right: 10px; margin-left: 10px;">AdultCensus
+                        </label>
+                        <label style="display: inline-block; align-items: center; margin-bottom: 10px;">
+                            <input type="checkbox" id="mod-breastcancer" value='BreastCancer'
+                                style="margin-right: 10px; margin-left: 10px;">BreastCancer
+                        </label>
                         <label style="display: inline-block; align-items: center; margin-bottom: 10px;">
                             <input type="checkbox" id="mod-cifar10" value='CIFAR10'
                                 style="margin-right: 10px; margin-left: 10px;">CIFAR10
@@ -891,202 +903,381 @@ <h5 class="step-title">Enable/Disable Trustworthiness</h5>
                         <input class="form-check-input" type="checkbox" id="TrustworthinessSwitch" style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <div id="trustworthiness-options" style="display: none;">
-                        <div class="trust-options">
-                            <div class="pillar">
-                                <h5 class="step-title">Robustness pillar</h5>
-                                <input type="number" class="form-control" id="robustness-pillar"
-                                    placeholder="Robustness pillar" min="5" value="20"
-                                    style="display: inline; width: 67%;">
-                                <small class="form-text text-muted">%</small>
-                            </div>
-                            <div class="notion-container">
-                                <div class="notion">
-                                    <h5 class="step-title">Resilience to attacks notion</h5>
-                                    <input type="number" class="form-control" id="robustness-notion-1"
-                                        placeholder="Resilience to attacks" min="20" value="40"
-                                        style="display: inline; width: 70%;">
+                        <div id="tw-cfl" style="display: none;">
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Robustness pillar</h5>
+                                    <input type="number" class="form-control" id="cfl-robustness-pillar"
+                                        placeholder="Robustness pillar" min="5" value="20"
+                                        style="display: inline; width: 67%;">
                                     <small class="form-text text-muted">%</small>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title" >Algorithm robustness notion</h5>
-                                    <input type="number" class="form-control" id="robustness-notion-2"
-                                        placeholder="Algorithm robustness" min="20" value="40"
-                                        style="display: inline; width: 70%;">
-                                    <small class="form-text text-muted">%</small>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Resilience to attacks notion</h5>
+                                        <input type="number" class="form-control" id="cfl-robustness-notion-1"
+                                            placeholder="Resilience to attacks" min="20" value="40"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title" >Algorithm robustness notion</h5>
+                                        <input type="number" class="form-control" id="cfl-robustness-notion-2"
+                                            placeholder="Algorithm robustness" min="20" value="40"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Client reliability notion</h5>
+                                        <input type="number" class="form-control" id="cfl-robustness-notion-3"
+                                            placeholder="Client reliability" min="20" value="20"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Client reliability notion</h5>
-                                    <input type="number" class="form-control" id="robustness-notion-3"
-                                        placeholder="Client reliability" min="20" value="20"
-                                        style="display: inline; width: 70%;">
+                            </div>
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Privacy pillar</h5>
+                                    <input type="number" class="form-control" id="cfl-privacy-pillar"
+                                        placeholder="Privacy pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Technique notion</h5>
+                                        <input type="number" class="form-control" id="cfl-privacy-notion-1"
+                                            placeholder="Technique notion" min="20" value="20"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Uncertainty notion</h5>
+                                        <input type="number" class="form-control" id="cfl-privacy-notion-2"
+                                            placeholder="Uncertainty notion" min="20" value="60"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Indistinguishability notion</h5>
+                                        <input type="number" class="form-control" id="cfl-privacy-notion-3"
+                                            placeholder="Indistinguishability notion" min="20" value="20"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
                             </div>
-                        </div>
-                        <div class="trust-options">
-                            <div class="pillar">
-                                <h5 class="step-title">Privacy pillar</h5>
-                                <input type="number" class="form-control" id="privacy-pillar"
-                                    placeholder="Privacy pillar" min="5" value="15"
-                                    style="display: inline; width: 67%">
-                                <small class="form-text text-muted">%</small>
-                            </div>
-                            <div class="notion-container">
-                                <div class="notion">
-                                    <h5 class="step-title">Technique notion</h5>
-                                    <input type="number" class="form-control" id="privacy-notion-1"
-                                        placeholder="Technique notion" min="20" value="20"
-                                        style="display: inline; width: 70%;">
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Fairness pillar</h5>
+                                    <input type="number" class="form-control" id="cfl-fairness-pillar"
+                                        placeholder="Fairness pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Uncertainty notion</h5>
-                                    <input type="number" class="form-control" id="privacy-notion-2"
-                                        placeholder="Uncertainty notion" min="20" value="60"
-                                        style="display: inline; width: 70%;">
-                                    <small class="form-text text-muted">%</small>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Selection fairness notion</h5>
+                                        <input type="number" class="form-control" id="cfl-fairness-notion-1"
+                                            placeholder="Selection fairness notion" min="20" value="30"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Performance fairness  notion</h5>
+                                        <input type="number" class="form-control" id="cfl-fairness-notion-2"
+                                            placeholder="Performance fairness notion" min="20" value="35"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Class distribution notion</h5>
+                                        <input type="number" class="form-control" id="cfl-fairness-notion-3"
+                                            placeholder="Class distribution notion" min="20" value="35"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Indistinguishability notion</h5>
-                                    <input type="number" class="form-control" id="privacy-notion-3"
-                                        placeholder="Indistinguishability notion" min="20" value="20"
-                                        style="display: inline; width: 70%;">
+                            </div>
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Explainability pillar</h5>
+                                    <input type="number" class="form-control" id="cfl-explainability-pillar"
+                                        placeholder="Explainability pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Interpretability notion</h5>
+                                        <input type="number" class="form-control" id="cfl-explainability-notion-1"
+                                            placeholder="Interpretability notion" min="20" value="40"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Post hoc methods notion</h5>
+                                        <input type="number" class="form-control" id="cfl-explainability-notion-2"
+                                            placeholder="Post hoc methods notion" min="20" value="60"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
                             </div>
-                        </div>
-                        <div class="trust-options">
-                            <div class="pillar">
-                                <h5 class="step-title">Fairness pillar</h5>
-                                <input type="number" class="form-control" id="fairness-pillar"
-                                    placeholder="Fairness pillar" min="5" value="15"
-                                    style="display: inline; width: 67%">
-                                <small class="form-text text-muted">%</small> 
-                            </div>
-                            <div class="notion-container">
-                                <div class="notion">
-                                    <h5 class="step-title">Selection fairness notion</h5>
-                                    <input type="number" class="form-control" id="fairness-notion-1"
-                                        placeholder="Selection fairness notion" min="20" value="30"
-                                        style="display: inline; width: 70%;">
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Accountability pillar</h5>
+                                    <input type="number" class="form-control" id="cfl-accountability-pillar"
+                                        placeholder="Accountability pillar" min="5" value="10"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Performance fairness  notion</h5>
-                                    <input type="number" class="form-control" id="fairness-notion-2"
-                                        placeholder="Performance fairness notion" min="20" value="35"
-                                        style="display: inline; width: 70%;">
-                                    <small class="form-text text-muted">%</small>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Factsheet completeness notion</h5>
+                                        <input type="number" class="form-control" id="cfl-accountability-notion-1"
+                                            placeholder="Factsheet completeness notion" min="100" value="100"
+                                            style="display: inline; width: 70%;" disabled="true">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Class distribution notion</h5>
-                                    <input type="number" class="form-control" id="fairness-notion-3"
-                                        placeholder="Class distribution notion" min="20" value="35"
-                                        style="display: inline; width: 70%;">
+                            </div>
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Architectural soundness pillar</h5>
+                                    <input type="number" class="form-control" id="cfl-architectural-soundness-pillar"
+                                        placeholder="Architectural soundness pillar" min="5" value="10"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Client management notion</h5>
+                                        <input type="number" class="form-control" id="cfl-architectural-soundness-notion-1"
+                                            placeholder="Client management notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Optimization notion</h5>
+                                        <input type="number" class="form-control" id="cfl-architectural-soundness-notion-2"
+                                            placeholder="Optimization notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
                             </div>
-                        </div>
-                        <div class="trust-options">
-                            <div class="pillar">
-                                <h5 class="step-title">Explainability pillar</h5>
-                                <input type="number" class="form-control" id="explainability-pillar"
-                                    placeholder="Explainability pillar" min="5" value="15"
-                                    style="display: inline; width: 67%">
-                                <small class="form-text text-muted">%</small>
-                            </div>
-                            <div class="notion-container">
-                                <div class="notion">
-                                    <h5 class="step-title">Interpretability notion</h5>
-                                    <input type="number" class="form-control" id="explainability-notion-1"
-                                        placeholder="Interpretability notion" min="20" value="40"
-                                        style="display: inline; width: 70%;">
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Sustainability pillar</h5>
+                                    <input type="number" class="form-control" id="cfl-sustainability-pillar"
+                                        placeholder="Sustainability pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Post hoc methods notion</h5>
-                                    <input type="number" class="form-control" id="explainability-notion-2"
-                                        placeholder="Post hoc methods notion" min="20" value="60"
-                                        style="display: inline; width: 70%;">
-                                    <small class="form-text text-muted">%</small>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Energy source notion</h5>
+                                        <input type="number" class="form-control" id="cfl-sustainability-notion-1"
+                                            placeholder="Energy source notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Hardware efficiency notion</h5>
+                                        <input type="number" class="form-control" id="cfl-sustainability-notion-2"
+                                            placeholder="Hardware efficiency notion" min="20" value="25"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Federation complexity notion</h5>
+                                        <input type="number" class="form-control" id="cfl-sustainability-notion-3"
+                                            placeholder="Federation complexity notion" min="20" value="25"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
                                 </div>
                             </div>
-                        </div>
-                        <div class="trust-options">
-                            <div class="pillar">
-                                <h5 class="step-title">Accountability pillar</h5>
-                                <input type="number" class="form-control" id="accountability-pillar"
-                                    placeholder="Accountability pillar" min="5" value="10"
-                                    style="display: inline; width: 67%">
-                                <small class="form-text text-muted">%</small>
-                            </div>
-                            <div class="notion-container">
-                                <div class="notion">
-                                    <h5 class="step-title">Factsheet completeness notion</h5>
-                                    <input type="number" class="form-control" id="accountability-notion-1"
-                                        placeholder="Factsheet completeness notion" min="100" value="100"
-                                        style="display: inline; width: 70%;" disabled="true">
+                        </div> <!-- /#tw-cfl -->
+                        <div id="tw-dfl" style="display: none;">
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Robustness pillar</h5>
+                                    <input type="number" class="form-control" id="dfl-robustness-pillar"
+                                        placeholder="Robustness pillar" min="5" value="20"
+                                        style="display: inline; width: 67%;">
                                     <small class="form-text text-muted">%</small>
                                 </div>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Resilience to attacks notion</h5>
+                                        <input type="number" class="form-control" id="dfl-robustness-notion-1"
+                                            placeholder="Resilience to attacks" min="20" value="40"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title" >Algorithm robustness notion</h5>
+                                        <input type="number" class="form-control" id="dfl-robustness-notion-2"
+                                            placeholder="Algorithm robustness" min="20" value="40"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Client reliability notion</h5>
+                                        <input type="number" class="form-control" id="dfl-robustness-notion-3"
+                                            placeholder="Client reliability" min="20" value="20"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
                             </div>
-                        </div>
-                        <div class="trust-options">
-                            <div class="pillar">
-                                <h5 class="step-title">Architectural soundness pillar</h5>
-                                <input type="number" class="form-control" id="architectural-soundness-pillar"
-                                    placeholder="Architectural soundness pillar" min="5" value="10"
-                                    style="display: inline; width: 67%">
-                                <small class="form-text text-muted">%</small>
-                            </div>
-                            <div class="notion-container">
-                                <div class="notion">
-                                    <h5 class="step-title">Client management notion</h5>
-                                    <input type="number" class="form-control" id="architectural-soundness-notion-1"
-                                        placeholder="Client management notion" min="20" value="50"
-                                        style="display: inline; width: 70%;">
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Privacy pillar</h5>
+                                    <input type="number" class="form-control" id="dfl-privacy-pillar"
+                                        placeholder="Privacy pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Optimization notion</h5>
-                                    <input type="number" class="form-control" id="architectural-soundness-notion-2"
-                                        placeholder="Optimization notion" min="20" value="50"
-                                        style="display: inline; width: 70%;">
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Technique notion</h5>
+                                        <input type="number" class="form-control" id="dfl-privacy-notion-1"
+                                            placeholder="Technique notion" min="20" value="20"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Uncertainty notion</h5>
+                                        <input type="number" class="form-control" id="dfl-privacy-notion-2"
+                                            placeholder="Uncertainty notion" min="20" value="60"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Indistinguishability notion</h5>
+                                        <input type="number" class="form-control" id="dfl-privacy-notion-3"
+                                            placeholder="Indistinguishability notion" min="20" value="20"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
+                            </div>
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Fairness pillar</h5>
+                                    <input type="number" class="form-control" id="dfl-fairness-pillar"
+                                        placeholder="Fairness pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Class distribution notion</h5>
+                                        <input type="number" class="form-control" id="dfl-fairness-notion-3"
+                                            placeholder="Class distribution notion" min="20" value="100"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
                             </div>
-                        </div>
-                        <div class="trust-options">
-                            <div class="pillar">
-                                <h5 class="step-title">Sustainability pillar</h5>
-                                <input type="number" class="form-control" id="sustainability-pillar"
-                                    placeholder="Sustainability pillar" min="5" value="15"
-                                    style="display: inline; width: 67%">
-                                <small class="form-text text-muted">%</small>
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Explainability pillar</h5>
+                                    <input type="number" class="form-control" id="dfl-explainability-pillar"
+                                        placeholder="Explainability pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
+                                    <small class="form-text text-muted">%</small>
+                                </div>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Interpretability notion</h5>
+                                        <input type="number" class="form-control" id="dfl-explainability-notion-1"
+                                            placeholder="Interpretability notion" min="20" value="40"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Post hoc methods notion</h5>
+                                        <input type="number" class="form-control" id="dfl-explainability-notion-2"
+                                            placeholder="Post hoc methods notion" min="20" value="60"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
                             </div>
-                            <div class="notion-container">
-                                <div class="notion">
-                                    <h5 class="step-title">Energy source notion</h5>
-                                    <input type="number" class="form-control" id="sustainability-notion-1"
-                                        placeholder="Energy source notion" min="20" value="50"
-                                        style="display: inline; width: 70%;">
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Accountability pillar</h5>
+                                    <input type="number" class="form-control" id="dfl-accountability-pillar"
+                                        placeholder="Accountability pillar" min="5" value="10"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Hardware efficiency notion</h5>
-                                    <input type="number" class="form-control" id="sustainability-notion-2"
-                                        placeholder="Hardware efficiency notion" min="20" value="25"
-                                        style="display: inline; width: 70%;">
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Factsheet completeness notion</h5>
+                                        <input type="number" class="form-control" id="dfl-accountability-notion-1"
+                                            placeholder="Factsheet completeness notion" min="100" value="100"
+                                            style="display: inline; width: 70%;" disabled="true">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
+                            </div>
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Architectural soundness pillar</h5>
+                                    <input type="number" class="form-control" id="dfl-architectural-soundness-pillar"
+                                        placeholder="Architectural soundness pillar" min="5" value="10"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
-                                <div class="notion">
-                                    <h5 class="step-title">Federation complexity notion</h5>
-                                    <input type="number" class="form-control" id="sustainability-notion-3"
-                                        placeholder="Federation complexity notion" min="20" value="25"
-                                        style="display: inline; width: 70%;">
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Client management notion</h5>
+                                        <input type="number" class="form-control" id="dfl-architectural-soundness-notion-1"
+                                            placeholder="Client management notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Optimization notion</h5>
+                                        <input type="number" class="form-control" id="dfl-architectural-soundness-notion-2"
+                                            placeholder="Optimization notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
+                            </div>
+                            <div class="trust-options">
+                                <div class="pillar">
+                                    <h5 class="step-title">Sustainability pillar</h5>
+                                    <input type="number" class="form-control" id="dfl-sustainability-pillar"
+                                        placeholder="Sustainability pillar" min="5" value="15"
+                                        style="display: inline; width: 67%">
                                     <small class="form-text text-muted">%</small>
                                 </div>
+                                <div class="notion-container">
+                                    <div class="notion">
+                                        <h5 class="step-title">Energy source notion</h5>
+                                        <input type="number" class="form-control" id="dfl-sustainability-notion-1"
+                                            placeholder="Energy source notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Federation complexity notion</h5>
+                                        <input type="number" class="form-control" id="dfl-sustainability-notion-3"
+                                            placeholder="Federation complexity notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                </div>
                             </div>
-                        </div>
+                        </div> <!-- /#tw-dfl -->
                     </div>
                 </div>
             </div>
@@ -1139,4 +1330,4 @@ <h5 class="step-title">Federation complexity notion</h5>
         });
     </script>
 
-    {% endblock %}
\ No newline at end of file
+    {% endblock %}

From 87dd13af28db9f3ff432531b28e7c70a41ab1d61 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 6 Mar 2026 10:15:54 +0100
Subject: [PATCH 02/66] Minor changes in trustworthiness

---
 nebula/addons/trustworthiness/calculation.py  | 221 ++++++++----------
 nebula/addons/trustworthiness/dfl_local.py    |  19 +-
 .../trustworthiness/per_round_metrics.py      |  11 -
 .../addons/trustworthiness/trustworthiness.py |  56 +----
 nebula/core/datasets/covtype/covtype.py       |   4 +-
 .../frontend/static/js/deployment/scenario.js |   8 +-
 .../static/js/deployment/trustworthiness.js   |  11 -
 7 files changed, 103 insertions(+), 227 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 251fe1e5a..b5f3dbd1f 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -347,42 +347,11 @@ def get_avg_loss_accuracy(scenario_name):
     total_accuracy = 0
     total_loss = 0
 
-    expected_nodes = 3
-    """
-    if os.path.exists(factsheet_file):
-        with open(factsheet_file, "r") as f:
-            fs = json.load(f)
-        # normalmente client_num viene como string, lo convierto
-        expected_nodes = int(fs.get("participants", {}).get("client_num", 0) or 0)
-        logger.info(f"nodes={expected_nodes}")
-    """
-
-
     data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
 
-    logger.info(f"FIRST 5 LINES:\n{open(data_file,'r').read().splitlines()[:5]}")
-    logger.info(f"LAST 5 LINES:\n{open(data_file,'r').read().splitlines()[-5:]}")
-
     data = read_csv(data_file)
 
-    logger.info(f"shape={data.shape}")
-    logger.info(f"dtypes={data.dtypes.to_dict()}")
-    logger.info(f"accuracy sample raw={data['accuracy'].head(20).tolist()}")
-    logger.info(f"accuracy non-null={data['accuracy'].notna().sum()}")
-
     number_files = len(data)
-    logger.info(f"number_files={number_files}")
-
-    """
-    while (number_files != expected_nodes):
-        logger.info("WAIT")
-        time.sleep(5)
-        data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-        data = read_csv(data_file)
-        number_files = len(data)
-        logger.info(f"number_files={number_files}")
-        logger.info(f"expected_nodes={expected_nodes}")
-    """
 
     total_loss = data["loss"].sum()
     total_accuracy = data["accuracy"].sum()
@@ -448,30 +417,26 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate):
     """
 
 
-    images, _ = test_sample
+    samples, _ = test_sample
     input_shape = None
 
-    # Si por cualquier motivo llega sin batch, lo añadimos
-    if torch.is_tensor(images) and images.dim() >= 1 and images.shape[0] != 0:
+    if torch.is_tensor(samples) and samples.dim() >= 1 and samples.shape[0] != 0:
         pass
     else:
-        raise ValueError("`test_sample[0]` debe ser un torch.Tensor no vacío.")
+        raise ValueError("`test_sample[0]` must be a non-empty torch.Tensor.")
 
     if input_shape is None:
-        if images.dim() >= 2:
+        if samples.dim() >= 2:
             # (B, ...) -> input_shape = (...)
-            input_shape = tuple(images.shape[1:])
+            input_shape = tuple(samples.shape[1:])
         else:
-            # (...) sin batch
-            input_shape = tuple(images.shape)
+            # (...) without batch
+            input_shape = tuple(samples.shape)
 
-    # Escogemos un "background" (aquí el último del batch, como hacías tú)
-    background = images[-1] if images.dim() >= 2 else images
+    background = samples[-1] if samples.dim() >= 2 else samples
 
-    # Convertir a numpy de forma segura (GPU-friendly)
     x = background.detach().cpu().numpy()
 
-    # Asegurar batch dimension para clever_u: (1, *input_shape)
     if tuple(x.shape) == tuple(input_shape):
         x = x.reshape((1,) + tuple(input_shape))
 
@@ -479,8 +444,6 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate):
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), learning_rate)
 
-
-
     # Create the ART classifier
     classifier = PyTorchClassifier(
         model=model,
@@ -579,7 +542,7 @@ def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: fl
     Args:
         bytes_up: total uploaded bytes
         bytes_down: total downloaded bytes
-        final_accuracy: final test accuracy in [0,1] (or [0,100] if your factsheet uses %)
+        test_acc_avg: final test accuracy in [0,1]
         eps: small constant to avoid division by zero
 
     Returns:
@@ -588,10 +551,6 @@ def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: fl
     total_bytes = float(bytes_up) + float(bytes_down)
     acc = float(test_acc_avg)
 
-    # Si tu factsheet guarda accuracy como porcentaje (0-100), descomenta esto:
-    # if acc > 1.0:
-    #     acc = acc / 100.0
-
     if acc < eps:
         acc = eps
 
@@ -599,8 +558,21 @@ def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: fl
 
 def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate):
 
-    images, labels = test_sample
-    sample = images[-1].unsqueeze(0)
+    """
+    Calculates the loss sensitivity score.
+
+    Args:
+        model (object): The model.
+        test_sample (object): A (samples, labels) batch; only its last sample is used for the score.
+        nb_classes (int): Number of classes, used to one-hot encode the label.
+        learning_rate (float): The learning rate of the model.
+
+    Returns:
+        float: The loss sensitivity score.
+    """
+
+    samples, labels = test_sample
+    sample = samples[-1].unsqueeze(0)
     label = labels[-1].unsqueeze(0)
 
     label = F.one_hot(label, num_classes=nb_classes).float()
@@ -632,7 +604,17 @@ def compute_adversarial_accuracy_art(
     epsilon=0.03
 ):
     """
-    Computes adversarial accuracy using ART FGSM attack.
+    Computes adversarial accuracy using FGSM attack.
+
+    Args:
+        model (object): The model.
+        test_loader (DataLoader): DataLoader providing test samples.
+        nb_classes (int): The nb_classes of the model.
+        learning_rate (float): The learning rate of the model.
+        epsilon (float): Maximum perturbation magnitude for the attacks.
+
+    Returns:
+        float: The adversarial accuracy score.
     """
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -642,10 +624,9 @@ def compute_adversarial_accuracy_art(
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
 
-    # Obtener shape dinámicamente
     sample_batch = next(iter(test_loader))
-    images, _ = sample_batch
-    input_shape = images.shape[1:] #CAMBIAR
+    samples, _ = sample_batch
+    input_shape = samples.shape[1:]
 
     classifier = PyTorchClassifier(
         model=model,
@@ -654,27 +635,16 @@ def compute_adversarial_accuracy_art(
         input_shape=input_shape,
         nb_classes=nb_classes,
     )
-    """
-    from art.attack.evasion import FastGradientMethod
-
-    attack = FastGradientMethod(
-        estimator=classifier,
-        eps=epsilon,
-        norm=np.inf
-    )
-    """
 
     correct = 0
     total = 0
 
-    for images, labels in test_loader:
-        images = images.to(device)
+    for samples, labels in test_loader:
+        samples = samples.to(device)
         labels = labels.to(device)
 
-        # Generar adversarios con FGSM puro
-        x_adv = fgsm_attack(model, images, labels, epsilon=epsilon)
+        x_adv = fgsm_attack(model, samples, labels, epsilon=epsilon)
 
-        # Predicciones
         with torch.no_grad():
             outputs = model(x_adv)
             preds = outputs.argmax(dim=1)
@@ -685,26 +655,23 @@ def compute_adversarial_accuracy_art(
     return correct / total
 
 def get_empirical_robustness_score(
-    model: object,
-    test_sample: object,
-    nb_classes: int,
-    learning_rate: float,
-    attack_name: str = "fgsm",
-    attack_params: dict | None = None,
-    max_samples: int = 32,
-) -> float:
+    model,
+    test_sample,
+    nb_classes,
+    learning_rate,
+    attack_name = "fgsm",
+    attack_params = None,
+    max_samples = 32,
+):
     """
     Calculates the Empirical Robustness score using Adversarial Robustness Toolbox (ART).
 
-    Empirical robustness estimates the minimal relative perturbation required for a successful attack
-    on the provided samples. Higher is better (needs larger perturbation to fool the model).
-
     Args:
         model (object): The model.
-        test_sample (object): A batch from the test dataloader (images, labels).
-        nb_classes (int): Number of classes.
-        learning_rate (float): LR used to build the ART classifier wrapper.
-        attack_name (str): Attack key supported by ART empirical_robustness (commonly "fgsm" or "hsj").
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        nb_classes (int): The nb_classes of the model.
+        learning_rate (float): The learning rate of the model.
+        attack_name (str): Attack key supported by ART empirical_robustness.
         attack_params (dict | None): Optional attack parameters.
         max_samples (int): Max number of samples from the batch to use.
 
@@ -712,15 +679,13 @@ def get_empirical_robustness_score(
         float: Empirical robustness score (>= 0.0). If it cannot be computed, returns 0.0.
     """
     try:
-        images, _ = test_sample
+        samples, _ = test_sample
 
-        # Limit how many samples we use from the batch (keeps it lightweight)
-        batch_size: int = int(images.shape[0])
+        batch_size: int = int(samples.shape[0])
         n: int = int(min(max_samples, batch_size))
-        x = images[:n].detach().cpu().numpy()
+        x = samples[:n].detach().cpu().numpy()
 
-        # Infer input shape for ART (no batch dimension)
-        input_shape = tuple(images.shape[1:])
+        input_shape = tuple(samples.shape[1:])
 
         criterion = nn.CrossEntropyLoss()
         optimizer = optim.Adam(model.parameters(), learning_rate)
@@ -740,7 +705,6 @@ def get_empirical_robustness_score(
             attack_params=attack_params,
         )
 
-        # ART may return ndarray depending on input; aggregate to scalar
         if isinstance(score, np.ndarray):
             score = float(np.mean(score))
 
@@ -756,47 +720,50 @@ def get_empirical_robustness_score(
 
 
 
-def fgsm_attack(model, images, labels, epsilon=0.03):
+def fgsm_attack(model, samples, labels, epsilon=0.03):
     """
-    Genera ejemplos adversariales usando FGSM puro en PyTorch. Cuando se pueda meter los ataques de ART se podría cambiar
+        Performs an FGSM (Fast Gradient Sign Method) adversarial attack on a batch of samples.
+
+        Args:
+            model (torch.nn.Module): The PyTorch model to attack.
+            samples (torch.Tensor): Input samples to perturb, shape (B, ...).
+            labels (torch.Tensor): True labels corresponding to the samples.
+            epsilon (float, optional): Maximum perturbation magnitude for the attack. Defaults to 0.03.
+
+        Returns:
+            torch.Tensor: Adversarially perturbed samples with the same shape as `samples`.
     """
-    images = images.clone().detach().to(images.device)
-    labels = labels.to(images.device)
-    images.requires_grad = True
+    samples = samples.clone().detach().to(samples.device)
+    labels = labels.to(samples.device)
+    samples.requires_grad = True
 
-    outputs = model(images)
+    outputs = model(samples)
     loss = nn.CrossEntropyLoss()(outputs, labels)
     model.zero_grad()
     loss.backward()
 
-    # FGSM: x_adv = x + epsilon * sign(grad)
-    perturbation = epsilon * images.grad.sign()
-    x_adv = images + perturbation
+    perturbation = epsilon * samples.grad.sign()
+    x_adv = samples + perturbation
 
-    # Limitar valores al rango [0,1]
-    #x_adv = torch.clamp(x_adv, 0, 1)
     return x_adv.detach()
 
 def get_confidence_score(
     model,
     test_sample,
-    max_samples: int = 128,
-    use_true_label: bool = True,
-) -> float:
+    max_samples = 128,
+    use_true_label = True,
+):
     """
-    Confidence Score basado en probabilidades softmax.
-
-    - Si use_true_label=True: devuelve la media de P(y_true | x).
-    - Si use_true_label=False: devuelve la media de max softmax prob (MSP).
+    Calculates the confidence score.
 
     Args:
-        model (object): Modelo (torch.nn.Module).
-        test_sample (object): Batch del dataloader: (x, y).
-        max_samples (int): Máximo nº de muestras del batch a usar.
-        use_true_label (bool): Ver arriba.
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        max_samples (int): Max number of samples from the batch to use.
+        use_true_label (bool): If True (and labels are a tensor), average the softmax probability of the true label; otherwise average the maximum softmax probability. Defaults to True.
 
     Returns:
-        float: Confidence score en [0, 1] (o 0.0 si falla).
+        float: Confidence score.
     """
     try:
         if not isinstance(model, torch.nn.Module):
@@ -805,13 +772,11 @@ def get_confidence_score(
 
         x, y = test_sample
 
-        # Recorta batch para que sea barato
         if isinstance(x, torch.Tensor):
             x = x[:max_samples]
         if isinstance(y, torch.Tensor):
             y = y[:max_samples]
 
-        # Usa el device real del modelo
         try:
             device = next(model.parameters()).device
         except Exception:
@@ -822,23 +787,19 @@ def get_confidence_score(
             x = x.to(device) if isinstance(x, torch.Tensor) else x
             out = model(x)
 
-            # Por si el modelo devuelve tupla (logits, ...)
             logits = out[0] if isinstance(out, (tuple, list)) else out
             probs = torch.softmax(logits, dim=1)
 
             if use_true_label and isinstance(y, torch.Tensor):
-                # y puede venir como índices [B] o one-hot [B, C]
                 if y.ndim > 1:
                     y_idx = torch.argmax(y, dim=1)
                 else:
                     y_idx = y
                 y_idx = y_idx.to(device)
 
-                # P(y_true|x)
                 true_probs = probs.gather(1, y_idx.view(-1, 1)).squeeze(1)
                 return float(true_probs.mean().detach().cpu().item())
 
-            # MSP: max_c P(c|x)
             msp = probs.max(dim=1).values
             return float(msp.mean().detach().cpu().item())
 
@@ -849,10 +810,15 @@ def get_confidence_score(
 
 def attack_success_rate(model, test_sample,epsilon=0.03):
     """
-    Calcula ASR para un ataque untargeted.
+    Calculates the attack success rate (ASR) of an FGSM attack over the originally correctly classified samples.
+
+    Args:
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        epsilon (float): Maximum perturbation magnitude for the attacks.
 
-    attack_fn debe recibir (model, images, labels)
-    y devolver imágenes adversariales.
+    Returns:
+        float: The ASR.
     """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
@@ -861,27 +827,22 @@ def attack_success_rate(model, test_sample,epsilon=0.03):
     images = images.to(device)
     labels = labels.to(device)
 
-    # 1️⃣ Predicciones originales
     with torch.no_grad():
         outputs = model(images)
         preds = outputs.argmax(dim=1)
 
-    # Solo consideramos los originalmente correctos
     correct_mask = preds.eq(labels)
     num_correct = correct_mask.sum().item()
 
     if num_correct == 0:
-        return 0.0  # evitar división por cero
+        return 0.0
 
-    # 2️⃣ Generar adversariales
     x_adv = fgsm_attack(model, images, labels, epsilon=epsilon)
 
-    # 3️⃣ Predicciones adversariales
     with torch.no_grad():
         outputs_adv = model(x_adv)
         preds_adv = outputs_adv.argmax(dim=1)
 
-    # 4️⃣ Ataque exitoso = antes correcto y ahora incorrecto
     successful_attacks = (correct_mask & preds_adv.ne(labels)).sum().item()
 
     asr = successful_attacks / num_correct
diff --git a/nebula/addons/trustworthiness/dfl_local.py b/nebula/addons/trustworthiness/dfl_local.py
index ee24b3d58..f1b0c5b38 100644
--- a/nebula/addons/trustworthiness/dfl_local.py
+++ b/nebula/addons/trustworthiness/dfl_local.py
@@ -27,22 +27,17 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
     trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
     os.makedirs(trust_dir, exist_ok=True)
 
-    # 1) Factsheet por nodo
     factsheet_name = f"factsheet_participant_{participant_idx}.json"
     factsheet_path = os.path.join(trust_dir, factsheet_name)
 
-    # Copia de template (la misma que usa Factsheet) :contentReference[oaicite:9]{index=9}
     template_path = os.path.join(dirname, "configs", "factsheet_template_dfl.json")
     if not os.path.exists(factsheet_path):
         shutil.copyfile(template_path, factsheet_path)
 
-    # Relleno mínimo: aquí pones valores LOCALES del nodo.
-    # (puedes ir ampliándolo)
     with open(factsheet_path, "r+", encoding="utf-8") as f:
         factsheet = {}
         factsheet = json.load(f)
 
-        # Pre-train básico desde data (usa federation, dataset, etc.) :contentReference[oaicite:10]{index=10}
         logging.info("DFL FactSheet: Populating factsheet with pre training metrics")
 
         federation = data["federation"]
@@ -223,20 +218,18 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
         f.truncate()
         json.dump(factsheet, f, indent=4)
 
-def load_round_metrics(experiment_name: str, participant_idx: int):
+def load_round_metrics(experiment_name, participant_idx):
     files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
     path = os.path.join(files_dir, f"round_metrics_participant_{participant_idx}.csv")
     df = pd.read_csv(path)
 
-    # Asegura orden
     if "round" in df.columns:
         df = df.sort_values("round")
 
-    # Limpieza básica
     df = df.dropna(subset=["loss", "accuracy"])
     return df
 
-def get_bytes(experiment_name: str, participant_idx: int):
+def get_bytes(experiment_name, participant_idx):
     data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"data_results_{participant_idx}.csv")
 
     data = read_csv(data_file)
@@ -248,7 +241,7 @@ def get_bytes(experiment_name: str, participant_idx: int):
 
     return bytes_sent, bytes_recv
 
-def get_emissions(emissions_file, participant_idx: int):
+def get_emissions(emissions_file, participant_idx):
     data = read_csv(emissions_file)
 
     row = data[data["id"] == participant_idx]
@@ -260,7 +253,7 @@ def get_emissions(emissions_file, participant_idx: int):
 
     return avg_carbon_intensity_clients, emissions_training, energy_consumed, sample_size
 
-def normalized_entropy_from_class_counts(count_class_file: str) -> float:
+def normalized_entropy_from_class_counts(count_class_file):
     with open(count_class_file, "r") as f:
         dist = json.load(f)
 
@@ -271,15 +264,13 @@ def normalized_entropy_from_class_counts(count_class_file: str) -> float:
 
     p = counts / total
 
-    # Entropía (evita log(0))
     eps = 1e-12
     H = -float(np.sum(p * np.log(p + eps)))
 
-    # Normalización por número de clases
     K = len(p)
     if K <= 1:
         return 0.0
 
     H_norm = H / float(np.log(K))
-    # seguridad numérica
+
     return max(0.0, min(1.0, H_norm))
diff --git a/nebula/addons/trustworthiness/per_round_metrics.py b/nebula/addons/trustworthiness/per_round_metrics.py
index 086167065..efb1facf9 100644
--- a/nebula/addons/trustworthiness/per_round_metrics.py
+++ b/nebula/addons/trustworthiness/per_round_metrics.py
@@ -19,7 +19,6 @@ def _safe_get_round(engine) -> int:
     if trainer is None:
         return -1
 
-    # Nebula suele exponer get_round() o el atributo round
     try:
         return int(trainer.get_round())
     except Exception:
@@ -39,7 +38,6 @@ def _get_local_test_loader(engine):
 
     try:
         tdl = dm.test_dataloader()
-        # En Nebula normalmente: [local_loader, global_loader]
         if isinstance(tdl, (list, tuple)) and len(tdl) > 0:
             return tdl[0]
         return tdl
@@ -48,10 +46,6 @@ def _get_local_test_loader(engine):
 
 
 def _build_test_sample_min_bs(test_loader, min_bs: int = 10) -> Optional[Tuple[Any, Any]]:
-    """
-    Devuelve un batch (x, y) con batch_size >= min_bs si es posible.
-    así que min_bs=10 es lo ideal.
-    """
     if test_loader is None:
         return None
 
@@ -101,13 +95,9 @@ class PerRoundTrustMetrics:
     trust_dir: str
     role_label: str
 
-    # Control
     enable_print: bool = True
     enable_csv: bool = True
 
-    fi_every_n_rounds: int = 1  # pon 5 o 10 si quieres reducir coste
-
-    # Estado interno
     _csv_path: str = field(init=False)
     _prev_acc: Optional[float] = field(default=None, init=False)
     _test_loader: Any = field(default=None, init=False)
@@ -137,7 +127,6 @@ async def on_test_metrics(self, engine, loss: float, acc: float) -> None:
         async with self._lock:
             round_id = _safe_get_round(engine)
 
-            # Métrica sencilla per-round (ejemplo): estabilidad de accuracy
             if self._prev_acc is None:
                 tw_stability = 1.0
             else:
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index ab860816f..68afcfa23 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -79,7 +79,6 @@ async def init(self, experiment_name):
             role_label="TRAINER",
             enable_print=True,
             enable_csv=True,
-            fi_every_n_rounds=1,  # cambia a 5/10 si quieres menos coste
         )
         await self._per_round.setup(self._engine)
 
@@ -115,9 +114,9 @@ async def finish_experiment_role_pre_actions(self):
         self._sample_size = len(train_loader)
 
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
-        federation = trust_config.get("federation")  # "CFL" o "DFL" :contentReference[oaicite:13]{index=13}
+        federation = trust_config.get("federation")  # "CFL", "DFL" or "SDFL"
 
-        if federation == "DFL":
+        if federation == "DFL" or (federation == "SDFL" and self._idx == 0):
             self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
             data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
             with open(data_file_path, 'r') as data_file:
@@ -146,8 +145,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                     "energy_source": float(data["energy_source"]),
                     "federation_complexity": float(data["federation_complexity"])
                 }
-            # 1) calcula pesos (igual que ya hacías en el server, leyendo scenario.json)
-            # 2) cada nodo genera factsheet_participant_<idx>.json + results_participant_<idx>.json
+
             compute_trust_local_dfl(experiment_name, self._idx, trust_config, self._start_time, self._end_time)
 
             trust_metric_manager = TrustMetricManager(self._start_time, federation, self._idx)
@@ -209,7 +207,6 @@ async def init(self, experiment_name):
             role_label="SERVER",
             enable_print=True,
             enable_csv=True,
-            fi_every_n_rounds=1,
         )
         await self._per_round.setup(self._engine)
 
@@ -355,53 +352,6 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_loss, last_accuracy)
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, self._emissions_file, self._role.value, workload, sample_size, self._idx)
         save_confirmation_csv(self._experiment_name, self._idx)
-        """
-        federation = self._trust_config.get("federation")  # "CFL" o "DFL" :contentReference[oaicite:13]{index=13}
-
-        if federation == "DFL":
-            data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), self._experiment_name, "scenario.json")
-            with open(data_file_path, 'r') as data_file:
-                data = json.load(data_file)
-
-                weights = {
-                    "robustness": float(data["robustness_pillar"]),
-                    "resilience_to_attacks": float(data["resilience_to_attacks"]),
-                    "algorithm_robustness": float(data["algorithm_robustness"]),
-                    "client_reliability": float(data["client_reliability"]),
-                    "privacy": float(data["privacy_pillar"]),
-                    "technique": float(data["technique"]),
-                    "uncertainty": float(data["uncertainty"]),
-                    "indistinguishability": float(data["indistinguishability"]),
-                    "fairness": float(data["fairness_pillar"]),
-                    "selection_fairness": float(data["selection_fairness"]),
-                    "performance_fairness": float(data["performance_fairness"]),
-                    "class_distribution": float(data["class_distribution"]),
-                    "explainability": float(data["explainability_pillar"]),
-                    "interpretability": float(data["interpretability"]),
-                    "post_hoc_methods": float(data["post_hoc_methods"]),
-                    "accountability": float(data["accountability_pillar"]),
-                    "factsheet_completeness":  float(data["factsheet_completeness"]),
-                    "architectural_soundness": float(data["architectural_soundness_pillar"]),
-                    "client_management": float(data["client_management"]),
-                    "optimization": float(data["optimization"]),
-                    "sustainability": float(data["sustainability_pillar"]),
-                    "energy_source": float(data["energy_source"]),
-                    "hardware_efficiency": float(data["hardware_efficiency"]),
-                    "federation_complexity": float(data["federation_complexity"])
-                }
-            # 1) calcula pesos (igual que ya hacías en el server, leyendo scenario.json)
-            # 2) cada nodo genera factsheet_participant_<idx>.json + results_participant_<idx>.json
-            compute_trust_local_dfl(self._experiment_name, self._idx, self._trust_config, weights)
-
-            # y SALES sin tocar el camino CFL
-            return
-
-        # Si NO es DFL => CFL (o lo que uses) sigue EXACTAMENTE IGUAL
-
-        elif federation == "SDFL":
-            #SDFL
-            return
-        """
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
     def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_route) -> TrustWorkload:
diff --git a/nebula/core/datasets/covtype/covtype.py b/nebula/core/datasets/covtype/covtype.py
index f46a6b289..60868889a 100644
--- a/nebula/core/datasets/covtype/covtype.py
+++ b/nebula/core/datasets/covtype/covtype.py
@@ -114,8 +114,8 @@ def __init__(
         seed: int = 42,
         config_dir: str | None = None,
         test_size: float = 0.2,
-        train_limit: int | None = 40000,
-        test_limit: int | None = 5000,
+        train_limit: int | None = None,
+        test_limit: int | None = None,
     ):
         super().__init__(
             num_classes=num_classes,
diff --git a/nebula/frontend/static/js/deployment/scenario.js b/nebula/frontend/static/js/deployment/scenario.js
index 553c5211c..02d1daa3f 100644
--- a/nebula/frontend/static/js/deployment/scenario.js
+++ b/nebula/frontend/static/js/deployment/scenario.js
@@ -100,17 +100,15 @@ const ScenarioManager = (function () {
             sar_training: window.SaManager.getSaConfig().sar_training || false,
             sar_training_policy: window.SaManager.getSaConfig().sar_training_policy || "Broad-Propagation Strategy",
             random_topology_probability: document.getElementById("random-probability").value || 0.5,
-            // --- Trustworthiness (IDs distintos para CFL/DFL) ---
+            // --- Trustworthiness (CFL/DFL) ---
             with_trustworthiness: document.getElementById("TrustworthinessSwitch").checked ? true : false,
 
-            // Si no está activado, manda 0s para mantener el schema
             ...(document.getElementById("TrustworthinessSwitch").checked
                 ? (() => {
                     const federationType = document.getElementById("federationArchitecture").value;
                     const useDFL = (federationType === "DFL" || federationType === "SDFL");
 
                     if (useDFL) {
-                        // DFL (AJUSTA si tu DFL tiene otras nociones)
                         return {
                             robustness_pillar: document.getElementById("dfl-robustness-pillar")?.value || "0",
                             resilience_to_attacks: document.getElementById("dfl-robustness-notion-1")?.value || "0",
@@ -123,7 +121,7 @@ const ScenarioManager = (function () {
                             indistinguishability: document.getElementById("dfl-privacy-notion-3")?.value || "0",
 
                             fairness_pillar: document.getElementById("dfl-fairness-pillar")?.value || "0",
-                            // En DFL normalmente solo guardas class_distribution (notion-3)
+
                             selection_fairness: "0",
                             performance_fairness: "0",
                             class_distribution: document.getElementById("dfl-fairness-notion-3")?.value || "0",
@@ -141,9 +139,7 @@ const ScenarioManager = (function () {
 
                             sustainability_pillar: document.getElementById("dfl-sustainability-pillar")?.value || "0",
                             energy_source: document.getElementById("dfl-sustainability-notion-1")?.value || "0",
-                            // Si en DFL no existe hardware_efficiency, lo dejamos a 0
                             hardware_efficiency: "0",
-                            // En DFL mapea federation_complexity a tu notion-3 (si es así)
                             federation_complexity: document.getElementById("dfl-sustainability-notion-3")?.value || "0",
                         };
                     }
diff --git a/nebula/frontend/static/js/deployment/trustworthiness.js b/nebula/frontend/static/js/deployment/trustworthiness.js
index ec6ad92b0..64cc721fe 100644
--- a/nebula/frontend/static/js/deployment/trustworthiness.js
+++ b/nebula/frontend/static/js/deployment/trustworthiness.js
@@ -83,7 +83,6 @@ const TrustworthinessManager = (function() {
             "cfl-sustainability-notion-3"
         ];
 
-        // IDs DFL (AJUSTA si tu DFL tiene otras nociones)
         const dflPillarIds = [
             "dfl-robustness-pillar",
             "dfl-privacy-pillar",
@@ -100,14 +99,12 @@ const TrustworthinessManager = (function() {
             "dfl-privacy-notion-1",
             "dfl-privacy-notion-2",
             "dfl-privacy-notion-3",
-            // DFL fairness reducido:
             "dfl-fairness-notion-3",
             "dfl-explainability-notion-1",
             "dfl-explainability-notion-2",
             "dfl-accountability-notion-1",
             "dfl-architectural-soundness-notion-1",
             "dfl-architectural-soundness-notion-2",
-            // DFL sustainability reducido:
             "dfl-sustainability-notion-1",
             "dfl-sustainability-notion-3"
         ];
@@ -193,7 +190,6 @@ const TrustworthinessManager = (function() {
         const privacyNotion2 = parseFloat(document.getElementById("dfl-privacy-notion-2").value) || 0;
         const privacyNotion3 = parseFloat(document.getElementById("dfl-privacy-notion-3").value) || 0;
 
-        // DFL fairness reducido (AJUSTA si corresponde)
         const fairnessNotion3 = parseFloat(document.getElementById("dfl-fairness-notion-3").value) || 0;
 
         const explainabilityNotion1 = parseFloat(document.getElementById("dfl-explainability-notion-1").value) || 0;
@@ -202,7 +198,6 @@ const TrustworthinessManager = (function() {
         const architecturalSoundnessNotion1 = parseFloat(document.getElementById("dfl-architectural-soundness-notion-1").value) || 0;
         const architecturalSoundnessNotion2 = parseFloat(document.getElementById("dfl-architectural-soundness-notion-2").value) || 0;
 
-        // DFL sustainability reducido (AJUSTA si corresponde)
         const sustainabilityNotion1 = parseFloat(document.getElementById("dfl-sustainability-notion-1").value) || 0;
         const sustainabilityNotion3 = parseFloat(document.getElementById("dfl-sustainability-notion-3").value) || 0;
 
@@ -304,7 +299,6 @@ const TrustworthinessManager = (function() {
                 parseFloat(document.getElementById("dfl-privacy-notion-2").value) || 0,
                 parseFloat(document.getElementById("dfl-privacy-notion-3").value) || 0
             ],
-            // DFL fairness reducido (AJUSTA si corresponde)
             fairness: [
                 parseFloat(document.getElementById("dfl-fairness-notion-3").value) || 0
             ],
@@ -319,7 +313,6 @@ const TrustworthinessManager = (function() {
                 parseFloat(document.getElementById("dfl-architectural-soundness-notion-1").value) || 0,
                 parseFloat(document.getElementById("dfl-architectural-soundness-notion-2").value) || 0
             ],
-            // DFL sustainability reducido (AJUSTA si corresponde)
             sustainability: [
                 parseFloat(document.getElementById("dfl-sustainability-notion-1").value) || 0,
                 parseFloat(document.getElementById("dfl-sustainability-notion-3").value) || 0
@@ -402,7 +395,6 @@ const TrustworthinessManager = (function() {
             document.getElementById("dfl-privacy-notion-2").value = p[1];
             document.getElementById("dfl-privacy-notion-3").value = p[2];
 
-            // DFL fairness reducido (AJUSTA si corresponde)
             const f = config.notions.fairness || [0];
             document.getElementById("dfl-fairness-notion-3").value = f[0];
 
@@ -414,7 +406,6 @@ const TrustworthinessManager = (function() {
             document.getElementById("dfl-architectural-soundness-notion-1").value = a[0];
             document.getElementById("dfl-architectural-soundness-notion-2").value = a[1];
 
-            // DFL sustainability reducido (AJUSTA si corresponde)
             const s = config.notions.sustainability || [0, 0];
             document.getElementById("dfl-sustainability-notion-1").value = s[0];
             document.getElementById("dfl-sustainability-notion-3").value = s[1];
@@ -483,7 +474,6 @@ const TrustworthinessManager = (function() {
         document.getElementById("dfl-privacy-notion-2").value = "0";
         document.getElementById("dfl-privacy-notion-3").value = "0";
 
-        // DFL fairness reducido (AJUSTA si corresponde)
         document.getElementById("dfl-fairness-notion-3").value = "0";
 
         document.getElementById("dfl-explainability-notion-1").value = "0";
@@ -492,7 +482,6 @@ const TrustworthinessManager = (function() {
         document.getElementById("dfl-architectural-soundness-notion-1").value = "0";
         document.getElementById("dfl-architectural-soundness-notion-2").value = "0";
 
-        // DFL sustainability reducido (AJUSTA si corresponde)
         document.getElementById("dfl-sustainability-notion-1").value = "0";
         document.getElementById("dfl-sustainability-notion-3").value = "0";
     }

From 2bcad93ad9670b882c00258be6d1a252d9710691 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 11 Mar 2026 13:03:42 +0100
Subject: [PATCH 03/66] KDDCUP99 dataset added and errors fixed

---
 nebula/addons/trustworthiness/dfl_local.py    |   4 +
 nebula/addons/trustworthiness/factsheet.py    |   7 +
 .../addons/trustworthiness/final_reports.py   |  93 ++++++
 nebula/controller/scenarios.py                |  16 +-
 nebula/core/datasets/kddcup99/__init__.py     |   0
 nebula/core/datasets/kddcup99/kddcup99.py     | 264 ++++++++++++++++++
 nebula/core/datasets/nebuladataset.py         |   2 +
 nebula/core/models/kddcup99/__init__.py       |   0
 nebula/core/models/kddcup99/mlp.py            |  49 ++++
 nebula/core/node.py                           |   9 +
 .../static/js/deployment/help-content.js      |   1 +
 nebula/frontend/static/js/deployment/main.js  |   4 +-
 nebula/frontend/templates/deployment.html     |   4 +
 13 files changed, 450 insertions(+), 3 deletions(-)
 create mode 100644 nebula/addons/trustworthiness/final_reports.py
 create mode 100755 nebula/core/datasets/kddcup99/__init__.py
 create mode 100644 nebula/core/datasets/kddcup99/kddcup99.py
 create mode 100755 nebula/core/models/kddcup99/__init__.py
 create mode 100644 nebula/core/models/kddcup99/mlp.py

diff --git a/nebula/addons/trustworthiness/dfl_local.py b/nebula/addons/trustworthiness/dfl_local.py
index f1b0c5b38..9c91520b9 100644
--- a/nebula/addons/trustworthiness/dfl_local.py
+++ b/nebula/addons/trustworthiness/dfl_local.py
@@ -15,6 +15,7 @@
 from nebula.core.models.mnist.mlp import MNISTModelMLP
 from nebula.core.models.mnist.cnn import MNISTModelCNN
 from nebula.core.models.covtype.mlp import CovtypeModelMLP
+from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
 from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model
@@ -109,6 +110,9 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
         elif dataset == "Covtype" and algorithm == "MLP":
             model = CovtypeModelMLP()
             num_classes_temp = 7
+        elif dataset == "KDDCUP99" and algorithm == "MLP":
+            model = KDDCUP99ModelMLP()
+            num_classes_temp = 23
         elif dataset == "AdultCensus" and algorithm == "MLP":
             model = AdultCensusModelMLP()
             num_classes_temp = 2
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 55aaa1bc2..56ac39087 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -13,6 +13,7 @@
 from nebula.core.models.mnist.mlp import MNISTModelMLP
 from nebula.core.models.mnist.cnn import MNISTModelCNN
 from nebula.core.models.covtype.mlp import CovtypeModelMLP
+from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
 from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate
@@ -124,6 +125,9 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     elif dataset == "Covtype" and algorithm == "MLP":
                         model = CovtypeModelMLP()
                         num_classes_temp = 7
+                    elif dataset == "KDDCUP99" and algorithm == "MLP":
+                        model = KDDCUP99ModelMLP()
+                        num_classes_temp = 2
                     elif dataset == "AdultCensus" and algorithm == "MLP":
                         model = AdultCensusModelMLP()
                         num_classes_temp = 2
@@ -256,6 +260,9 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
                 elif dataset == "Covtype" and model == "MLP":
                     model = CovtypeModelMLP()
                     num_classes_temp = 7
+                elif dataset == "KDDCUP99" and algorithm == "MLP":
+                    model = KDDCUP99ModelMLP()
+                    num_classes_temp = 23
                 elif dataset == "AdultCensus" and model == "MLP":
                     model = AdultCensusModelMLP()
                     num_classes_temp = 2
diff --git a/nebula/addons/trustworthiness/final_reports.py b/nebula/addons/trustworthiness/final_reports.py
new file mode 100644
index 000000000..eaff2cdef
--- /dev/null
+++ b/nebula/addons/trustworthiness/final_reports.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+import csv
+import os
+from typing import Any
+
+
+def ensure_trust_dir(base_logs_dir: str, experiment_name: str) -> str:
+    trust_dir = os.path.join(base_logs_dir, experiment_name, "trustworthiness")
+    os.makedirs(trust_dir, exist_ok=True)
+    return trust_dir
+
+
+def append_trust_report_to_csv(
+    base_logs_dir: str,
+    experiment_name: str,
+    report: dict[str, Any],
+) -> None:
+    """
+    Append the report's fields as one row to each of these files in the trust directory:
+      - data_results.csv
+      - emissions.csv
+    """
+
+    trust_dir = ensure_trust_dir(base_logs_dir, experiment_name)
+
+    data_results_path = os.path.join(trust_dir, "data_results.csv")
+    emissions_path = os.path.join(trust_dir, "emissions.csv")
+
+    _append_data_results(data_results_path, report)
+    _append_emissions(emissions_path, report)
+
+
+def _append_data_results(path: str, report: dict[str, Any]) -> None:
+    exists = os.path.exists(path)
+
+    with open(path, "a", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(
+            f,
+            fieldnames=[
+                "node_id",
+                "round",
+                "bytes_sent",
+                "bytes_recv",
+                "loss",
+                "accuracy",
+            ],
+        )
+
+        if not exists:
+            writer.writeheader()
+
+        writer.writerow(
+            {
+                "node_id": report["node_id"],
+                "round": report["round"],
+                "bytes_sent": report["bytes_sent"],
+                "bytes_recv": report["bytes_recv"],
+                "loss": report["loss"],
+                "accuracy": report["accuracy"],
+            }
+        )
+
+
+def _append_emissions(path: str, report: dict[str, Any]) -> None:
+    exists = os.path.exists(path)
+
+    with open(path, "a", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(
+            f,
+            fieldnames=[
+                "node_role",
+                "node_id",
+                "round",
+                "workload",
+                "sample_size",
+                "emissions",
+            ],
+        )
+
+        if not exists:
+            writer.writeheader()
+
+        writer.writerow(
+            {
+                "node_role": report["node_role"],
+                "node_id": report["node_id"],
+                "round": report["round"],
+                "workload": report["workload"],
+                "sample_size": report["sample_size"],
+                "emissions": report["emissions"],
+            }
+        )
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index e91e23e0e..1aba47f35 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -23,6 +23,7 @@
 from nebula.core.datasets.cifar100.cifar100 import CIFAR100Dataset
 from nebula.core.datasets.emnist.emnist import EMNISTDataset
 from nebula.core.datasets.fashionmnist.fashionmnist import FashionMNISTDataset
+from nebula.core.datasets.kddcup99.kddcup99 import KDDCUP99Dataset
 from nebula.core.datasets.covtype.covtype import CovtypeDataset
 from nebula.core.datasets.adultcensus.adultcensus import AdultCensusDataset
 from nebula.core.datasets.breast_cancer.breast_cancer import BreastCancerDataset
@@ -1030,6 +1031,17 @@ async def load_configurations_and_start_nodes(
                 seed=42,
                 config_dir=self.config_dir,
             )
+        elif dataset_name == "KDDCUP99":
+            logging.info("[DEBUG] entrando en rama KDDCUP99 para crear dataset")
+            dataset = KDDCUP99Dataset(
+                num_classes=2,
+                partitions_number=self.n_nodes,
+                iid=self.scenario.iid,
+                partition=self.scenario.partition_selection,
+                partition_parameter=self.scenario.partition_parameter,
+                seed=42,
+                config_dir=self.config_dir,
+            )
         elif dataset_name == "AdultCensus":
             dataset = AdultCensusDataset(
                 num_classes=2,
@@ -1091,9 +1103,9 @@ async def load_configurations_and_start_nodes(
         )
 
         if dataset.train_set is not None and hasattr(dataset.train_set, "data"):
-            logging.info(f"[DEBUG] AdultCensus train_set.data.shape = {dataset.train_set.data.shape}")
+            logging.info(f"[DEBUG] Dataset train_set.data.shape = {dataset.train_set.data.shape}")
         else:
-            logging.info("[DEBUG] AdultCensus train_set has no .data yet (or train_set is None)")
+            logging.info("[DEBUG] Dataset train_set has no .data yet (or train_set is None)")
         logging.info(f"Splitting {dataset_name} dataset... Done")
 
         if self.scenario.deployment in ["docker", "process", "physical"]:
diff --git a/nebula/core/datasets/kddcup99/__init__.py b/nebula/core/datasets/kddcup99/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/datasets/kddcup99/kddcup99.py b/nebula/core/datasets/kddcup99/kddcup99.py
new file mode 100644
index 000000000..6ff0e8a0f
--- /dev/null
+++ b/nebula/core/datasets/kddcup99/kddcup99.py
@@ -0,0 +1,264 @@
+import os
+from typing import Tuple, Any
+
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+
+
+class KDDCUP99TorchDataset(Dataset):
+    """
+    Simple torch Dataset wrapper for tabular KDDCUP99 data.
+
+    Returns:
+        x: torch.float32 tensor of shape (n_features,)
+        y: torch.long scalar in [0, num_classes-1]
+    """
+    def __init__(self, x: np.ndarray, y: np.ndarray):
+        if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
+            raise ValueError("x and y must be numpy arrays")
+
+        if x.ndim != 2:
+            raise ValueError(f"x must be 2D (n_samples, n_features). Got shape={x.shape}")
+        if y.ndim != 1:
+            y = y.reshape(-1)
+
+        if x.shape[0] != y.shape[0]:
+            raise ValueError(f"x and y must have same number of samples. Got {x.shape[0]} != {y.shape[0]}")
+
+        self.x = x.astype(np.float32, copy=False)
+        self.y = y.astype(np.int64, copy=False)
+
+        self.data = self.x
+        self.targets = self.y
+
+        n_classes = int(np.max(self.targets)) + 1
+        self.classes = [str(i) for i in range(n_classes)]
+
+    def __len__(self) -> int:
+        return int(self.y.shape[0])
+
+    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+        x_i = torch.from_numpy(self.x[idx])
+        y_i = torch.tensor(self.y[idx], dtype=torch.long)
+        return x_i, y_i
+
+
+class KDDCUP99PartitionHandler(NebulaPartitionHandler):
+    """
+    Partition handler for tabular datasets.
+
+    NebulaPartitionHandler provides (data, target) from the partition storage.
+    For images, we usually convert to PIL and apply torchvision transforms.
+    Here we convert features to float32 torch tensors and targets to long.
+    """
+    def __init__(self, file_path: str, prefix: str, config: Any, empty: bool = False):
+        super().__init__(file_path, prefix, config, empty)
+
+        # For tabular data we typically don't apply torchvision transforms.
+        self.transform = None
+
+    def __getitem__(self, idx: int):
+        data, target = super().__getitem__(idx)
+
+        # Defensive: depending on how NebulaPartitionHandler stores/returns it,
+        # "data" may be a tuple, tensor, list or np.ndarray; normalise it to a float32 tensor.
+        if isinstance(data, tuple):
+            data = data[0]
+
+        if isinstance(data, torch.Tensor):
+            x = data.to(dtype=torch.float32)
+        else:
+            x = torch.tensor(np.asarray(data), dtype=torch.float32)
+
+        # Convert the target to a torch.long tensor (its range is not validated here).
+        if isinstance(target, torch.Tensor):
+            y = target.to(dtype=torch.long)
+        else:
+            y = torch.tensor(int(target), dtype=torch.long)
+
+        if self.target_transform is not None:
+            y = self.target_transform(y)
+
+        return x, y
+
+
+class KDDCUP99Dataset(NebulaDataset):
+    """
+    KDDCUP99 dataset integration for Nebula.
+
+    Notes:
+    - KDDCUP99 is a tabular intrusion-detection dataset.
+    - sklearn fetch_kddcup99 exposes 41 features and 23 classes.
+    - Some columns are categorical/string-like, so we one-hot encode them.
+    - Targets may come as bytes/strings, so we map them to 0..num_classes-1.
+
+    Requirements:
+    - scikit-learn must be installed
+    - pandas must be installed
+    """
+    def __init__(
+        self,
+        num_classes: int = 23,
+        partitions_number: int = 1,
+        batch_size: int = 32,
+        num_workers: int = 4,
+        iid: bool = True,
+        partition: str = "dirichlet",
+        partition_parameter: float = 0.5,
+        seed: int = 42,
+        config_dir: str | None = None,
+        test_size: float = 0.2,
+        train_limit: int | None = None,
+        test_limit: int | None = None,
+        subset: str | None = None,
+        percent10: bool = True,
+    ):
+        super().__init__(
+            num_classes=num_classes,
+            partitions_number=partitions_number,
+            batch_size=batch_size,
+            num_workers=num_workers,
+            iid=iid,
+            partition=partition,
+            partition_parameter=partition_parameter,
+            seed=seed,
+            config_dir=config_dir,
+        )
+        self.test_size = float(test_size)
+        self.train_limit = train_limit
+        self.test_limit = test_limit
+        self.subset = subset
+        self.percent10 = percent10
+
+    def initialize_dataset(self):
+        if self.train_set is None or self.test_set is None:
+            self.train_set, self.test_set = self.load_kddcup99_dataset()
+
+        self.data_partitioning(plot=True)
+
+    def load_kddcup99_dataset(self):
+        """
+        Loads KDDCUP99 via sklearn, performs deterministic preprocessing
+        and train/test split, and wraps into torch Datasets.
+        """
+        # Local cache directory for sklearn dataset downloads
+        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+        os.makedirs(data_dir, exist_ok=True)
+
+        try:
+            import pandas as pd
+            from sklearn.datasets import fetch_kddcup99
+            from sklearn.model_selection import train_test_split
+        except Exception as e:
+            raise ImportError(
+                "KDDCUP99Dataset requires scikit-learn and pandas. "
+                "Install them (e.g., pip install scikit-learn pandas)."
+            ) from e
+
+        kdd = fetch_kddcup99(
+            subset=self.subset,
+            data_home=data_dir,
+            shuffle=True,
+            random_state=self.seed,
+            percent10=self.percent10,
+            download_if_missing=True,
+            as_frame=True,
+        )
+
+        x = kdd.data
+        y = kdd.target
+
+        # Defensive conversion to pandas objects
+        if not hasattr(x, "columns"):
+            x = pd.DataFrame(x)
+        if not hasattr(y, "astype"):
+            y = pd.Series(y)
+
+        # Decode bytes -> str where needed
+        def _decode_if_bytes(v):
+            if isinstance(v, (bytes, bytearray)):
+                return v.decode("utf-8", errors="ignore")
+            return v
+
+        # Some KDDCUP99 columns are categorical (e.g. protocol/service/flag).
+        # We decode bytes and one-hot encode object/category columns.
+        for col in x.columns:
+            if x[col].dtype == object:
+                x[col] = x[col].map(_decode_if_bytes)
+
+        y = y.map(_decode_if_bytes)
+
+        # One-hot encode categorical columns, keep numeric ones as-is.
+        x = pd.get_dummies(x, drop_first=False)
+
+        # Ensure fully numeric dense matrix
+        x = x.astype(np.float32).to_numpy(copy=False)
+
+        # Map labels to 0..num_classes-1 deterministically
+        y = pd.Series(y).astype(str)
+        classes = sorted(y.unique().tolist())
+        class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}
+        y = y.map(class_to_idx).to_numpy(dtype=np.int64, copy=False)
+
+        # Keep self.num_classes aligned with actual loaded subset
+        self.num_classes = len(classes)
+
+        # Main stratified train/test split over the full dataset
+        x_train, x_test, y_train, y_test = train_test_split(
+            x, y,
+            test_size=self.test_size,
+            random_state=self.seed,
+            shuffle=True,
+            stratify=y,
+        )
+
+        # Optional stratified subsampling down to train_limit/test_limit (deterministic via seed)
+        if self.train_limit is not None and len(y_train) > self.train_limit:
+            x_train, _, y_train, _ = train_test_split(
+                x_train, y_train,
+                train_size=self.train_limit,
+                random_state=self.seed,
+                shuffle=True,
+                stratify=y_train,
+            )
+
+        if self.test_limit is not None and len(y_test) > self.test_limit:
+            x_test, _, y_test, _ = train_test_split(
+                x_test, y_test,
+                train_size=self.test_limit,
+                random_state=self.seed,
+                shuffle=True,
+                stratify=y_test,
+            )
+
+        train_ds = KDDCUP99TorchDataset(x_train, y_train)
+        test_ds = KDDCUP99TorchDataset(x_test, y_test)
+
+        # Optional: preserve original class names for inspection/debugging
+        train_ds.classes = classes
+        test_ds.classes = classes
+
+        return train_ds, test_ds
+
+    def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
+        if partition == "dirichlet":
+            partitions_map = self.dirichlet_partition(dataset, alpha=partition_parameter)
+        elif partition == "percent":
+            partitions_map = self.percentage_partition(dataset, percentage=partition_parameter)
+        else:
+            raise ValueError(f"Partition {partition} is not supported for Non-IID map")
+
+        return partitions_map
+
+    def generate_iid_map(self, dataset, partition: str = "balancediid", partition_parameter: float = 2):
+        if partition == "balancediid":
+            partitions_map = self.balanced_iid_partition(dataset)
+        elif partition == "unbalancediid":
+            partitions_map = self.unbalanced_iid_partition(dataset, imbalance_factor=partition_parameter)
+        else:
+            raise ValueError(f"Partition {partition} is not supported for IID map")
+
+        return partitions_map
diff --git a/nebula/core/datasets/nebuladataset.py b/nebula/core/datasets/nebuladataset.py
index e42657989..a4468ded8 100755
--- a/nebula/core/datasets/nebuladataset.py
+++ b/nebula/core/datasets/nebuladataset.py
@@ -1286,6 +1286,7 @@ def factory_nebuladataset(dataset, **config) -> NebulaDataset:
     from nebula.core.datasets.emnist.emnist import EMNISTDataset
     from nebula.core.datasets.fashionmnist.fashionmnist import FashionMNISTDataset
     from nebula.core.datasets.covtype.covtype import CovtypeDataset
+    from nebula.core.datasets.kddcup99.kddcup99 import KDDCUP99Dataset
     from nebula.core.datasets.adultcensus.adultcensus import AdultCensusDataset
     from nebula.core.datasets.breast_cancer.breast_cancer import BreastCancerDataset
     from nebula.core.datasets.mnist.mnist import MNISTDataset
@@ -1294,6 +1295,7 @@ def factory_nebuladataset(dataset, **config) -> NebulaDataset:
         "MNIST": MNISTDataset,
         "FashionMNIST": FashionMNISTDataset,
         "Covtype": CovtypeDataset,
+        "KDDCUP99": KDDCUP99Dataset,
         "AdultCensus": AdultCensusDataset,
         "BreastCancer": BreastCancerDataset,
         "EMNIST": EMNISTDataset,
diff --git a/nebula/core/models/kddcup99/__init__.py b/nebula/core/models/kddcup99/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/nebula/core/models/kddcup99/mlp.py b/nebula/core/models/kddcup99/mlp.py
new file mode 100644
index 000000000..4bb59fec9
--- /dev/null
+++ b/nebula/core/models/kddcup99/mlp.py
@@ -0,0 +1,49 @@
+import torch
+
+from nebula.core.models.nebulamodel import NebulaModel
+
+
+class KDDCUP99ModelMLP(NebulaModel):
+    """Three-layer fully connected classifier for flat KDDCUP99 feature vectors."""
+
+    def __init__(
+        self,
+        input_channels=1,
+        num_classes=23,
+        learning_rate=1e-3,
+        metrics=None,
+        confusion_matrix=None,
+        seed=None,
+        input_size=118,
+    ):
+        super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+
+        self.input_size = input_size
+        self.example_input_array = torch.zeros(1, self.input_size)
+        self.learning_rate = learning_rate
+        self.criterion = torch.nn.CrossEntropyLoss()
+
+        # Layer widths: input_size -> 256 -> 128 -> num_classes
+        self.l1 = torch.nn.Linear(self.input_size, 256)
+        self.l2 = torch.nn.Linear(256, 128)
+        self.l3 = torch.nn.Linear(128, num_classes)
+
+    def forward(self, x):
+        """Return class logits; a 1-D input is treated as a single sample."""
+        batch = x.unsqueeze(0) if x.dim() == 1 else x
+        flat = batch.view(batch.size(0), -1)
+        hidden = torch.relu(self.l1(flat))
+        hidden = torch.relu(self.l2(hidden))
+        return self.l3(hidden)
+
+    def configure_optimizers(self):
+        """Create the Adam optimizer and keep a reference in ``_optimizer``."""
+        self._optimizer = adam = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
+        return adam
+
+    def get_learning_rate(self):
+        return self.learning_rate
+
+    def count_parameters(self):
+        """Count the elements of all parameters that require gradients."""
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)
diff --git a/nebula/core/node.py b/nebula/core/node.py
index c5f80843e..c9541098c 100755
--- a/nebula/core/node.py
+++ b/nebula/core/node.py
@@ -26,6 +26,7 @@
 from nebula.core.datasets.emnist.emnist import EMNISTPartitionHandler
 from nebula.core.datasets.fashionmnist.fashionmnist import FashionMNISTPartitionHandler
 from nebula.core.datasets.covtype.covtype import CovtypePartitionHandler
+from nebula.core.datasets.kddcup99.kddcup99 import KDDCUP99PartitionHandler
 from nebula.core.datasets.adultcensus.adultcensus import AdultCensusPartitionHandler
 from nebula.core.datasets.breast_cancer.breast_cancer import BreastCancerPartitionHandler
 from nebula.core.datasets.mnist.mnist import MNISTPartitionHandler
@@ -42,6 +43,7 @@
 from nebula.core.models.fashionmnist.cnn import FashionMNISTModelCNN
 from nebula.core.models.fashionmnist.mlp import FashionMNISTModelMLP
 from nebula.core.models.covtype.mlp import CovtypeModelMLP
+from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
 from nebula.core.models.mnist.cnn import MNISTModelCNN
@@ -125,6 +127,13 @@ async def main(config: Config):
             model = CovtypeModelMLP()
         else:
             raise ValueError(f"Model {model} not supported for dataset {dataset_name}")
+    elif dataset_name == "KDDCUP99":
+        batch_size = 32
+        handler = KDDCUP99PartitionHandler
+        if model_name == "MLP":
+            model = KDDCUP99ModelMLP()
+        else:  # report the requested model name; `model` is not assigned in this branch
+            raise ValueError(f"Model {model_name} not supported for dataset {dataset_name}")
     elif dataset_name == "AdultCensus":
         batch_size = 32
         handler = AdultCensusPartitionHandler
diff --git a/nebula/frontend/static/js/deployment/help-content.js b/nebula/frontend/static/js/deployment/help-content.js
index 111d4e4a8..6e9b1f1ff 100644
--- a/nebula/frontend/static/js/deployment/help-content.js
+++ b/nebula/frontend/static/js/deployment/help-content.js
@@ -62,6 +62,7 @@ const HelpContent = (function() {
             <li>FashionMNIST: The FashionMNIST dataset</li>
             <li>CIFAR10: The CIFAR10 dataset</li>
             <li>Covtype: The Covtype dataset</li>
+            <li>KDDCUP99: The KDDCUP99 dataset</li>
             <li>AdultCensus: The AdultCensus dataset</li>
             <li>BreastCancer: The BreastCancer dataset</li>
         </ul>
diff --git a/nebula/frontend/static/js/deployment/main.js b/nebula/frontend/static/js/deployment/main.js
index 43b546f35..3f306f588 100644
--- a/nebula/frontend/static/js/deployment/main.js
+++ b/nebula/frontend/static/js/deployment/main.js
@@ -210,7 +210,7 @@ const DeploymentManager = (function() {
         datasetSelect.innerHTML = "";
 
         // Add dataset options
-        const datasets = ['MNIST', 'FashionMNIST', 'EMNIST', 'CIFAR10', 'CIFAR100', 'Covtype', 'AdultCensus', 'BreastCancer'];
+        const datasets = ['MNIST', 'FashionMNIST', 'EMNIST', 'CIFAR10', 'CIFAR100', 'Covtype', 'KDDCUP99', 'AdultCensus', 'BreastCancer'];
         datasets.forEach(dataset => {
             const option = document.createElement("option");
             option.value = dataset;
@@ -253,6 +253,8 @@ const DeploymentManager = (function() {
                 return ['CNN'];
             case 'covtype':
                 return ['MLP'];
+            case 'kddcup99':
+                return ['MLP'];
             case 'adultcensus':
                 return ['MLP'];
             case 'breast_cancer':
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 18d2f2e42..83717c656 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -147,6 +147,10 @@ <h5>Dataset <i class="fa fa-database"></i>
                             <input type="checkbox" id="mod-fmnist" value='Covtype'
                                 style="margin-right: 10px; margin-left: 10px;">Covtype
                         </label>
+                        <label style="display: inline-block; align-items: center; margin-bottom: 10px;">
+                            <input type="checkbox" id="mod-fmnist" value='KDDCUP99'
+                                style="margin-right: 10px; margin-left: 10px;">KDDCUP99
+                        </label>
                         <label style="display: inline-block; align-items: center; margin-bottom: 10px;">
                             <input type="checkbox" id="mod-fmnist" value='AdultCensus'
                                 style="margin-right: 10px; margin-left: 10px;">AdultCensus

From d51f50b058c5148282cc0f7fb518e3c272ce57a0 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 12 Mar 2026 10:42:31 +0100
Subject: [PATCH 04/66] Trustworthiness message system implemented

---
 .../addons/trustworthiness/trustworthiness.py | 102 +++++++++++++++++-
 nebula/core/engine.py                         |  59 +++++++---
 nebula/core/network/actions.py                |   8 ++
 nebula/core/network/communications.py         |   6 +-
 nebula/core/network/connection.py             |   6 +-
 nebula/core/network/messages.py               |  15 +++
 nebula/core/pb/nebula.proto                   |  18 ++++
 nebula/core/pb/nebula_pb2.py                  |  91 ++++++++--------
 8 files changed, 240 insertions(+), 65 deletions(-)

diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 68afcfa23..ce56cb63d 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -16,6 +16,7 @@
 from nebula.addons.trustworthiness.metric import TrustMetricManager
 from nebula.addons.trustworthiness.dfl_local import compute_trust_local_dfl
 import json, os
+from nebula.core.network.communications import CommunicationsManager
 
 """                                                     ##############################
                                                         #       TRUST WORKLOADS      #
@@ -153,7 +154,58 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
         elif federation == "SDFL":
             pass
         else:
-            pass
+            cm = CommunicationsManager.get_instance()
+            server_addr = "192.168.51.2:45001"  # change to the server's real IP:PORT
+
+            logging.info("connections=%s", list(cm.connections.keys()))
+            logging.info("server in connections? %s", server_addr in cm.connections)
+
+            # Replace these values with the real ones available at this point
+            bytes_sent = 111
+            bytes_recv = 222
+            accuracy = 0.91
+            loss = 0.12
+            energy_grid = 0.33
+            emissions = 0.44
+            energy_consumed = 0.55
+            sample_size = 667
+
+            message = cm.mm.create_message(
+                "trustworthiness",
+                action="report",
+                node_id=str(self._idx),
+                bytes_sent=bytes_sent,
+                bytes_recv=bytes_recv,
+                accuracy=accuracy,
+                loss=loss,
+                energy_grid=energy_grid,
+                emissions=emissions,
+                energy_consumed=energy_consumed,
+                sample_size=sample_size,
+            )
+
+            logging.info(
+                "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
+                "accuracy=%s loss=%s energy_grid=%s emissions=%s "
+                "energy_consumed=%s sample_size=%s",
+                server_addr,
+                str(self._idx),
+                bytes_sent,
+                bytes_recv,
+                accuracy,
+                loss,
+                energy_grid,
+                emissions,
+                energy_consumed,
+                sample_size,
+            )
+
+            await cm.send_message(
+                server_addr,
+                message,
+                message_type="trustworthiness",
+                allow_after_learning_finished=True,
+            )
 
     async def _process_round_end_event(self, ree: RoundEndEvent):
         scenario_name = self._engine.config.participant["scenario_args"]["name"]
@@ -194,6 +246,11 @@ def __init__(self, engine: Engine, idx, trust_files_route):
         self._idx = idx
         self._trust_files_route = trust_files_route
         self._per_round = None
+        self._trustworthiness_reports = {}
+        self._expected_reports = 2
+        self._trust_config = None
+        self._csv_completed = False
+        self._finish_post = False
 
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
@@ -225,9 +282,50 @@ async def finish_experiment_role_pre_actions(self):
 
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
         from datetime  import datetime
+        # Stamp the end time, remember the arguments on the instance, then build the factsheet.
         self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        self._trust_config = trust_config
+        self._experiment_name = experiment_name
+        # Either all node reports already arrived (_csv_completed) or flag that this step ran first.
+        if self._csv_completed:
+            logging.info("[TW SERVER] finish_experiment_role_post_actions called, trustworthiness reports OK, starting generate_factsheet")
+            # NOTE(review): nothing extra happens here yet; the factsheet call below runs on both paths.
+        else:
+            self._finish_post = True
+            logging.info("[TW SERVER] finish_experiment_role_post_actions called, waiting for trustworthiness reports")
         await self._generate_factsheet(trust_config, experiment_name)
 
+    async def register_trustworthiness_report(self, source, message):
+        """Store one node's report and update the report-completion state.
+
+        Reports are keyed by ``message.node_id``, so a repeated id overwrites the earlier entry.
+        """
+        self._trustworthiness_reports[message.node_id] = {
+            "source": source,
+            "node_id": message.node_id,
+            "bytes_sent": message.bytes_sent,
+            "bytes_recv": message.bytes_recv,
+            "accuracy": message.accuracy,
+            "loss": message.loss,
+            "energy_grid": message.energy_grid,
+            "emissions": message.emissions,
+            "energy_consumed": message.energy_consumed,
+            "sample_size": message.sample_size,
+        }
+        logging.info("[TW SERVER] received report from node_id=%s total=%s", message.node_id, len(self._trustworthiness_reports))
+
+        if self._expected_reports > len(self._trustworthiness_reports):
+            return
+        logging.info("[TW SERVER] all reports received, generating csv")
+        # TODO: generate the CSV (not implemented yet)
+        if self._finish_post:
+            logging.info("[TW SERVER] all reports received and post OK, generating factsheet")
+            # NOTE(review): this path only logs; no factsheet generation is triggered from here.
+        else:
+            self._csv_completed = True
+            logging.info("[TW SERVER] all reports received, waiting for finish post, csv_completed %s", self._csv_completed)
+
+
     async def _generate_factsheet(self, trust_config, experiment_name):
         from nebula.addons.trustworthiness.factsheet import Factsheet
         from nebula.addons.trustworthiness.metric import TrustMetricManager
@@ -310,6 +408,8 @@ def __init__(self, engine: Engine, config: Config):
         self._idx = self._config.participant["device_args"]["idx"]
         self._trust_workload: TrustWorkload = self._factory_trust_workload(self._role, self._engine, self._idx, self._trust_dir_files)
 
+        self._engine.trustworthiness = self
+
         # EmissionsTracker from codecarbon to measure the emissions during the aggregation step in the server
         self._tracker= EmissionsTracker(tracking_mode='process', log_level='error', save_to_file=False)
 
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 151ae6b22..0fc916178 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -94,7 +94,7 @@ def __init__(
         self.ip = config.participant["network_args"]["ip"]
         self.port = config.participant["network_args"]["port"]
         self.addr = config.participant["network_args"]["addr"]
-        
+
         self.name = config.participant["device_args"]["name"]
         self.client = docker.from_env()
 
@@ -187,7 +187,7 @@ def aggregator(self):
     def trainer(self):
         """Trainer"""
         return self._trainer
-    
+
     @property
     def rb(self):
         """Role Behavior"""
@@ -317,7 +317,7 @@ async def _control_alive_callback(self, source, message):
 
     async def _control_leadership_transfer_callback(self, source, message):
         logging.info(f"🔧  handle_control_message | Trigger | Received leadership transfer message from {source}")
-        
+
         if await self._round_in_process_lock.locked_async():
             logging.info("Learning cycle is executing, role behavior will be modified next round")
             await self.rb.set_next_role(Role.AGGREGATOR, source_to_notificate=source)
@@ -354,7 +354,7 @@ async def _control_leadership_transfer_ack_callback(self, source, message):
             except TimeoutError:
                 logging.info("Learning cycle is locked, role behavior will be modified next round")
                 await self.rb.set_next_role(Role.TRAINER)
-        
+
 
     async def _connection_connect_callback(self, source, message):
         logging.info(f"🔗  handle_connection_message | Trigger | Received connection message from {source}")
@@ -414,6 +414,32 @@ async def _reputation_share_callback(self, source, message):
         except Exception as e:
             logging.exception(f"Error handling reputation message: {e}")
 
+    async def _trustworthiness_report_callback(self, source, message):
+        """Log a received trustworthiness report and forward it to the trust workload, if present."""
+        try:
+            fields = (
+                "node_id",
+                "bytes_sent",
+                "bytes_recv",
+                "accuracy",
+                "loss",
+                "energy_grid",
+                "emissions",
+                "energy_consumed",
+                "sample_size",
+            )
+            report = {"source": source, **{name: getattr(message, name) for name in fields}}
+            logging.info(f"handle_trustworthiness_message | Trigger | {report}")
+
+            # The addon and its workload may be missing, so resolve them defensively.
+            addon = getattr(self, "trustworthiness", None)
+            workload = getattr(addon, "tw", None) if addon is not None else None
+            if workload is not None and hasattr(workload, "register_trustworthiness_report"):
+                await workload.register_trustworthiness_report(source, message)
+
+        except Exception as e:
+            logging.exception(f"Error handling trustworthiness message: {e}")
+
     """                                                     ##############################
                                                             #    REGISTERING CALLBACKS   #
                                                             ##############################
@@ -710,10 +736,10 @@ async def _start_learning(self):
                 await self.get_federation_ready_lock().acquire_async()
                 if self.config.participant["device_args"]["start"]:
                     logging.info("Propagate initial model updates.")
-                    
+
                     mpe = ModelPropagationEvent(await self.cm.get_addrs_current_connections(only_direct=True, myself=False), "initialization")
                     await EventManager.get_instance().publish_node_event(mpe)
-                    
+
                     await self.get_federation_ready_lock().release_async()
 
                 self.trainer.set_epochs(epochs)
@@ -764,7 +790,8 @@ async def learning_cycle_finished(self):
             return False
         else:
             return current_round >= self.total_rounds
-        
+            #return False
+
     async def resolve_missing_updates(self):
         """
         Delegates the resolution strategy for missing updates to the current role behavior.
@@ -778,7 +805,7 @@ async def resolve_missing_updates(self):
         """
         logging.info(f"Using Role behavior: {self.rb.get_role_name()} conflict resolve strategy")
         return await self.rb.resolve_missing_updates()
-    
+
     async def update_self_role(self):
         """
         Checks whether a role update is required and performs the transition if necessary.
@@ -806,7 +833,7 @@ async def update_self_role(self):
                 logging.info(f"Sending role modification ACK to transferer: {source_to_notificate}")
                 message = self.cm.create_message("control", "leadership_transfer_ack")
                 asyncio.create_task(self.cm.send_message(source_to_notificate, message))
-             
+
     async def _learning_cycle(self):
         """
         Main asynchronous loop for executing the Federated Learning process across multiple rounds.
@@ -837,9 +864,9 @@ async def _learning_cycle(self):
                     indent=2,
                     title="Round information",
                 )
-                
+
                 await self.update_self_role()
-                
+
                 logging.info(f"Federation nodes: {self.federation_nodes}")
                 await self.update_federation_nodes(
                     await self.cm.get_addrs_current_connections(only_direct=True, myself=True)
@@ -851,10 +878,10 @@ async def _learning_cycle(self):
                 logging.info(f"Expected nodes: {expected_nodes}")
                 direct_connections = await self.cm.get_addrs_current_connections(only_direct=True)
                 undirected_connections = await self.cm.get_addrs_current_connections(only_undirected=True)
-                
+
                 logging.info(f"Direct connections: {direct_connections} | Undirected connections: {undirected_connections}")
                 logging.info(f"[Role {self.rb.get_role_name()}] Starting learning cycle...")
-                
+
                 await self.aggregator.update_federation_nodes(expected_nodes)
                 async with self._role_behavior_performance_lock:
                     await self.rb.extended_learning_cycle()
@@ -882,13 +909,13 @@ async def _learning_cycle(self):
         self.trainer.on_learning_cycle_end()
 
         await self.trainer.test()
-        
+
         # Shutdown protocol
         await self._shutdown_protocol()
-            
+
     async def _shutdown_protocol(self):
         logging.info("Starting graceful shutdown process...")
-        
+
         # 1.- Publish Experiment Finish Event to the last update on modules
         logging.info("Publishing Experiment Finish Event...")
         efe = ExperimentFinishEvent()
diff --git a/nebula/core/network/actions.py b/nebula/core/network/actions.py
index 77e1997c5..a3abca1f2 100644
--- a/nebula/core/network/actions.py
+++ b/nebula/core/network/actions.py
@@ -83,6 +83,13 @@ class ReputationAction(Enum):
 
     SHARE = nebula_pb2.ReputationMessage.Action.SHARE
 
+class TrustworthinessAction(Enum):
+    """
+    Enum for trustworthiness report messages in the federation.
+    """
+
+    REPORT = nebula_pb2.TrustworthinessMessage.Action.REPORT
+
 
 # Mapping between message type strings and their corresponding Enum classes
 ACTION_CLASSES = {
@@ -94,6 +101,7 @@ class ReputationAction(Enum):
     "offer": OfferAction,
     "link": LinkAction,
     "reputation": ReputationAction,
+    "trustworthiness": TrustworthinessAction,
 }
 
 
diff --git a/nebula/core/network/communications.py b/nebula/core/network/communications.py
index e0b1c17a5..5533b379f 100755
--- a/nebula/core/network/communications.py
+++ b/nebula/core/network/communications.py
@@ -854,7 +854,7 @@ async def send_message_to_neighbors(self, message, neighbors=None, interval=0):
             if interval > 0:
                 await asyncio.sleep(interval)
 
-    async def send_message(self, dest_addr, message, message_type=""):
+    async def send_message(self, dest_addr, message, message_type="", allow_after_learning_finished = False,):
         """
         Sends a message to a specific destination address, with optional compression for large messages.
 
@@ -868,7 +868,7 @@ async def send_message(self, dest_addr, message, message_type=""):
             try:
                 if dest_addr in self.connections:
                     conn = self.connections[dest_addr]
-                    await conn.send(data=message)
+                    await conn.send(data=message, allow_after_learning_finished=allow_after_learning_finished)
             except Exception as e:
                 logging.exception(f"❗️  Cannot send message {message} to {dest_addr}. Error: {e!s}")
                 await self.disconnect(dest_addr, mutual_disconnection=False)
@@ -879,7 +879,7 @@ async def send_message(self, dest_addr, message, message_type=""):
                     if conn is None:
                         logging.info(f"❗️  Connection with {dest_addr} not found")
                         return
-                    await conn.send(data=message, is_compressed=True)
+                    await conn.send(data=message, is_compressed=True, allow_after_learning_finished=allow_after_learning_finished)
                 except Exception as e:
                     logging.exception(f"❗️  Cannot send model to {dest_addr}: {e!s}")
                     await self.disconnect(dest_addr, mutual_disconnection=False)
diff --git a/nebula/core/network/connection.py b/nebula/core/network/connection.py
index 6ba60749b..578907572 100755
--- a/nebula/core/network/connection.py
+++ b/nebula/core/network/connection.py
@@ -338,6 +338,7 @@ async def send(
         pb: bool = True,
         encoding_type: str = "utf-8",
         is_compressed: bool = False,
+        allow_after_learning_finished: bool = False,
     ) -> None:
         """
         Sends data over the active connection.
@@ -359,10 +360,13 @@ async def send(
             return
 
         # Check if learning cycle has finished - don't send messages
-        if await self.cm.learning_finished():
+        if not allow_after_learning_finished and await self.cm.learning_finished():
             logging.info(f"Not sending message to {self.addr} because learning cycle has finished")
             return
 
+        if await self.cm.learning_finished() and allow_after_learning_finished:
+            logging.info(f"Sending message to {self.addr} after learning cycle finished (allowed)")
+
         try:
             message_id = uuid.uuid4().bytes
             data_prefix, encoded_data = self._prepare_data(data, pb, encoding_type)
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index 7870acddf..b6a4ce0c0 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -94,6 +94,21 @@ def _define_message_templates(self):
             },
             "discover": {"parameters": ["action"], "defaults": {}},
             "link": {"parameters": ["action", "addrs"], "defaults": {}},
+            "trustworthiness": {
+                "parameters": [
+                    "action",
+                    "node_id",
+                    "bytes_sent",
+                    "bytes_recv",
+                    "accuracy",
+                    "loss",
+                    "energy_grid",
+                    "emissions",
+                    "energy_consumed",
+                    "sample_size"
+                ],
+                "defaults": {},
+            }
             # Add additional message types here
         }
 
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index 3360196ed..b1f1d2073 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -26,6 +26,7 @@ message Wrapper {
     DiscoverMessage discover_message = 9;
     OfferMessage offer_message = 10;
     LinkMessage link_message = 11;
+    TrustworthinessMessage trustworthiness_message = 12;
   }
 }
 
@@ -130,3 +131,20 @@ message ReputationMessage {
 message ResponseMessage {
   string response = 1;      // Outcome of the requested operation.
 }
+
+message TrustworthinessMessage {
+  enum Action {
+    REPORT = 0;             // A node reports its collected values.
+  }
+
+  Action action = 1;        // Kind of trustworthiness message.
+  string node_id = 2;       // Identifier of the reporting node.
+  int64 bytes_sent = 3;
+  int64 bytes_recv = 4;
+  double accuracy = 5;
+  double loss = 6;
+  double energy_grid = 7;
+  double emissions = 8;
+  double energy_consumed = 9;
+  int32 sample_size = 10;
+}
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index 448675b31..ed1ca508c 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -1,12 +1,11 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
 # source: nebula.proto
-# Protobuf Python Version: 4.25.3
 """Generated protocol buffer code."""
+from google.protobuf.internal import builder as _builder
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
 from google.protobuf import symbol_database as _symbol_database
-from google.protobuf.internal import builder as _builder
 # @@protoc_insertion_point(imports)
 
 _sym_db = _symbol_database.Default()
@@ -14,49 +13,53 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xae\x04\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 
\x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\tb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xf1\x04\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 
\x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\x94\x02\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x13\n\x0b\x65nergy_grid\x18\x07 \x01(\x01\x12\x11\n\temissions\x18\x08 \x01(\x01\x12\x17\n\x0f\x65nergy_consumed\x18\t \x01(\x01\x12\x13\n\x0bsample_size\x18\n \x01(\x05\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\x62\x06proto3')
 
-_globals = globals()
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', _globals)
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
 if _descriptor._USE_C_DESCRIPTORS == False:
+
   DESCRIPTOR._options = None
-  _globals['_WRAPPER']._serialized_start=25
-  _globals['_WRAPPER']._serialized_end=583
-  _globals['_DISCOVERYMESSAGE']._serialized_start=586
-  _globals['_DISCOVERYMESSAGE']._serialized_end=744
-  _globals['_DISCOVERYMESSAGE_ACTION']._serialized_start=692
-  _globals['_DISCOVERYMESSAGE_ACTION']._serialized_end=744
-  _globals['_CONTROLMESSAGE']._serialized_start=747
-  _globals['_CONTROLMESSAGE']._serialized_end=956
-  _globals['_CONTROLMESSAGE_ACTION']._serialized_start=826
-  _globals['_CONTROLMESSAGE_ACTION']._serialized_end=956
-  _globals['_FEDERATIONMESSAGE']._serialized_start=959
-  _globals['_FEDERATIONMESSAGE']._serialized_end=1164
-  _globals['_FEDERATIONMESSAGE_ACTION']._serialized_start=1064
-  _globals['_FEDERATIONMESSAGE_ACTION']._serialized_end=1164
-  _globals['_MODELMESSAGE']._serialized_start=1166
-  _globals['_MODELMESSAGE']._serialized_end=1231
-  _globals['_CONNECTIONMESSAGE']._serialized_start=1234
-  _globals['_CONNECTIONMESSAGE']._serialized_end=1377
-  _globals['_CONNECTIONMESSAGE_ACTION']._serialized_start=1305
-  _globals['_CONNECTIONMESSAGE_ACTION']._serialized_end=1377
-  _globals['_DISCOVERMESSAGE']._serialized_start=1380
-  _globals['_DISCOVERMESSAGE']._serialized_end=1529
-  _globals['_DISCOVERMESSAGE_ACTION']._serialized_start=1447
-  _globals['_DISCOVERMESSAGE_ACTION']._serialized_end=1529
-  _globals['_OFFERMESSAGE']._serialized_start=1532
-  _globals['_OFFERMESSAGE']._serialized_end=1738
-  _globals['_OFFERMESSAGE_ACTION']._serialized_start=1695
-  _globals['_OFFERMESSAGE_ACTION']._serialized_end=1738
-  _globals['_LINKMESSAGE']._serialized_start=1740
-  _globals['_LINKMESSAGE']._serialized_end=1859
-  _globals['_LINKMESSAGE_ACTION']._serialized_start=1814
-  _globals['_LINKMESSAGE_ACTION']._serialized_end=1859
-  _globals['_REPUTATIONMESSAGE']._serialized_start=1862
-  _globals['_REPUTATIONMESSAGE']._serialized_end=1999
-  _globals['_REPUTATIONMESSAGE_ACTION']._serialized_start=1980
-  _globals['_REPUTATIONMESSAGE_ACTION']._serialized_end=1999
-  _globals['_RESPONSEMESSAGE']._serialized_start=2001
-  _globals['_RESPONSEMESSAGE']._serialized_end=2036
+  _WRAPPER._serialized_start=25
+  _WRAPPER._serialized_end=650
+  _DISCOVERYMESSAGE._serialized_start=653
+  _DISCOVERYMESSAGE._serialized_end=811
+  _DISCOVERYMESSAGE_ACTION._serialized_start=759
+  _DISCOVERYMESSAGE_ACTION._serialized_end=811
+  _CONTROLMESSAGE._serialized_start=814
+  _CONTROLMESSAGE._serialized_end=1023
+  _CONTROLMESSAGE_ACTION._serialized_start=893
+  _CONTROLMESSAGE_ACTION._serialized_end=1023
+  _FEDERATIONMESSAGE._serialized_start=1026
+  _FEDERATIONMESSAGE._serialized_end=1231
+  _FEDERATIONMESSAGE_ACTION._serialized_start=1131
+  _FEDERATIONMESSAGE_ACTION._serialized_end=1231
+  _MODELMESSAGE._serialized_start=1233
+  _MODELMESSAGE._serialized_end=1298
+  _CONNECTIONMESSAGE._serialized_start=1301
+  _CONNECTIONMESSAGE._serialized_end=1444
+  _CONNECTIONMESSAGE_ACTION._serialized_start=1372
+  _CONNECTIONMESSAGE_ACTION._serialized_end=1444
+  _DISCOVERMESSAGE._serialized_start=1447
+  _DISCOVERMESSAGE._serialized_end=1596
+  _DISCOVERMESSAGE_ACTION._serialized_start=1514
+  _DISCOVERMESSAGE_ACTION._serialized_end=1596
+  _OFFERMESSAGE._serialized_start=1599
+  _OFFERMESSAGE._serialized_end=1805
+  _OFFERMESSAGE_ACTION._serialized_start=1762
+  _OFFERMESSAGE_ACTION._serialized_end=1805
+  _LINKMESSAGE._serialized_start=1807
+  _LINKMESSAGE._serialized_end=1926
+  _LINKMESSAGE_ACTION._serialized_start=1881
+  _LINKMESSAGE_ACTION._serialized_end=1926
+  _REPUTATIONMESSAGE._serialized_start=1929
+  _REPUTATIONMESSAGE._serialized_end=2066
+  _REPUTATIONMESSAGE_ACTION._serialized_start=2047
+  _REPUTATIONMESSAGE_ACTION._serialized_end=2066
+  _RESPONSEMESSAGE._serialized_start=2068
+  _RESPONSEMESSAGE._serialized_end=2103
+  _TRUSTWORTHINESSMESSAGE._serialized_start=2106
+  _TRUSTWORTHINESSMESSAGE._serialized_end=2382
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2362
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2382
 # @@protoc_insertion_point(module_scope)

From 77302f520318d4a93a7e5828a3ca63941c917cf9 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 12 Mar 2026 16:12:40 +0100
Subject: [PATCH 05/66] Minor changes in trustworthiness CFL

---
 .../addons/trustworthiness/trustworthiness.py |  34 ++--
 nebula/addons/trustworthiness/utils.py        | 147 ++++++++++++++++++
 2 files changed, 170 insertions(+), 11 deletions(-)

diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index ce56cb63d..ca88a27cf 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -8,7 +8,7 @@
 from nebula.core.engine import Engine
 import pickle
 from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save
-from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv
+from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
 from datetime import datetime
@@ -160,15 +160,9 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             logging.info("connections=%s", list(cm.connections.keys()))
             logging.info("server in connections? %s", server_addr in cm.connections)
 
-            # Sustituye estos valores por los reales que tengas en este punto
-            bytes_sent = 111
-            bytes_recv = 222
-            accuracy = 0.91
-            loss = 0.12
-            energy_grid = 0.33
-            emissions = 0.44
-            energy_consumed = 0.55
-            sample_size = 667
+            bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(experiment_name, self._idx)
+
+            energy_grid, emissions, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
 
             message = cm.mm.create_message(
                 "trustworthiness",
@@ -289,6 +283,23 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
         if self._csv_completed == True:
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, trustworthiness reports OK, starting generate_factsheet")
+            bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(
+                self._experiment_name,
+                self._idx,
+            )
+
+            energy_grid, emissions, energy_consumed, sample_size = load_emissions_participant(
+                self._experiment_name,
+                self._idx,
+            )
+
+            logging.info(
+                "[TW SERVER] local server report added for node_id=%s",
+                str(self._idx),
+            )
+
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss)
+            save_emissions_csv_cfl(self._experiment_name, self._idx, energy_grid, emissions, energy_consumed, sample_size)
             #await self._generate_factsheet(trust_config, experiment_name)
         else:
             self._finish_post = True
@@ -318,6 +329,7 @@ async def register_trustworthiness_report(self, source, message):
         if (len(self._trustworthiness_reports) >= self._expected_reports):
             logging.info("[TW SERVER] all reports received, generating csv")
             #GENERAR CSV
+            save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
             if self._finish_post == True:
                 logging.info("[TW SERVER] all reports received and post OK, generating factsheet")
                 #await self._generate_factsheet(self._trust_config, self._experiment_name)
@@ -450,7 +462,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
 
         # Last operations
         save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_loss, last_accuracy)
-        stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, self._emissions_file, self._role.value, workload, sample_size, self._idx)
+        stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
         save_confirmation_csv(self._experiment_name, self._idx)
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index b4597c41f..35ae58a64 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -1,4 +1,5 @@
 import json
+import csv
 import logging
 import math
 import os
@@ -268,8 +269,153 @@ def write_results_json(out_file, dict):
     with open(out_file, "a") as f:
         json.dump(dict, f, indent=4)
 
+def load_data_results_participant(experiment_name: str, participant_id: int | str):
+    data_results_path = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"data_results_{participant_id}.csv")
+
+    if not os.path.exists(data_results_path):
+        raise FileNotFoundError(f"File not found: {data_results_path}")
+
+    with open(data_results_path, "r", newline="") as csv_file:
+        reader = csv.DictReader(csv_file)
+        rows = list(reader)
+
+    if len(rows) == 0:
+        raise ValueError(f"No rows found in {data_results_path}")
+
+    row = rows[0]
+
+    bytes_sent = int(float(row["bytes_sent"]))
+    bytes_recv = int(float(row["bytes_recv"]))
+    accuracy = float(row["accuracy"])
+    loss = float(row["loss"])
+
+    return bytes_sent, bytes_recv, accuracy, loss
+
+
+def load_emissions_participant(experiment_name: str, participant_id: int | str):
+    emissions_path = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"emissions_{participant_id}.csv")
+
+    if not os.path.exists(emissions_path):
+        raise FileNotFoundError(f"File not found: {emissions_path}")
+
+    with open(emissions_path, "r", newline="") as csv_file:
+        reader = csv.DictReader(csv_file)
+        rows = list(reader)
+
+    if len(rows) == 0:
+        raise ValueError(f"No rows found in {emissions_path}")
+
+    row = rows[0]
+
+    energy_grid = float(row["energy_grid"])
+    emissions = float(row["emissions"])
+    energy_consumed = float(row["energy_consumed"])
+    sample_size = int(float(row["sample_size"]))
+
+    return energy_grid, emissions, energy_consumed, sample_size
+
+def save_trustworthiness_reports_csv(
+    reports: dict,
+    experiment_name: str,
+) -> None:
+
+    data_results_path = os.path.join("nebula", "app", "logs", experiment_name, "trustworthiness", "data_results.csv")
+    emissions_path = os.path.join("nebula", "app", "logs", experiment_name, "trustworthiness", "emissions.csv")
+
+    sorted_reports = sorted(
+        reports.values(),
+        key=lambda report: int(report["node_id"])
+    )
+
+    with open(data_results_path, "w", newline="") as csv_file:
+        writer = csv.DictWriter(
+            csv_file,
+            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"],
+        )
+        writer.writeheader()
+
+        for report in sorted_reports:
+            writer.writerow({
+                "id": report["node_id"],
+                "bytes_sent": report["bytes_sent"],
+                "bytes_recv": report["bytes_recv"],
+                "accuracy": report["accuracy"],
+                "loss": report["loss"],
+            })
+
+    with open(emissions_path, "w", newline="") as csv_file:
+        writer = csv.DictWriter(
+            csv_file,
+            fieldnames=["id", "energy_grid", "emissions", "energy_consumed", "sample_size"],
+        )
+        writer.writeheader()
+
+        for report in sorted_reports:
+            writer.writerow({
+                "id": report["node_id"],
+                "energy_grid": report["energy_grid"],
+                "emissions": report["emissions"],
+                "energy_consumed": report["energy_consumed"],
+                "sample_size": report["sample_size"],
+            })
+
+    logging.info(
+        "[TW SERVER] CSV files written correctly: %s, %s",
+        data_results_path,
+        emissions_path,
+    )
+
+def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float):
+    try:
+        data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
+    except:
+        data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "data_results.csv")
+
+    if exists(data_results_file):
+        df = pd.read_csv(data_results_file)
+    else:
+        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"])
+
+    try:
+        # Add new entry to DataFrame
+        new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
+                                    'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
+                                    'loss': [loss]})
+        df = pd.concat([df, new_data], ignore_index=True)
+        logger.info(f"new_data={new_data}")
+
+        df.to_csv(data_results_file, encoding='utf-8', index=False)
+
+    except Exception as e:
+        logger.warning(e)
+
+def save_emissions_csv_cfl(scenario_name: str, id: int, energy_grid: float, emissions: float, energy_consumed: float, sample_size: int):
+    try:
+        data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "emissions.csv")
+    except:
+        data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "emissions.csv")
+
+    if exists(data_results_file):
+        df = pd.read_csv(data_results_file)
+    else:
+        df = pd.DataFrame(columns=["id", "energy_grid", "emissions", "energy_consumed", "sample_size"])
+
+    try:
+        # Add new entry to DataFrame
+        new_data = pd.DataFrame({'id': [id], 'energy_grid': [energy_grid],
+                                    'emissions': [emissions], 'energy_consumed': [energy_consumed],
+                                    'sample_size': [sample_size]})
+        df = pd.concat([df, new_data], ignore_index=True)
+        logger.info(f"new_data={new_data}")
+
+        df.to_csv(data_results_file, encoding='utf-8', index=False)
+
+    except Exception as e:
+        logger.warning(e)
+
 
 def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float):
+    """
     try:
         data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
     except:
@@ -292,6 +438,7 @@ def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: i
 
     except Exception as e:
         logger.warning(e)
+    """
 
     try:
         data_results_id_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"data_results_{id}.csv")

From 39a6bfb65bfdcd2250a8b879e6a62f24a8013599 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 12 Mar 2026 17:54:05 +0100
Subject: [PATCH 06/66] CFL trustworthiness emissions and dataresults done

---
 nebula/addons/trustworthiness/factsheet.py    |  2 ++
 .../addons/trustworthiness/trustworthiness.py | 34 ++++++++++++++-----
 nebula/addons/trustworthiness/utils.py        | 24 +++++++++----
 nebula/core/engine.py                         |  6 ++++
 nebula/core/network/messages.py               |  6 ++++
 nebula/core/pb/nebula.proto                   | 14 +++++---
 nebula/core/pb/nebula_pb2.py                  |  8 ++---
 7 files changed, 72 insertions(+), 22 deletions(-)

diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 56ac39087..4abc312c3 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -167,6 +167,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
             try:
                 factsheet = json.load(f)
 
+                """
                 expected_total = int(factsheet.get("participants", {}).get("client_num", 0) or 0)
                 logging.info(f"[Factsheet] expected_total_nodes = {expected_total}")
 
@@ -186,6 +187,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
                     number_files = len(data)
                     logger.info(f"number_files={number_files}")
                     logger.info(f"expected_nodes={expected_total}")
+                """
 
 
 
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index ca88a27cf..aa0179fdc 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -162,7 +162,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(experiment_name, self._idx)
 
-            energy_grid, emissions, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
+            role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
 
             message = cm.mm.create_message(
                 "trustworthiness",
@@ -172,28 +172,40 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 bytes_recv=bytes_recv,
                 accuracy=accuracy,
                 loss=loss,
+                role=role,
                 energy_grid=energy_grid,
                 emissions=emissions,
+                workload = workload,
+                cpu_model = cpu_model,
+                gpu_model = gpu_model,
+                cpu_used = cpu_used,
+                gpu_used = gpu_used,
                 energy_consumed=energy_consumed,
                 sample_size=sample_size,
             )
-
+            """
             logging.info(
                 "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
-                "accuracy=%s loss=%s energy_grid=%s emissions=%s "
-                "energy_consumed=%s sample_size=%s",
+                "accuracy=%s loss=%s energy_grid=%s emissions=%s workload=%s"
+                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s",
                 server_addr,
                 str(self._idx),
                 bytes_sent,
                 bytes_recv,
                 accuracy,
                 loss,
+                role,
                 energy_grid,
                 emissions,
+                workload,
+                cpu_model,
+                gpu_model,
+                cpu_used,
+                gpu_used,
                 energy_consumed,
                 sample_size,
             )
-
+            """
             await cm.send_message(
                 server_addr,
                 message,
@@ -288,7 +300,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 self._idx,
             )
 
-            energy_grid, emissions, energy_consumed, sample_size = load_emissions_participant(
+            role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(
                 self._experiment_name,
                 self._idx,
             )
@@ -299,7 +311,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             )
 
             save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss)
-            save_emissions_csv_cfl(self._experiment_name, self._idx, energy_grid, emissions, energy_consumed, sample_size)
+            save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             #await self._generate_factsheet(trust_config, experiment_name)
         else:
             self._finish_post = True
@@ -314,8 +326,14 @@ async def register_trustworthiness_report(self, source, message):
             "bytes_recv": message.bytes_recv,
             "accuracy": message.accuracy,
             "loss": message.loss,
+            "role": message.role,
             "energy_grid": message.energy_grid,
             "emissions": message.emissions,
+            "workload": message.workload,
+            "cpu_model": message.cpu_model,
+            "gpu_model": message.gpu_model,
+            "cpu_used": message.cpu_used,
+            "gpu_used": message.gpu_used,
             "energy_consumed": message.energy_consumed,
             "sample_size": message.sample_size,
         }
@@ -463,7 +481,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         # Last operations
         save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_loss, last_accuracy)
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
-        save_confirmation_csv(self._experiment_name, self._idx)
+        #save_confirmation_csv(self._experiment_name, self._idx)
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
     def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_route) -> TrustWorkload:
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index 35ae58a64..a4fa25b8d 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -307,12 +307,18 @@ def load_emissions_participant(experiment_name: str, participant_id: int | str):
 
     row = rows[0]
 
+    role = str(row["role"])
     energy_grid = float(row["energy_grid"])
     emissions = float(row["emissions"])
+    workload = str(row["workload"])
+    cpu_model = str(row["CPU_model"])
+    gpu_model = str(row["GPU_model"])
+    cpu_used = str(row["CPU_used"]).strip().lower() == "true"
+    gpu_used = str(row["GPU_used"]).strip().lower() == "true"
     energy_consumed = float(row["energy_consumed"])
     sample_size = int(float(row["sample_size"]))
 
-    return energy_grid, emissions, energy_consumed, sample_size
+    return role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size
 
 def save_trustworthiness_reports_csv(
     reports: dict,
@@ -346,15 +352,21 @@ def save_trustworthiness_reports_csv(
     with open(emissions_path, "w", newline="") as csv_file:
         writer = csv.DictWriter(
             csv_file,
-            fieldnames=["id", "energy_grid", "emissions", "energy_consumed", "sample_size"],
+            fieldnames=["id", "role", "energy_grid", "emissions", "workload", "CPU_model", "GPU_model", "CPU_used", "GPU_used", "energy_consumed", "sample_size"],
         )
         writer.writeheader()
 
         for report in sorted_reports:
             writer.writerow({
                 "id": report["node_id"],
+                "role": report["role"],
                 "energy_grid": report["energy_grid"],
                 "emissions": report["emissions"],
+                "workload": report["workload"],
+                "CPU_model": report["cpu_model"],
+                "GPU_model": report["gpu_model"],
+                "CPU_used": report["cpu_used"],
+                "GPU_used": report["gpu_used"],
                 "energy_consumed": report["energy_consumed"],
                 "sample_size": report["sample_size"],
             })
@@ -389,7 +401,7 @@ def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_rec
     except Exception as e:
         logger.warning(e)
 
-def save_emissions_csv_cfl(scenario_name: str, id: int, energy_grid: float, emissions: float, energy_consumed: float, sample_size: int):
+def save_emissions_csv_cfl(scenario_name: str, id: int, role: str, energy_grid: float, emissions: float, workload: str, cpu_model: str, gpu_model: str, cpu_used: bool, gpu_used: bool, energy_consumed: float, sample_size: int):
     try:
         data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "emissions.csv")
     except:
@@ -398,12 +410,12 @@ def save_emissions_csv_cfl(scenario_name: str, id: int, energy_grid: float, emis
     if exists(data_results_file):
         df = pd.read_csv(data_results_file)
     else:
-        df = pd.DataFrame(columns=["id", "energy_grid", "emissions", "energy_consumed", "sample_size"])
+        df = pd.DataFrame(columns=["id", "role", "energy_grid", "emissions", "workload", "CPU_model", "GPU_model", "CPU_used", "GPU_used", "energy_consumed", "sample_size"])
 
     try:
         # Add new entry to DataFrame
-        new_data = pd.DataFrame({'id': [id], 'energy_grid': [energy_grid],
-                                    'emissions': [emissions], 'energy_consumed': [energy_consumed],
+        new_data = pd.DataFrame({'id': [id], 'role': [role], 'energy_grid': [energy_grid],
+                                    'emissions': [emissions], 'workload': [workload], 'CPU_model': [cpu_model], 'GPU_model': [gpu_model], 'CPU_used': [cpu_used], 'GPU_used': [gpu_used], 'energy_consumed': [energy_consumed],
                                     'sample_size': [sample_size]})
         df = pd.concat([df, new_data], ignore_index=True)
         logger.info(f"new_data={new_data}")
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 0fc916178..dd3328fea 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -423,8 +423,14 @@ async def _trustworthiness_report_callback(self, source, message):
                 "bytes_recv": message.bytes_recv,
                 "accuracy": message.accuracy,
                 "loss": message.loss,
+                "role": message.role,
                 "energy_grid": message.energy_grid,
                 "emissions": message.emissions,
+                "workload": message.workload,
+                "cpu_model": message.cpu_model,
+                "gpu_model": message.gpu_model,
+                "cpu_used": message.cpu_used,
+                "gpu_used": message.gpu_used,
                 "energy_consumed": message.energy_consumed,
                 "sample_size": message.sample_size,
             }
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index b6a4ce0c0..78914a299 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -102,8 +102,14 @@ def _define_message_templates(self):
                     "bytes_recv",
                     "accuracy",
                     "loss",
+                    "role",
                     "energy_grid",
                     "emissions",
+                    "workload",
+                    "cpu_model",
+                    "gpu_model",
+                    "cpu_used",
+                    "gpu_used",
                     "energy_consumed",
                     "sample_size"
                 ],
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index b1f1d2073..44629cced 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -143,8 +143,14 @@ message TrustworthinessMessage {
   int64 bytes_recv = 4;
   double accuracy = 5;
   double loss = 6;
-  double energy_grid = 7;
-  double emissions = 8;
-  double energy_consumed = 9;
-  int32 sample_size = 10;
+  string role = 7;
+  double energy_grid = 8;
+  double emissions = 9;
+  string workload = 10;
+  string cpu_model = 11;
+  string gpu_model = 12;
+  bool cpu_used = 13;
+  bool gpu_used = 14;
+  double energy_consumed = 15;
+  int32 sample_size = 16;
 }
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index ed1ca508c..f37e89f97 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xf1\x04\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 
\x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\x94\x02\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x13\n\x0b\x65nergy_grid\x18\x07 \x01(\x01\x12\x11\n\temissions\x18\x08 \x01(\x01\x12\x17\n\x0f\x65nergy_consumed\x18\t \x01(\x01\x12\x13\n\x0bsample_size\x18\n \x01(\x05\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xf1\x04\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 
\x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\xfe\x02\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -59,7 +59,7 @@
   _RESPONSEMESSAGE._serialized_start=2068
   _RESPONSEMESSAGE._serialized_end=2103
   _TRUSTWORTHINESSMESSAGE._serialized_start=2106
-  _TRUSTWORTHINESSMESSAGE._serialized_end=2382
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2362
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2382
+  _TRUSTWORTHINESSMESSAGE._serialized_end=2488
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2468
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2488
 # @@protoc_insertion_point(module_scope)

From 0ffa58e68cd70dfd6b72bf29cbda37d2d411cf85 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 13 Mar 2026 16:29:18 +0100
Subject: [PATCH 07/66] All CFL files updated

---
 nebula/addons/trustworthiness/calculation.py  | 30 +++++++++++++++++
 nebula/addons/trustworthiness/dfl_local.py    |  6 ++--
 nebula/addons/trustworthiness/factsheet.py    | 10 +++---
 .../addons/trustworthiness/trustworthiness.py | 32 ++++++++++++++++---
 nebula/addons/trustworthiness/utils.py        | 17 ++++++++++
 5 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index b5f3dbd1f..4b51e3067 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -6,6 +6,7 @@
 from datetime import datetime
 from math import e
 from os.path import exists
+import json
 
 import numpy as np
 import pandas as pd
@@ -206,6 +207,17 @@ def check_properties(*args):
     result = map(lambda x: x is not None and x != "", args)
     return np.mean(list(result))
 
+def get_class_imbalance_local(participant_id, experiment_name):
+    """Return get_cv() of the per-class sample counts stored for one participant."""
+    data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
+
+    # Only the JSON values (presumably samples per label) are used; keys are ignored.
+    with open(data_class_count_file, "r") as file:
+        class_distribution = json.load(file)
+
+    class_samples_sizes = list(class_distribution.values())
+    return get_cv(list=class_samples_sizes)
+
 
 def get_cv(list=None, std=None, mean=None):
     """
@@ -303,6 +315,24 @@ def get_bytes_model(model_file):
 
     return model_size
 
+def get_bytes_final_model_id(id, scenario_name):
+    """
+    Calculates the size in bytes of the final model file of a node by id.
+
+    Args:
+        id: Participant ID.
+        scenario_name (str): Name of the scenario.
+
+    Returns:
+        int: The size in bytes of the node's final model file.
+
+    Raises:
+        OSError: If the model file does not exist or is inaccessible.
+    """
+    model_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"participant_{id}_final_model.pk")
+
+    return os.path.getsize(model_file)
+
 
 def get_bytes_sent_recv(scenario_name):
     """
diff --git a/nebula/addons/trustworthiness/dfl_local.py b/nebula/addons/trustworthiness/dfl_local.py
index 9c91520b9..eeb2f7b95 100644
--- a/nebula/addons/trustworthiness/dfl_local.py
+++ b/nebula/addons/trustworthiness/dfl_local.py
@@ -126,10 +126,10 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
 
         files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
 
-        final_model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
-        train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_train_model.pk")
+        train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
+        #train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_train_model.pk")
         test_dataloader_file = os.path.join(files_dir, f"participant_{participant_idx}_test_loader.pk")
-        emissions_file = os.path.join(files_dir, f"emissions.csv")
+        emissions_file = os.path.join(files_dir, f"emissions_{participant_idx}.csv")
 
         with open(train_model_file, "rb") as t_file:
             lightning_model = pickle.load(t_file)
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 4abc312c3..a44c58748 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -151,7 +151,7 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                 logging.warning(f"{factsheet_file} is invalid")
                 logging.error(e)
 
-    def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
+    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx):
         """
         Populates the factsheet with values after the training.
 
@@ -196,10 +196,10 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
 
                 files_dir = f"{os.environ.get('NEBULA_LOGS_DIR')}/{scenario_name}/trustworthiness"
 
-                models_files = glob.glob(os.path.join(files_dir, "*final_model*"))
+                models_files = glob.glob(os.path.join(files_dir, "*final_model*")) # MANDAR MENSAJE
                 #dataloaders_files = glob.glob(os.path.join(files_dir, "*train_loader*"))
-                test_dataloader_file = f"{files_dir}/participant_1_test_loader.pk"
-                train_model_file = f"{files_dir}/participant_1_train_model.pk"
+                test_dataloader_file = f"{files_dir}/participant_{participant_idx}_test_loader.pk"
+                final_model_file = f"{files_dir}/participant_{participant_idx}_final_model.pk"
                 emissions_file = os.path.join(files_dir, "emissions.csv")
 
                 # # Entropy
@@ -250,7 +250,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time):
                 class_imbalance = get_cv(list=class_samples_sizes)
                 factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance > 1 else class_imbalance
 
-                with open(train_model_file, "rb") as file:
+                with open(final_model_file, "rb") as file:
                     lightning_model = pickle.load(file)
 
                 if dataset == "MNIST" and model == "MLP":
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index aa0179fdc..abbc95ab8 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -7,8 +7,8 @@
 from nebula.config.config import Config
 from nebula.core.engine import Engine
 import pickle
-from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save
-from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl
+from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_final_model_id, get_class_imbalance_local
+from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
 from datetime import datetime
@@ -155,7 +155,8 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             pass
         else:
             cm = CommunicationsManager.get_instance()
-            server_addr = "192.168.51.2:45001"  # cambiar por la IP:PUERTO real del servidor
+
+            server_addr = str(self._engine.config.participant["network_args"]["neighbors"]).strip()
 
             logging.info("connections=%s", list(cm.connections.keys()))
             logging.info("server in connections? %s", server_addr in cm.connections)
@@ -164,6 +165,15 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
 
+            class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
+            logging.info("class_imbalance=%s", class_imbalance)
+
+            model_size = get_bytes_final_model_id(self._idx, experiment_name)
+            logging.info("model_size=%s", model_size)
+
+            local_entropy = get_local_entropy(self._idx, experiment_name)
+            logging.info("local_entropy=%s", local_entropy)
+
             message = cm.mm.create_message(
                 "trustworthiness",
                 action="report",
@@ -262,6 +272,7 @@ async def init(self, experiment_name):
         self._experiment_name = experiment_name
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
+        await self._create_pk_files(experiment_name)
 
         self._per_round = PerRoundTrustMetrics(
             experiment_name=experiment_name,
@@ -273,6 +284,16 @@ async def init(self, experiment_name):
         )
         await self._per_round.setup(self._engine)
 
+    async def _create_pk_files(self, experiment_name):
+        """Pickle this participant's first test dataloader so trust metrics can load it later."""
+        test_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_test_loader.pk"
+        self._engine.trainer.datamodule.setup(stage="test")
+        test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
+
+        # The with-block closes the file on exit, so no explicit close() is needed.
+        with open(test_loader_filename, 'wb') as f:
+            pickle.dump(test_loader, f)
+
 
     def get_workload(self):
         return self._workload
@@ -364,7 +385,7 @@ async def _generate_factsheet(self, trust_config, experiment_name):
 
         factsheet = Factsheet()
         factsheet.populate_factsheet_pre_train(trust_config, experiment_name)
-        factsheet.populate_factsheet_post_train(experiment_name, self._start_time, self._end_time)
+        factsheet.populate_factsheet_post_train(experiment_name, self._start_time, self._end_time, self._idx)
 
         data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
         with open(data_file_path, 'r') as data_file:
@@ -462,8 +483,9 @@ async def _create_trustworthiness_directory(self):
         os.chmod(trust_dir, 0o777)
 
     async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
-        from nebula.addons.trustworthiness.utils import save_class_count_per_participant
         class_counter = self._engine.trainer.datamodule.get_samples_per_label()
+        logging.info("COUNTER=%s", class_counter)
+
         save_class_count_per_participant(self._experiment_name, class_counter, self._idx)
 
         await self.tw.finish_experiment_role_pre_actions()
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index a4fa25b8d..0cc161770 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -116,6 +116,23 @@ def get_all_data_entropy(experiment_name):
     with open(name_file, "w") as f:
         json.dump(entropy_per_participant, f, indent=2)
 
+def get_local_entropy(id, experiment_name):
+    """Return the base-2 entropy, rounded to 6 decimals, of a participant's class counts."""
+    data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(id)}_class_count.json")
+
+    with open(data_class_count_file, "r") as f:
+        class_count = json.load(f)
+
+    counts = list(class_count.values())
+    total = sum(counts)
+
+    # With no samples at all there are no probabilities to build; report 0.0.
+    if total == 0:
+        return round(0.0, 6)
+
+    probabilities = [count / total for count in counts]
+    return round(entropy(probabilities, base=2), 6)
+
 def get_entropy(client_id, scenario_name, dataloader):
     """
     Get the entropy of each client in the scenario.

From ed01c8f876aec4c06e9a7001ddeb139903b0893c Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 16 Mar 2026 12:53:13 +0100
Subject: [PATCH 08/66] CFL totally adapted

---
 nebula/addons/trustworthiness/calculation.py  | 52 +++++++++++++++++++
 nebula/addons/trustworthiness/factsheet.py    | 24 +++++++--
 .../addons/trustworthiness/trustworthiness.py | 23 ++++++--
 nebula/addons/trustworthiness/utils.py        | 11 ++--
 nebula/core/engine.py                         |  3 ++
 nebula/core/network/messages.py               |  5 +-
 nebula/core/pb/nebula.proto                   |  3 ++
 nebula/core/pb/nebula_pb2.py                  |  8 +--
 8 files changed, 113 insertions(+), 16 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 4b51e3067..96a1b2844 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -392,6 +392,58 @@ def get_avg_loss_accuracy(scenario_name):
 
     return avg_loss, avg_accuracy, std_accuracy
 
+
+def get_avg_class_imbalance_model_size(scenario_name):
+    """
+    Calculates the mean class imbalance and the mean model size of the nodes.
+
+    Both values are read from the scenario's "data_results.csv" file, which
+    presumably holds one row per node.
+
+    Args:
+        scenario_name (str): Name of the scenario.
+
+    Returns:
+        2-tuple: The mean class imbalance and the mean model size of the nodes.
+    """
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
+
+    data = read_csv(data_file)
+
+    # Each mean is the column sum divided by the number of rows in the file.
+    number_files = len(data)
+
+    total_class_imbalance = data["class_imbalance"].sum()
+    total_model_size = data["model_size"].sum()
+
+    avg_class_imbalance = total_class_imbalance / number_files
+    avg_model_size = total_model_size / number_files
+    return avg_class_imbalance, avg_model_size
+
+
+def get_entropy_list(scenario_name):
+    """
+    Gets a list with the entropy values of all the nodes.
+
+    The values are taken from the "local_entropy" column of the scenario's
+    "data_results.csv" file.
+
+    Args:
+        scenario_name (str): Name of the scenario.
+
+    Returns:
+        list: List with the entropy values.
+    """
+    logs_dir = os.environ.get('NEBULA_LOGS_DIR')
+    data_file = os.path.join(
+        logs_dir, scenario_name, "trustworthiness", "data_results.csv"
+    )
+
+    data = read_csv(data_file)
+    entropy_column = data["local_entropy"]
+
+    return entropy_column.tolist()
+
 def get_feature_importance_cv(model, test_sample):
     """
     Calculates the coefficient of variation of the feature importance.
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index a44c58748..cb90819e0 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -16,7 +16,7 @@
 from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
@@ -196,7 +196,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 files_dir = f"{os.environ.get('NEBULA_LOGS_DIR')}/{scenario_name}/trustworthiness"
 
-                models_files = glob.glob(os.path.join(files_dir, "*final_model*")) # MANDAR MENSAJE
+                #models_files = glob.glob(os.path.join(files_dir, "*final_model*")) # MANDAR MENSAJE
                 #dataloaders_files = glob.glob(os.path.join(files_dir, "*train_loader*"))
                 test_dataloader_file = f"{files_dir}/participant_{participant_idx}_test_loader.pk"
                 final_model_file = f"{files_dir}/participant_{participant_idx}_final_model.pk"
@@ -210,6 +210,8 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 #     get_entropy(i, scenario_name, dataloader)
                 #     i += 1
 
+
+                """
                 get_all_data_entropy(scenario_name)
 
                 with open(f"{files_dir}/entropy.json", "r") as file:
@@ -221,6 +223,18 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 avg_entropy = np.mean(normalized_values)
 
+                factsheet["data"]["avg_entropy"] = avg_entropy
+                """
+
+                avg_class_imbalance, avg_model_size = get_avg_class_imbalance_model_size(scenario_name)
+                entropy_distribution = get_entropy_list (scenario_name)
+
+                values = np.array(entropy_distribution)
+
+                normalized_values = (values - np.min(values)) / (np.max(values) - np.min(values))
+
+                avg_entropy = np.mean(normalized_values)
+
                 factsheet["data"]["avg_entropy"] = avg_entropy
 
                 # Set performance data
@@ -231,7 +245,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["fairness"]["test_acc_cv"] = 1 if test_acc_cv > 1 else test_acc_cv
 
                 factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
-                factsheet["system"]["avg_model_size"] = get_bytes_models(models_files)
+                factsheet["system"]["avg_model_size"] = avg_model_size
 
                 result_bytes_sent_recv = get_bytes_sent_recv(scenario_name)
                 factsheet["system"]["total_upload_bytes"] = result_bytes_sent_recv[0]
@@ -241,6 +255,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 factsheet["fairness"]["selection_cv"] = 1
 
+                """
                 count_all_class_samples(scenario_name)
 
                 with open(f"{files_dir}/count_class.json", "r") as file:
@@ -249,6 +264,9 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 class_samples_sizes = [x for x in class_distribution.values()]
                 class_imbalance = get_cv(list=class_samples_sizes)
                 factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance > 1 else class_imbalance
+                """
+                class_imbalance_score = 1 / (1+avg_class_imbalance)
+                factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance_score > 1 else class_imbalance_score
 
                 with open(final_model_file, "rb") as file:
                     lightning_model = pickle.load(file)
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index abbc95ab8..b2d21c5f9 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -192,6 +192,9 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 gpu_used = gpu_used,
                 energy_consumed=energy_consumed,
                 sample_size=sample_size,
+                class_imbalance=class_imbalance,
+                model_size=model_size,
+                local_entropy=local_entropy,
             )
             """
             logging.info(
@@ -331,13 +334,22 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 str(self._idx),
             )
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss)
+            class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
+            logging.info("class_imbalance=%s", class_imbalance)
+
+            model_size = get_bytes_final_model_id(self._idx, experiment_name)
+            logging.info("model_size=%s", model_size)
+
+            local_entropy = get_local_entropy(self._idx, experiment_name)
+            logging.info("local_entropy=%s", local_entropy)
+
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
-            #await self._generate_factsheet(trust_config, experiment_name)
+            await self._generate_factsheet(trust_config, experiment_name)
         else:
             self._finish_post = True
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, waiting for trustworthiness reports")
-        await self._generate_factsheet(trust_config, experiment_name)
+        #await self._generate_factsheet(trust_config, experiment_name)
 
     async def register_trustworthiness_report(self, source, message):
         self._trustworthiness_reports[message.node_id] = {
@@ -357,6 +369,9 @@ async def register_trustworthiness_report(self, source, message):
             "gpu_used": message.gpu_used,
             "energy_consumed": message.energy_consumed,
             "sample_size": message.sample_size,
+            "class_imbalance": message.class_imbalance,
+            "model_size": message.model_size,
+            "local_entropy": message.local_entropy,
         }
 
         logging.info(
@@ -371,7 +386,7 @@ async def register_trustworthiness_report(self, source, message):
             save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
             if self._finish_post == True:
                 logging.info("[TW SERVER] all reports received and post OK, generating factsheet")
-                #await self._generate_factsheet(self._trust_config, self._experiment_name)
+                await self._generate_factsheet(self._trust_config, self._experiment_name)
             else:
                 self._csv_completed = True
                 logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index 0cc161770..537ea5b4f 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -353,7 +353,7 @@ def save_trustworthiness_reports_csv(
     with open(data_results_path, "w", newline="") as csv_file:
         writer = csv.DictWriter(
             csv_file,
-            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"],
+            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy"],
         )
         writer.writeheader()
 
@@ -364,6 +364,9 @@ def save_trustworthiness_reports_csv(
                 "bytes_recv": report["bytes_recv"],
                 "accuracy": report["accuracy"],
                 "loss": report["loss"],
+                "class_imbalance": report["class_imbalance"],
+                "model_size": report["model_size"],
+                "local_entropy": report["local_entropy"],
             })
 
     with open(emissions_path, "w", newline="") as csv_file:
@@ -394,7 +397,7 @@ def save_trustworthiness_reports_csv(
         emissions_path,
     )
 
-def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float):
+def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, class_imbalance: float, model_size: int, local_entropy: float):
     try:
         data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
     except:
@@ -403,13 +406,13 @@ def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_rec
     if exists(data_results_file):
         df = pd.read_csv(data_results_file)
     else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"])
+        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy"])
 
     try:
         # Add new entry to DataFrame
         new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss]})
+                                    'loss': [loss], 'class_imbalance': [class_imbalance], 'model_size': [model_size], 'local_entropy': [local_entropy]})
         df = pd.concat([df, new_data], ignore_index=True)
         logger.info(f"new_data={new_data}")
 
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index dd3328fea..b4613c4b3 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -433,6 +433,9 @@ async def _trustworthiness_report_callback(self, source, message):
                 "gpu_used": message.gpu_used,
                 "energy_consumed": message.energy_consumed,
                 "sample_size": message.sample_size,
+                "class_imbalance": message.class_imbalance,
+                "model_size": message.model_size,
+                "local_entropy": message.local_entropy,
             }
 
             logging.info(f"handle_trustworthiness_message | Trigger | {report}")
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index 78914a299..d8fa3dd8f 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -111,7 +111,10 @@ def _define_message_templates(self):
                     "cpu_used",
                     "gpu_used",
                     "energy_consumed",
-                    "sample_size"
+                    "sample_size",
+                    "class_imbalance",
+                    "model_size",
+                    "local_entropy"
                 ],
                 "defaults": {},
             }
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index 44629cced..2171f395a 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -153,4 +153,7 @@ message TrustworthinessMessage {
   bool gpu_used = 14;
   double energy_consumed = 15;
   int32 sample_size = 16;
+  float class_imbalance = 17;
+  int64 model_size = 18;
+  float local_entropy = 19;
 }
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index f37e89f97..fd3caebe1 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xf1\x04\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 
\x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\xfe\x02\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xf1\x04\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 
\x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\xc2\x03\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -59,7 +59,7 @@
   _RESPONSEMESSAGE._serialized_start=2068
   _RESPONSEMESSAGE._serialized_end=2103
   _TRUSTWORTHINESSMESSAGE._serialized_start=2106
-  _TRUSTWORTHINESSMESSAGE._serialized_end=2488
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2468
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2488
+  _TRUSTWORTHINESSMESSAGE._serialized_end=2556
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2536
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2556
 # @@protoc_insertion_point(module_scope)

From 7c6941eedab7ea847a5e20ff80e2b48c1dd5c3b9 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Tue, 17 Mar 2026 12:26:34 +0100
Subject: [PATCH 09/66] Desynchronization error in CFL, accuracy/loss error and
 reports fixed

---
 nebula/addons/trustworthiness/factsheet.py    |  4 +-
 .../addons/trustworthiness/trustworthiness.py | 41 +++++++++++++++----
 nebula/core/network/propagator.py             |  3 +-
 .../frontend/config/participant.json.example  |  2 +-
 4 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index cb90819e0..201475a24 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -280,13 +280,13 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 elif dataset == "Covtype" and model == "MLP":
                     model = CovtypeModelMLP()
                     num_classes_temp = 7
-                elif dataset == "KDDCUP99" and algorithm == "MLP":
+                elif dataset == "KDDCUP99" and model == "MLP":
                     model = KDDCUP99ModelMLP()
                     num_classes_temp = 23
                 elif dataset == "AdultCensus" and model == "MLP":
                     model = AdultCensusModelMLP()
                     num_classes_temp = 2
-                elif dataset == "BreastCancer" and algorithm == "MLP":
+                elif dataset == "BreastCancer" and model == "MLP":
                     model = BreastCancerModelMLP()
                     num_classes_temp = 2
                 # elif dataset == "Syscall" and model == "MLP":
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index b2d21c5f9..a52dcc25a 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -1,4 +1,5 @@
 import logging
+import asyncio
 from nebula.addons.functions import print_msg_box
 from nebula.core.nebulaevents import ExperimentFinishEvent, RoundEndEvent, TestMetricsEvent
 from nebula.core.eventmanager import EventManager
@@ -349,6 +350,36 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
         else:
             self._finish_post = True
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, waiting for trustworthiness reports")
+            await asyncio.sleep(60)
+            if self._trustworthiness_reports != None and self._csv_completed == False:
+                save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
+            bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(
+                self._experiment_name,
+                self._idx,
+            )
+
+            role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(
+                self._experiment_name,
+                self._idx,
+            )
+
+            logging.info(
+                "[TW SERVER] local server report added for node_id=%s",
+                str(self._idx),
+            )
+
+            class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
+            logging.info("class_imbalance=%s", class_imbalance)
+
+            model_size = get_bytes_final_model_id(self._idx, experiment_name)
+            logging.info("model_size=%s", model_size)
+
+            local_entropy = get_local_entropy(self._idx, experiment_name)
+            logging.info("local_entropy=%s", local_entropy)
+
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy)
+            save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
+            await self._generate_factsheet(trust_config, experiment_name)
         #await self._generate_factsheet(trust_config, experiment_name)
 
     async def register_trustworthiness_report(self, source, message):
@@ -384,12 +415,8 @@ async def register_trustworthiness_report(self, source, message):
             logging.info("[TW SERVER] all reports received, generating csv")
             #GENERAR CSV
             save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
-            if self._finish_post == True:
-                logging.info("[TW SERVER] all reports received and post OK, generating factsheet")
-                await self._generate_factsheet(self._trust_config, self._experiment_name)
-            else:
-                self._csv_completed = True
-                logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
+            self._csv_completed = True
+            logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
 
 
     async def _generate_factsheet(self, trust_config, experiment_name):
@@ -516,7 +543,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         sample_size = self.tw.get_sample_size()
 
         # Last operations
-        save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_loss, last_accuracy)
+        save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss)
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
         #save_confirmation_csv(self._experiment_name, self._idx)
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
diff --git a/nebula/core/network/propagator.py b/nebula/core/network/propagator.py
index 717ea5f94..b3fa18de6 100755
--- a/nebula/core/network/propagator.py
+++ b/nebula/core/network/propagator.py
@@ -308,7 +308,7 @@ async def _propagate(self, mpe: ModelPropagationEvent):
             bool: True if propagation occurred (payload sent), False if halted early.
         """
         eligible_neighbors, strategy_id = await mpe.get_event_data()
-        
+
         self.reset_status_history()
         if strategy_id not in self.strategies:
             logging.info(f"Strategy {strategy_id} not found.")
@@ -344,6 +344,7 @@ async def _propagate(self, mpe: ModelPropagationEvent):
 
         current_round = await self.get_round()
         round_number = -1 if strategy_id == "initialization" else current_round
+        await asyncio.sleep(10)
         parameters = serialized_model
         message = self.cm.create_message("model", "", round_number, parameters, weight)
         for neighbor_addr in eligible_neighbors:
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index ca1d1cfd6..ef2976063 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -88,7 +88,7 @@
   },
   "aggregator_args": {
     "algorithm": "FedAvg",
-    "aggregation_timeout": 60,
+    "aggregation_timeout": 240,
     "aggregation_push": "slow"
   },
   "defense_args": {

From 1f4a1e43201872505dcfbcb222e07bf7befd53cd Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 18 Mar 2026 15:58:18 +0100
Subject: [PATCH 10/66] Expected nodes fixed

---
 nebula/addons/trustworthiness/trustworthiness.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index a52dcc25a..6d17f4bc7 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -267,7 +267,7 @@ def __init__(self, engine: Engine, idx, trust_files_route):
         self._trust_files_route = trust_files_route
         self._per_round = None
         self._trustworthiness_reports = {}
-        self._expected_reports = 2
+        self._expected_reports = int(self._engine.config.participant["scenario_args"]["n_nodes"])-1
         self._trust_config = None
         self._csv_completed = False
         self._finish_post = False

From 6c345499682cf16187994ebd3187d0a815e57d41 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 25 Mar 2026 10:33:46 +0100
Subject: [PATCH 11/66] DFL global trust score implemented with reputation

---
 nebula/addons/trustworthiness/dfl_local.py    |  21 +-
 .../addons/trustworthiness/trustworthiness.py | 318 +++++++++++++++++-
 nebula/addons/trustworthiness/utils.py        | 197 ++++++++++-
 nebula/core/engine.py                         |  23 ++
 nebula/core/network/actions.py                |  10 +-
 nebula/core/network/forwarder.py              |  17 +-
 nebula/core/network/messages.py               |  12 +-
 nebula/core/pb/nebula.proto                   |  11 +
 nebula/core/pb/nebula_pb2.py                  |  88 ++---
 9 files changed, 630 insertions(+), 67 deletions(-)

diff --git a/nebula/addons/trustworthiness/dfl_local.py b/nebula/addons/trustworthiness/dfl_local.py
index eeb2f7b95..f1d7a06d4 100644
--- a/nebula/addons/trustworthiness/dfl_local.py
+++ b/nebula/addons/trustworthiness/dfl_local.py
@@ -49,9 +49,18 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
         n_rounds = int(data["rounds"])
         attack = data["attack_params"]["attacks"]
         if attack != "No Attack":
-            poisoned_node_percent = int(data["attack_params"]["poisoned_node_percent"])
-            poisoned_sample_percent = int(data["attack_params"]["poisoned_sample_percent"])
-            poisoned_noise_percent = int(data["attack_params"]["poisoned_noise_percent"])
+            if attack == "Model Poisoning":
+                poisoned_node_percent = int(data["attack_params"]["poisoned_node_percent"])
+                poisoned_sample_percent = 0
+                poisoned_noise_percent = int(data["attack_params"]["poisoned_noise_percent"])
+            elif attack == "Model Poisoning":
+                poisoned_node_percent = int(data["attack_params"]["poisoned_node_percent"])
+                poisoned_sample_percent = int(data["attack_params"]["poisoned_sample_percent"])
+                poisoned_noise_percent = int(data["attack_params"]["poisoned_noise_percent"])
+            else:
+                poisoned_node_percent = 0
+                poisoned_sample_percent = 0
+                poisoned_noise_percent = 0
         else:
             poisoned_node_percent = 0
             poisoned_sample_percent = 0
@@ -60,7 +69,7 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
         is_dynamic_topology = False # data["is_dynamic_topology"]
         is_dynamic_aggregation = False # data["is_dynamic_aggregation"]
         target_aggregation = False # data["target_aggregation"]
-
+        """
         if attack != "No Attack" and with_reputation == True and is_dynamic_aggregation == True:
             background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. A reputation-based defence with a dynamic aggregation based on the aggregation algorithm {target_aggregation} is used, and the trustworthiness of the project is desired."
 
@@ -72,6 +81,10 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
 
         elif attack == "No Attack":
             background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks against clients are used, and the trustworthiness of the project is desired."
+        """
+
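+        # TODO: temporary hard-coded "No Attack" background; restore the attack-aware text disabled in the string block above.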
+        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks against clients are used, and the trustworthiness of the project is desired."
 
         # Set project specifications
         factsheet["project"]["overview"] = data["scenario_title"]
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 6d17f4bc7..8bbf3d517 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -9,7 +9,7 @@
 from nebula.core.engine import Engine
 import pickle
 from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_final_model_id, get_class_imbalance_local
-from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy
+from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy, load_trust_report_json_dumped, create_local_trust_report_copy, accumulate_weighted_trustscores, build_weighted_trustscores_report, save_trust_report_json
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
 from datetime import datetime
@@ -53,6 +53,10 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
         raise NotImplementedError
 
 class TrustWorkloadTrainer(TrustWorkload):
+    TRUSTSCORES_WAIT_TIMEOUT_SECONDS = 10
+    TRUSTSCORES_FORWARDING_GRACE_SECONDS = 1.0
+    TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS = 0.25
+
     def __init__(self, engine, idx, trust_files_route):
         self._engine: Engine = engine
         self._workload = 'training'
@@ -66,9 +70,20 @@ def __init__(self, engine, idx, trust_files_route):
         self._per_round = None
         self._start_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
         self._end_time = None
+        self._expected_trustscores_sources = set()
+        self._expected_trustscores_reports = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
+        self._received_trustscores_node_ids = set()
+        self._trustscores_wait_event = None
+        self._trustscores_score_accumulator = {}
+        self._trustscores_weight_accumulator = {}
+        self._trustscores_template_report = None
+        self._trustscores_local_copy_path = None
+        self._trustscores_local_report_initialized = False
 
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
+        #self._reset_trustscores_exchange_state()
+        self._trustscores_wait_event = asyncio.Event()
         await EventManager.get_instance().subscribe_node_event(RoundEndEvent, self._process_round_end_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
@@ -86,7 +101,7 @@ async def init(self, experiment_name):
 
 
     async def _create_pk_files(self, experiment_name):
-        # Save data to local files to calculate the trustworthyness
+        # Save data to local files to compute trustworthiness
         train_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_train_loader.pk"
         test_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_test_loader.pk"
         self._engine.trainer.datamodule.setup(stage="fit")
@@ -118,8 +133,9 @@ async def finish_experiment_role_pre_actions(self):
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
         federation = trust_config.get("federation")  # "CFL" or "DFL"
 
-        if federation == "DFL" or (federation == "SDFL" and self._idx == 0):
+        if federation == "DFL":
             self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+            await self._prepare_trustscores_exchange()
             data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
             with open(data_file_path, 'r') as data_file:
                 data = json.load(data_file)
@@ -148,10 +164,19 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                     "federation_complexity": float(data["federation_complexity"])
                 }
 
-            compute_trust_local_dfl(experiment_name, self._idx, trust_config, self._start_time, self._end_time)
-
-            trust_metric_manager = TrustMetricManager(self._start_time, federation, self._idx)
-            trust_metric_manager.evaluate_participant(experiment_name, weights, self._idx, use_weights=True)
+            json_dumped = await asyncio.to_thread(
+                self._compute_local_trustscores_report,
+                experiment_name,
+                trust_config,
+                weights,
+                federation,
+            )
+            logging.info("JSON_dumped=%s", json_dumped)
+            self._initialize_local_trustscores_aggregation(experiment_name)
+            await self._share_trustscores_report(json_dumped)
+            await self._wait_for_trustscores_reports()
+            await self._wait_for_trustscores_forwarding_drain()
+            self._finalize_trustscores_aggregation()
         elif federation == "SDFL":
             pass
         else:
@@ -227,10 +252,268 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 allow_after_learning_finished=True,
             )
 
+    def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
+        """Run the local DFL trust computation and metric evaluation, then return this participant's trust report as a JSON string."""
+        node_idx = self._idx
+        compute_trust_local_dfl(experiment_name, node_idx, trust_config, self._start_time, self._end_time)
+        metric_manager = TrustMetricManager(self._start_time, federation, node_idx)
+        metric_manager.evaluate_participant(experiment_name, weights, node_idx, use_weights=True)
+        return load_trust_report_json_dumped(experiment_name, node_idx)
+    """
+    def _reset_trustscores_exchange_state(self):
+        self._expected_trustscores_sources = set()
+        self._received_trustscores_node_ids = set()
+        self._trustscores_score_accumulator = {}
+        self._trustscores_weight_accumulator = {}
+        self._trustscores_template_report = None
+        self._trustscores_local_copy_path = None
+        self._trustscores_local_report_initialized = False
+    """
+    def _is_reputation_enabled(self) -> bool:
+        """Return True when the participant config has ``defense_args.reputation.enabled`` set to a truthy value."""
+        participant_cfg = self._engine.config.participant
+        return bool(participant_cfg.get("defense_args", {}).get("reputation", {}).get("enabled", False))
+
+    def _get_reputation_system(self):
+        return getattr(self._engine, "_reputation", None)  # None when the engine has no `_reputation` attribute
+
+    def _get_trustscores_weight_for_source(self, source: str, node_id: int | str) -> float:
+        """Return the aggregation weight for a report that arrived from ``source``.
+
+        The weight is the reputation stored for ``source`` when the reputation
+        defence is enabled and holds a value for it; otherwise it is 1.0.
+
+        Args:
+            source: Address the report came from; used as the reputation lookup key.
+            node_id: Identifier carried by the report; only used in log messages.
+        """
+        fallback_weight = 1.0
+        if not self._is_reputation_enabled():
+            return fallback_weight
+        reputation_system = self._get_reputation_system()
+        if reputation_system is None:
+            logging.warning("[TW DFL] Reputation is enabled but the reputation system is not available. Using fallback weight=1.0 for node_id=%s source=%s", node_id, source)
+            return fallback_weight
+
+        entry = reputation_system.reputation.get(source)
+        if entry is not None and entry.get("reputation") is not None:
+            return float(entry["reputation"])
+        logging.warning("[TW DFL] No reputation value available for node_id=%s source=%s. Using fallback weight=1.0", node_id, source)
+        return fallback_weight
+
+    def _get_trustscores_peer_weights_from_reputation(self) -> dict:
+        """Map each peer address to its current reputation as a float.
+
+        Returns an empty dict when the reputation defence is disabled or the
+        reputation system is missing. The local address and entries without a
+        reputation value are left out.
+        """
+        reputation_system = self._get_reputation_system() if self._is_reputation_enabled() else None
+        if reputation_system is None:
+            return {}
+        return {
+            addr: float(entry["reputation"])
+            for addr, entry in reputation_system.reputation.items()
+            if addr != self._engine.addr and entry.get("reputation") is not None
+        }
+
+    def _get_trustscores_self_weight(self) -> float:
+        return 1.0  # weight given to this node's own report when it is accumulated
+
+    def _log_trustscores_node_weights(self):
+        """Log the weights this node will use when aggregating trustscores reports.
+
+        Emits one summary line with the self weight and the peer weight map,
+        followed by one line per peer in address order. When reputation is
+        disabled, or no peer reputation is available, a single informational
+        line is logged instead.
+        """
+        if not self._is_reputation_enabled():
+            logging.info("[TW DFL] Reputation system disabled. trustscores weights fallback to 1.0 for all nodes")
+            return
+
+        weights_by_addr = self._get_trustscores_peer_weights_from_reputation()
+        if not weights_by_addr:
+            logging.info("[TW DFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 1.0 when needed")
+            return
+
+        logging.info(
+            "[TW DFL] Local trustscores weights from reputation | self_node_id=%s self_weight=%s peer_weights_by_addr=%s",
+            self._idx,
+            self._get_trustscores_self_weight(),
+            weights_by_addr,
+        )
+
+        # One line per peer, ordered by address, so a single weight is easy to find in the log.
+        per_peer_format = "[TW DFL] Local trustscores weight from reputation | self_node_id=%s target_addr=%s weight=%s"
+        for peer_addr in sorted(weights_by_addr):
+            logging.info(per_peer_format, self._idx, peer_addr, weights_by_addr[peer_addr])
+
+    def _initialize_local_trustscores_aggregation(self, experiment_name: str):
+        """Seed the weighted accumulators with this node's own trust report.
+
+        Runs at most once per instance: a copy of the local report is created,
+        kept as the output template, and accumulated with the self weight.
+        """
+        if self._trustscores_local_report_initialized:
+            return
+        template, copy_path = create_local_trust_report_copy(experiment_name, self._idx)
+        self._trustscores_template_report, self._trustscores_local_copy_path = template, copy_path
+        self_weight = self._get_trustscores_self_weight()
+        accumulate_weighted_trustscores(
+            report=template,
+            weight=self_weight,
+            score_accumulator=self._trustscores_score_accumulator,
+            weight_accumulator=self._trustscores_weight_accumulator,
+        )
+        self._trustscores_local_report_initialized = True
+        logging.info("[TW DFL] Local trustscores copy created at %s and accumulator initialized with local weight=%s", copy_path, self_weight)
+
+    async def _prepare_trustscores_exchange(self):
+        """Snapshot the direct neighbors and arm the wait event for the report exchange.
+
+        The event is set immediately when the expected number of reports has
+        already been received or when no remote report is expected.
+        """
+        comms = CommunicationsManager.get_instance()
+        self._expected_trustscores_sources = await comms.get_all_addrs_current_connections(only_direct=True)
+        if self._trustscores_wait_event is None:
+            self._trustscores_wait_event = asyncio.Event()
+        wait_event = self._trustscores_wait_event
+        wait_event.clear()
+        expected = self._expected_trustscores_reports
+        if len(self._received_trustscores_node_ids) >= expected:
+            wait_event.set()
+        if expected <= 0:
+            wait_event.set()
+            logging.info("[TW DFL] No remote trustscores reports expected")
+            return
+
+        logging.info("[TW DFL] Expecting %s trustscores reports. Initial neighbors=%s", expected, sorted(self._expected_trustscores_sources))
+        self._log_trustscores_node_weights()
+
+    async def _share_trustscores_report(self, trust_report_json: str):
+        """Send this node's trust report to every direct neighbor captured during preparation.
+
+        Args:
+            trust_report_json: The local trust report serialized as a JSON string.
+        """
+        comms = CommunicationsManager.get_instance()
+        targets = self._expected_trustscores_sources.copy()
+        if not targets:
+            logging.info("[TW DFL] No direct neighbors available to share trustscores")
+            return
+
+        payload = comms.create_message(
+            "trustscores",
+            action="share",
+            node_id=str(self._idx),
+            trust_report_json=trust_report_json,
+        )
+        logging.info("[TW DFL] Sharing trustscores report with neighbors=%s", sorted(targets))
+        # Called from the experiment-finish path, hence allow_after_learning_finished=True.
+        send_options = {"message_type": "trustscores", "allow_after_learning_finished": True}
+        for peer in targets:
+            await comms.send_message(peer, payload, **send_options)
+
+    async def _wait_for_trustscores_reports(self):
+        """Wait until the exchange event is set or TRUSTSCORES_WAIT_TIMEOUT_SECONDS elapses.
+
+        A timeout is logged as a warning and otherwise ignored, so the caller
+        continues with whatever reports were received.
+        """
+        wait_event = self._trustscores_wait_event
+        if wait_event is None:
+            return
+
+        try:
+            await asyncio.wait_for(wait_event.wait(), timeout=self.TRUSTSCORES_WAIT_TIMEOUT_SECONDS)
+        except asyncio.TimeoutError:
+            received, expected = len(self._received_trustscores_node_ids), self._expected_trustscores_reports
+            logging.warning("[TW DFL] Timeout waiting trustscores reports. Received=%s/%s missing=%s", received, expected, expected - received)
+        else:
+            logging.info(
+                "[TW DFL] Trustscores exchange complete (%s/%s)",
+                len(self._received_trustscores_node_ids),
+                self._expected_trustscores_reports,
+            )
+
+    async def _wait_for_trustscores_forwarding_drain(self):
+        """Sleep briefly so forwarded trustscores messages can be relayed before shutdown.
+
+        The pause is the larger of the fixed grace period and the forwarder's
+        interval + messages_interval plus a margin; skipped without neighbors.
+        """
+        if not self._expected_trustscores_sources:
+            return
+
+        forwarder = getattr(CommunicationsManager.get_instance(), "forwarder", None)
+        interval = float(getattr(forwarder, "interval", 0))
+        messages_interval = float(getattr(forwarder, "messages_interval", 0))
+        pause = max(
+            self.TRUSTSCORES_FORWARDING_GRACE_SECONDS,
+            interval + messages_interval + self.TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS,
+        )
+        logging.info("[TW DFL] Waiting %.2fs to drain forwarded trustscores messages before shutdown", pause)
+        await asyncio.sleep(pause)
+
+    def _finalize_trustscores_aggregation(self):
+        """Write the weighted-average report over the local copy created at initialization.
+
+        Only logs a warning when the template report or the copy path is missing.
+        """
+        template, output_path = self._trustscores_template_report, self._trustscores_local_copy_path
+        if template is None or output_path is None:
+            logging.warning("[TW DFL] Skipping weighted trustscores write because local copy/template is not available")
+            return
+
+        aggregated = build_weighted_trustscores_report(
+            template_report=template, score_accumulator=self._trustscores_score_accumulator, weight_accumulator=self._trustscores_weight_accumulator
+        )
+        save_trust_report_json(output_path, aggregated)
+        logging.info("[TW DFL] Weighted trustscores written to local copy=%s", output_path)
+
+    async def register_trustscores_report(self, source, message):
+        """Accumulate a trust report received from a peer and track exchange progress.
+
+        Reports from this node itself and repeated reports from the same node_id
+        are ignored. A report whose weight is not positive (e.g. a peer whose
+        reputation is 0) adds nothing to the weighted average but still counts
+        as received, so the exchange can finish without hitting the wait timeout.
+        """
+        node_id = str(message.node_id)
+        if node_id == str(self._idx):
+            logging.info("[TW DFL] Ignoring own trustscores report from %s", source)
+            return
+
+        if node_id in self._received_trustscores_node_ids:
+            logging.info("[TW DFL] Ignoring duplicated trustscores report from node_id=%s source=%s", message.node_id, source)
+            return
+
+        trust_report = json.loads(message.trust_report_json)
+        remote_weight = self._get_trustscores_weight_for_source(source, message.node_id)
+        if remote_weight > 0:
+            accumulate_weighted_trustscores(
+                report=trust_report,
+                weight=remote_weight,
+                score_accumulator=self._trustscores_score_accumulator,
+                weight_accumulator=self._trustscores_weight_accumulator,
+            )
+            logging.info("[TW DFL] Trustscores report received from node_id=%s source=%s accumulated_with_weight=%s", message.node_id, source, remote_weight)
+        else:
+            # accumulate_weighted_trustscores() raises ValueError for weights <= 0, which would leave this report uncounted.
+            logging.warning("[TW DFL] Trustscores report from node_id=%s source=%s skipped: non-positive weight=%s", message.node_id, source, remote_weight)
+
+        self._received_trustscores_node_ids.add(node_id)
+        received_count = len(self._received_trustscores_node_ids)
+        logging.info("[TW DFL] Trustscores progress %s/%s", received_count, self._expected_trustscores_reports)
+        if self._trustscores_wait_event is not None and received_count >= self._expected_trustscores_reports:
+            self._trustscores_wait_event.set()
+
     async def _process_round_end_event(self, ree: RoundEndEvent):
         scenario_name = self._engine.config.participant["scenario_args"]["name"]
         train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"
-        # Save the train model in trustworthy dir
+        # Save the training model in the trustworthiness directory
         with open(train_model, 'wb') as f:
             pickle.dump(self._engine.trainer.model, f)
 
@@ -289,7 +572,7 @@ async def init(self, experiment_name):
         await self._per_round.setup(self._engine)
 
     async def _create_pk_files(self, experiment_name):
-        # Save data to local files to calculate the trustworthyness
+        # Save data to local files to compute trustworthiness
         test_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_test_loader.pk"
         self._engine.trainer.datamodule.setup(stage="test")
         test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
@@ -413,7 +696,7 @@ async def register_trustworthiness_report(self, source, message):
 
         if (len(self._trustworthiness_reports) >= self._expected_reports):
             logging.info("[TW SERVER] all reports received, generating csv")
-            #GENERAR CSV
+            # Generate CSV files
             save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
             self._csv_completed = True
             logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
@@ -475,7 +758,7 @@ async def _process_test_metrics_event(self, tme: TestMetricsEvent):
     async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
 
-        # Save model in trustworthy dir
+        # Save the model in the trustworthiness directory
         with open(model_file, 'wb') as f:
             pickle.dump(self._engine.trainer.model, f)
 
@@ -503,12 +786,12 @@ def __init__(self, engine: Engine, config: Config):
 
         self._engine.trustworthiness = self
 
-        # EmissionsTracker from codecarbon to measure the emissions during the aggregation step in the server
+        # EmissionsTracker from CodeCarbon to measure emissions during the server aggregation step
         self._tracker= EmissionsTracker(tracking_mode='process', log_level='error', save_to_file=False)
 
     @property
     def tw(self):
-        """TrustWorkload depending on the node Role"""
+        """TrustWorkload implementation chosen according to the node role."""
         return self._trust_workload
 
     async def start(self):
@@ -520,7 +803,7 @@ async def start(self):
     async def _create_trustworthiness_directory(self):
         import os
         trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self._experiment_name, "trustworthiness")
-        # Create a directory to save files to calcutate trust
+        # Create a directory to store files used to compute trust
         os.makedirs(trust_dir, exist_ok=True)
         os.chmod(trust_dir, 0o777)
 
@@ -534,15 +817,15 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
 
         last_loss, last_accuracy = self.tw.get_metrics()
 
-        # Get bytes send/received from reporter
+        # Get sent/received bytes from the reporter
         bytes_sent = self._engine.reporter.acc_bytes_sent
         bytes_recv = self._engine.reporter.acc_bytes_recv
 
-        # Get TrustWorkload info
+        # Get TrustWorkload information
         workload = self.tw.get_workload()
         sample_size = self.tw.get_sample_size()
 
-        # Last operations
+        # Final operations
         save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss)
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
         #save_confirmation_csv(self._experiment_name, self._idx)
@@ -555,6 +838,7 @@ def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_r
             Role.PROXY: TrustWorkloadTrainer,
             Role.IDLE: TrustWorkloadTrainer,
             Role.TRAINER_AGGREGATOR: TrustWorkloadTrainer,
+            Role.MALICIOUS: TrustWorkloadTrainer,
             Role.SERVER: TrustWorkloadServer
         }
         trust_workload = trust_workloads.get(role)
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index 537ea5b4f..789d4d6af 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -5,6 +5,7 @@
 import os
 import pickle
 from os.path import exists
+import copy
 
 import pandas as pd
 from hashids import Hashids
@@ -42,7 +43,7 @@ def count_all_class_samples(experiment_name):
 
         participant_id += 1
 
-    # Guardar conteo total en class_count.json
+    # Save the total class count into class_count.json
     output_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'),experiment_name, "trustworthiness", "count_class.json")
 
     with open(output_file, "w") as f:
@@ -516,3 +517,197 @@ def save_confirmation_csv(scenario_name: str, id: int):
 
     except Exception as e:
         logger.warning(e)
+
+def load_trust_report_json_dumped(scenario_name: str, participant_id: int) -> str:
+    """
+    Load a participant's trust report from disk and re-serialize it as a JSON string.
+
+    The report is read from
+    ``$NEBULA_LOGS_DIR/<scenario_name>/trustworthiness/nebula_trust_results_<participant_id>.json``.
+
+    Args:
+        scenario_name (str): Scenario/experiment name.
+        participant_id (int): Participant ID.
+
+    Returns:
+        str: The report, parsed and dumped again with ``json.dumps``.
+
+    Raises:
+        FileNotFoundError: If the report file does not exist.
+        ValueError: If NEBULA_LOGS_DIR is unset or empty.
+        ValueError: If the file content is not valid JSON.
+    """
+    base_dir = os.environ.get("NEBULA_LOGS_DIR")
+    if not base_dir:
+        raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
+
+    report_name = f"nebula_trust_results_{participant_id}.json"
+    trust_dir = os.path.join(base_dir, scenario_name, "trustworthiness")
+    report_path = os.path.join(trust_dir, report_name)
+
+    if not os.path.exists(report_path):
+        raise FileNotFoundError(f"The file does not exist: {report_path}")
+
+    # Parsing before dumping means a malformed file is reported here as ValueError.
+    try:
+        with open(report_path, "r", encoding="utf-8") as handle:
+            parsed = json.load(handle)
+    except json.JSONDecodeError as err:
+        raise ValueError(f"The file does not contain valid JSON: {report_path}") from err
+
+    return json.dumps(parsed)
+
+
+def load_trust_report_json(scenario_name: str, participant_id: int | str) -> dict:
+    """Return the participant's trust report parsed from the string produced by ``load_trust_report_json_dumped``."""
+    return json.loads(load_trust_report_json_dumped(scenario_name, participant_id))
+
+
+def create_local_trust_report_copy(scenario_name: str, participant_id: int | str, suffix: str = "global") -> tuple[dict, str]:
+    """Save a ``_<suffix>`` copy of the participant's trust report in the same directory; return (report, copy path)."""
+    report = load_trust_report_json(scenario_name, participant_id)
+    base_dir = os.environ.get("NEBULA_LOGS_DIR")
+    if not base_dir:
+        raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
+
+    target_dir = os.path.join(base_dir, scenario_name, "trustworthiness")
+    os.makedirs(target_dir, exist_ok=True)
+    copy_path = os.path.join(target_dir, f"nebula_trust_results_{participant_id}_{suffix}.json")
+    with open(copy_path, "w", encoding="utf-8") as handle:
+        json.dump(report, handle, indent=4)
+
+    return report, copy_path
+
+
+def save_trust_report_json(file_path: str, trust_report: dict) -> str:
+    """Write ``trust_report`` to ``file_path`` as 4-space-indented JSON, creating the parent directory first; return ``file_path``."""
+    parent = os.path.dirname(file_path)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    with open(file_path, "w", encoding="utf-8") as handle:
+        json.dump(trust_report, handle, indent=4)
+
+    return file_path
+
+
+def accumulate_weighted_trustscores(report: dict, weight: float, score_accumulator: dict, weight_accumulator: dict):
+    """Add every score found in ``report``, scaled by ``weight``, to the running accumulators.
+
+    Both accumulators are updated in place and keyed by the score's path inside the report.
+
+    Raises:
+        ValueError: If ``weight`` is zero or negative.
+    """
+    if weight <= 0:
+        raise ValueError("The aggregation weight must be greater than 0.")
+    _accumulate_weighted_trustscores_recursive(report, float(weight), (), score_accumulator, weight_accumulator)
+
+
+def build_weighted_trustscores_report(template_report: dict, score_accumulator: dict, weight_accumulator: dict) -> dict:
+    """Return a deep copy of ``template_report`` with its scores replaced by weighted averages.
+
+    The averages come from the two accumulators; ``template_report`` itself is not modified.
+    """
+    averaged = copy.deepcopy(template_report)
+    # The recursive helper rewrites ``averaged`` in place.
+    _apply_weighted_trustscores_recursive(averaged, (), score_accumulator, weight_accumulator)
+    return averaged
+
+
+def _accumulate_weighted_trustscores_recursive(obj, weight: float, path: tuple, score_accumulator: dict, weight_accumulator: dict):
+    """Walk ``obj`` and add each numeric ``score``/``trust_score`` times ``weight`` to the accumulators.
+
+    Accumulator keys are path tuples. A single-key wrapper dict recognized by
+    ``_get_structural_named_entry`` contributes the fixed segment
+    ``"__named_entry__"`` instead of its own key, so differently named
+    wrappers at the same position share one accumulator slot.
+    """
+    if isinstance(obj, list):
+        for position, element in enumerate(obj):
+            _accumulate_weighted_trustscores_recursive(
+                obj=element,
+                weight=weight,
+                path=path + (position,),
+                score_accumulator=score_accumulator,
+                weight_accumulator=weight_accumulator,
+            )
+        return
+    if not isinstance(obj, dict):
+        return
+
+    named_entry = _get_structural_named_entry(obj)
+    if named_entry is not None:
+        _accumulate_weighted_trustscores_recursive(
+            obj=named_entry[1],
+            weight=weight,
+            path=path + ("__named_entry__",),
+            score_accumulator=score_accumulator,
+            weight_accumulator=weight_accumulator,
+        )
+        return
+
+    for key, value in obj.items():
+        child_path = path + (key,)
+        if key in {"trust_score", "score"} and _is_numeric_score(value):
+            score_accumulator[child_path] = score_accumulator.get(child_path, 0.0) + (float(value) * weight)
+            weight_accumulator[child_path] = weight_accumulator.get(child_path, 0.0) + weight
+        else:
+            _accumulate_weighted_trustscores_recursive(value, weight, child_path, score_accumulator, weight_accumulator)
+
+
+def _apply_weighted_trustscores_recursive(obj, path: tuple, score_accumulator: dict, weight_accumulator: dict):
+    """Replace each accumulated score in ``obj`` with its weighted average, in place, and return ``obj``.
+
+    Paths are built the same way as in ``_accumulate_weighted_trustscores_recursive``,
+    including the ``"__named_entry__"`` segment for single-key wrapper dicts.
+    A score is overwritten with ``sum / total_weight`` rounded to 6 decimals
+    only when a non-zero total weight was recorded for its path.
+    """
+    if isinstance(obj, list):
+        for position, element in enumerate(obj):
+            obj[position] = _apply_weighted_trustscores_recursive(element, path + (position,), score_accumulator, weight_accumulator)
+        return obj
+    if not isinstance(obj, dict):
+        return obj
+
+    named_entry = _get_structural_named_entry(obj)
+    if named_entry is not None:
+        wrapper_key, inner = named_entry
+        obj[wrapper_key] = _apply_weighted_trustscores_recursive(
+            obj=inner,
+            path=path + ("__named_entry__",),
+            score_accumulator=score_accumulator,
+            weight_accumulator=weight_accumulator,
+        )
+        return obj
+
+    # Only values of existing keys are reassigned, so iterating obj.items() stays valid.
+    for key, value in obj.items():
+        child_path = path + (key,)
+        if key in {"trust_score", "score"} and _is_numeric_score(value):
+            total_weight = weight_accumulator.get(child_path)
+            # Paths with no recorded (or zero) weight keep their original value.
+            if total_weight:
+                obj[key] = round(score_accumulator[child_path] / total_weight, 6)
+        else:
+            obj[key] = _apply_weighted_trustscores_recursive(value, child_path, score_accumulator, weight_accumulator)
+
+    return obj
+
+
+def _get_structural_named_entry(obj: dict):
+    """Return ``(key, value)`` when ``obj`` is a one-key wrapper around a scored section, else ``None``.
+
+    A wrapper qualifies when its only value is a dict containing at least one of
+    the keys ``score``, ``metrics``, ``notions`` or ``pillars``.
+    """
+    if len(obj) != 1:
+        return None
+    [(entry_key, nested_value)] = obj.items()
+    if isinstance(nested_value, dict) and not {"score", "metrics", "notions", "pillars"}.isdisjoint(nested_value):
+        return entry_key, nested_value
+    return None
+
+
+def _is_numeric_score(value):
+    return not isinstance(value, bool) and isinstance(value, (int, float))  # bool is an int subclass, so rule it out first
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index b4613c4b3..259f96580 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -449,6 +449,29 @@ async def _trustworthiness_report_callback(self, source, message):
         except Exception as e:
             logging.exception(f"Error handling trustworthiness message: {e}")
 
+    async def _trustscores_share_callback(self, source, message):
+        """Hand a received trustscores message to the trust workload, if one is attached.
+
+        The handler is looked up on ``self.trustworthiness`` (falling back to
+        ``self.trustscores``); any exception is logged and swallowed.
+        """
+        try:
+            report = {
+                "source": source,
+                "node_id": message.node_id,
+                "trust_report_json": message.trust_report_json,
+            }
+            logging.info(f"handle_trustscores_message | Trigger | {report}")
+
+            trust_handler = getattr(self, "trustworthiness", None)
+            if trust_handler is None:
+                trust_handler = getattr(self, "trustscores", None)
+            workload = getattr(trust_handler, "tw", None)
+            if workload is not None and hasattr(workload, "register_trustscores_report"):
+                await workload.register_trustscores_report(source, message)
+        except Exception as e:
+            logging.exception(f"Error handling trustscores message: {e}")
+
     """                                                     ##############################
                                                             #    REGISTERING CALLBACKS   #
                                                             ##############################
diff --git a/nebula/core/network/actions.py b/nebula/core/network/actions.py
index a3abca1f2..bd5bde211 100644
--- a/nebula/core/network/actions.py
+++ b/nebula/core/network/actions.py
@@ -85,11 +85,18 @@ class ReputationAction(Enum):
 
 class TrustworthinessAction(Enum):
     """
-    Enum for reputation exchange messages in the federation.
+    Enum for trustworthiness exchange messages in the federation.
     """
 
     REPORT = nebula_pb2.TrustworthinessMessage.Action.REPORT
 
+
+class TrustscoresAction(Enum):
+    """Enum for trustworthiness scores exchange messages in the federation."""
+
+    # Only action defined for TrustscoresMessage in nebula.proto.
+    SHARE = nebula_pb2.TrustscoresMessage.Action.SHARE
+
 
 # Mapping between message type strings and their corresponding Enum classes
 ACTION_CLASSES = {
@@ -102,6 +109,7 @@ class TrustworthinessAction(Enum):
     "link": LinkAction,
     "reputation": ReputationAction,
     "trustworthiness": TrustworthinessAction,
+    "trustscores": TrustscoresAction,
 }
 
 
diff --git a/nebula/core/network/forwarder.py b/nebula/core/network/forwarder.py
index 86ce75536..db98246fb 100755
--- a/nebula/core/network/forwarder.py
+++ b/nebula/core/network/forwarder.py
@@ -3,6 +3,7 @@
 import time
 
 from nebula.addons.functions import print_msg_box
+from nebula.core.pb import nebula_pb2
 from nebula.core.utils.locker import Locker
 
 
@@ -114,12 +115,17 @@ async def process_pending_messages(self, messages_left):
         """
         while messages_left > 0 and not self.pending_messages.empty():
             msg, neighbors = await self.pending_messages.get()
+            allow_after_learning_finished = self._allow_forward_after_learning_finished(msg)
             for neighbor in neighbors[:messages_left]:
                 if neighbor not in self.cm.connections:
                     continue
                 try:
                     logging.debug(f"🔁  Sending message (forwarding) --> to {neighbor}")
-                    await self.cm.send_message(neighbor, msg)
+                    await self.cm.send_message(
+                        neighbor,
+                        msg,
+                        allow_after_learning_finished=allow_after_learning_finished,
+                    )
                 except Exception as e:
                     logging.exception(f"🔁  Error forwarding message to {neighbor}. Error: {e!s}")
                     pass
@@ -129,6 +135,15 @@ async def process_pending_messages(self, messages_left):
                 logging.debug("🔁  Putting message back in queue for forwarding to the remaining neighbors")
                 await self.pending_messages.put((msg, neighbors[messages_left:]))
 
+    def _allow_forward_after_learning_finished(self, msg: bytes) -> bool:
+        """Return True only if ``msg`` decodes to a Wrapper whose payload is a trustscores message."""
+        try:
+            payload_kind = nebula_pb2.Wrapper.FromString(msg).WhichOneof("message")
+            return payload_kind == "trustscores_message"
+        except Exception as e:
+            logging.warning(f"🔁  Could not inspect forwarded message type: {e!s}")
+            return False
+
     async def forward(self, msg, addr_from):
         """
         Enqueue a received message for forwarding to all other direct neighbors.
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index d8fa3dd8f..1b687d7bb 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -117,6 +117,14 @@ def _define_message_templates(self):
                     "local_entropy"
                 ],
                 "defaults": {},
+            },
+            "trustscores": {
+                "parameters": [
+                    "action",
+                    "node_id",
+                    "trust_report_json"
+                ],
+                "defaults": {},
             }
             # Add additional message types here
         }
@@ -146,7 +154,7 @@ async def process_message(self, data, addr_from):
             addr_from (str): Address from which the message was received.
         """
         not_processing_messages = {"control_message", "connection_message"}
-        special_processing_messages = {"discovery_message", "federation_message", "model_message"}
+        special_processing_messages = {"discovery_message", "federation_message", "model_message", "trustscores_message"}
 
         try:
             message_wrapper = nebula_pb2.Wrapper()
@@ -225,6 +233,8 @@ def _should_forward_message(self, message_type, message_wrapper):
             == nebula_pb2.FederationMessage.Action.Value("FEDERATION_START")
         ):
             return True
+        if message_type == "trustscores_message":
+            return True
 
     def create_message(self, message_type: str, action: str = "", *args, **kwargs):
         """
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index 2171f395a..df6d55c6b 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -27,6 +27,7 @@ message Wrapper {
     OfferMessage offer_message = 10;
     LinkMessage link_message = 11;
     TrustworthinessMessage trustworthiness_message = 12;
+    TrustscoresMessage trustscores_message = 13;
   }
 }
 
@@ -157,3 +158,13 @@ message TrustworthinessMessage {
   int64 model_size = 18;
   float local_entropy = 19;
 }
+
+message TrustscoresMessage {
+  enum Action {
+    SHARE = 0;  // only action defined for this message
+  }
+
+  Action action = 1;
+  string node_id = 2;
+  string trust_report_json = 3;  // presumably a JSON-encoded trust report (per the field name)
+}
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index fd3caebe1..e1227477e 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xf1\x04\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 
\x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\xc2\x03\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xac\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 
\x01(\t\"\xc2\x03\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -21,45 +21,49 @@
 
   DESCRIPTOR._options = None
   _WRAPPER._serialized_start=25
-  _WRAPPER._serialized_end=650
-  _DISCOVERYMESSAGE._serialized_start=653
-  _DISCOVERYMESSAGE._serialized_end=811
-  _DISCOVERYMESSAGE_ACTION._serialized_start=759
-  _DISCOVERYMESSAGE_ACTION._serialized_end=811
-  _CONTROLMESSAGE._serialized_start=814
-  _CONTROLMESSAGE._serialized_end=1023
-  _CONTROLMESSAGE_ACTION._serialized_start=893
-  _CONTROLMESSAGE_ACTION._serialized_end=1023
-  _FEDERATIONMESSAGE._serialized_start=1026
-  _FEDERATIONMESSAGE._serialized_end=1231
-  _FEDERATIONMESSAGE_ACTION._serialized_start=1131
-  _FEDERATIONMESSAGE_ACTION._serialized_end=1231
-  _MODELMESSAGE._serialized_start=1233
-  _MODELMESSAGE._serialized_end=1298
-  _CONNECTIONMESSAGE._serialized_start=1301
-  _CONNECTIONMESSAGE._serialized_end=1444
-  _CONNECTIONMESSAGE_ACTION._serialized_start=1372
-  _CONNECTIONMESSAGE_ACTION._serialized_end=1444
-  _DISCOVERMESSAGE._serialized_start=1447
-  _DISCOVERMESSAGE._serialized_end=1596
-  _DISCOVERMESSAGE_ACTION._serialized_start=1514
-  _DISCOVERMESSAGE_ACTION._serialized_end=1596
-  _OFFERMESSAGE._serialized_start=1599
-  _OFFERMESSAGE._serialized_end=1805
-  _OFFERMESSAGE_ACTION._serialized_start=1762
-  _OFFERMESSAGE_ACTION._serialized_end=1805
-  _LINKMESSAGE._serialized_start=1807
-  _LINKMESSAGE._serialized_end=1926
-  _LINKMESSAGE_ACTION._serialized_start=1881
-  _LINKMESSAGE_ACTION._serialized_end=1926
-  _REPUTATIONMESSAGE._serialized_start=1929
-  _REPUTATIONMESSAGE._serialized_end=2066
-  _REPUTATIONMESSAGE_ACTION._serialized_start=2047
-  _REPUTATIONMESSAGE_ACTION._serialized_end=2066
-  _RESPONSEMESSAGE._serialized_start=2068
-  _RESPONSEMESSAGE._serialized_end=2103
-  _TRUSTWORTHINESSMESSAGE._serialized_start=2106
-  _TRUSTWORTHINESSMESSAGE._serialized_end=2556
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2536
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2556
+  _WRAPPER._serialized_end=709
+  _DISCOVERYMESSAGE._serialized_start=712
+  _DISCOVERYMESSAGE._serialized_end=870
+  _DISCOVERYMESSAGE_ACTION._serialized_start=818
+  _DISCOVERYMESSAGE_ACTION._serialized_end=870
+  _CONTROLMESSAGE._serialized_start=873
+  _CONTROLMESSAGE._serialized_end=1082
+  _CONTROLMESSAGE_ACTION._serialized_start=952
+  _CONTROLMESSAGE_ACTION._serialized_end=1082
+  _FEDERATIONMESSAGE._serialized_start=1085
+  _FEDERATIONMESSAGE._serialized_end=1290
+  _FEDERATIONMESSAGE_ACTION._serialized_start=1190
+  _FEDERATIONMESSAGE_ACTION._serialized_end=1290
+  _MODELMESSAGE._serialized_start=1292
+  _MODELMESSAGE._serialized_end=1357
+  _CONNECTIONMESSAGE._serialized_start=1360
+  _CONNECTIONMESSAGE._serialized_end=1503
+  _CONNECTIONMESSAGE_ACTION._serialized_start=1431
+  _CONNECTIONMESSAGE_ACTION._serialized_end=1503
+  _DISCOVERMESSAGE._serialized_start=1506
+  _DISCOVERMESSAGE._serialized_end=1655
+  _DISCOVERMESSAGE_ACTION._serialized_start=1573
+  _DISCOVERMESSAGE_ACTION._serialized_end=1655
+  _OFFERMESSAGE._serialized_start=1658
+  _OFFERMESSAGE._serialized_end=1864
+  _OFFERMESSAGE_ACTION._serialized_start=1821
+  _OFFERMESSAGE_ACTION._serialized_end=1864
+  _LINKMESSAGE._serialized_start=1866
+  _LINKMESSAGE._serialized_end=1985
+  _LINKMESSAGE_ACTION._serialized_start=1940
+  _LINKMESSAGE_ACTION._serialized_end=1985
+  _REPUTATIONMESSAGE._serialized_start=1988
+  _REPUTATIONMESSAGE._serialized_end=2125
+  _REPUTATIONMESSAGE_ACTION._serialized_start=2106
+  _REPUTATIONMESSAGE_ACTION._serialized_end=2125
+  _RESPONSEMESSAGE._serialized_start=2127
+  _RESPONSEMESSAGE._serialized_end=2162
+  _TRUSTWORTHINESSMESSAGE._serialized_start=2165
+  _TRUSTWORTHINESSMESSAGE._serialized_end=2615
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2595
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2615
+  _TRUSTSCORESMESSAGE._serialized_start=2618
+  _TRUSTSCORESMESSAGE._serialized_end=2754
+  _TRUSTSCORESMESSAGE_ACTION._serialized_start=2735
+  _TRUSTSCORESMESSAGE_ACTION._serialized_end=2754
 # @@protoc_insertion_point(module_scope)

From fb7dccb4d3a2cf6b36aa33d9f3c06a61d64e2867 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 25 Mar 2026 13:42:42 +0100
Subject: [PATCH 12/66] DFL and SDFL global trust scores added to Real-Time
 metrics

---
 nebula/addons/trustworthiness/graphics.py     | 166 +++-----
 .../addons/trustworthiness/trustworthiness.py | 374 +++++++++++++++---
 2 files changed, 376 insertions(+), 164 deletions(-)

diff --git a/nebula/addons/trustworthiness/graphics.py b/nebula/addons/trustworthiness/graphics.py
index 03239ac72..e2f339eb3 100644
--- a/nebula/addons/trustworthiness/graphics.py
+++ b/nebula/addons/trustworthiness/graphics.py
@@ -30,7 +30,7 @@ def __init__(
         else:
             self.nebulalogger = NebulaTensorBoardLogger(scenario_start_time, f"{log_dir}", name="metrics", version=f"trust_{participant_id}", log_graph=True)
 
-    def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
+    def __log_figure(self, df, pillar, color, tag_root, notion_y_pos = -0.4, figsize=(10,6)):
         filtered_df = df[df['Pillar'] == pillar].copy()
 
         filtered_df.loc[:, 'Metric'] = filtered_df['Metric'].astype(str).str.replace('_', ' ')
@@ -92,14 +92,14 @@ def __log_figure(self, df, pillar, color, notion_y_pos = -0.4, figsize=(10,6)):
 
         plt.tight_layout()
 
-        self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/Pillar/{pillar}")
+        self.nebulalogger.log_figure(ax.get_figure(), 0, f"{tag_root}/Pillar/{pillar}")
         plt.close()
 
-    def graphics(self):
-        results_file = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self.scenario_name, "trustworthiness", "nebula_trust_results.json")
+    def _load_trust_results(self, results_file):
         with open(results_file, 'r') as f:
-            results = json.load(f)
+            return json.load(f)
 
+    def _log_trust_report(self, results, tag_root, all_pillars_tag, label_suffix=""):
         pillars_list = []
         notion_names = []
         notion_scores = []
@@ -133,13 +133,13 @@ def graphics(self):
             "Metric Score": metric_scores
         })
 
-        self.__log_figure(df, 'robustness', "#F8D3DF")
-        self.__log_figure(df, "privacy", "#DA8D8B", -0.2)
-        self.__log_figure(df, "fairness", "#DDDDDD")
-        self.__log_figure(df, "explainability", "#FCEFC3")
-        self.__log_figure(df, "accountability", "#8FAADC", -0.3)
-        self.__log_figure(df, "architectural_soundness", "#DBB9FA", -0.3)
-        self.__log_figure(df, "sustainability", "#BBFDAF", -0.5, figsize=(12,8))
+        self.__log_figure(df, 'robustness', "#F8D3DF", tag_root)
+        self.__log_figure(df, "privacy", "#DA8D8B", tag_root, -0.2)
+        self.__log_figure(df, "fairness", "#DDDDDD", tag_root)
+        self.__log_figure(df, "explainability", "#FCEFC3", tag_root)
+        self.__log_figure(df, "accountability", "#8FAADC", tag_root, -0.3)
+        self.__log_figure(df, "architectural_soundness", "#DBB9FA", tag_root, -0.3)
+        self.__log_figure(df, "sustainability", "#BBFDAF", tag_root, -0.5, figsize=(12,8))
 
         categories = [
             "robustness",
@@ -169,108 +169,58 @@ def graphics(self):
             ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10)
 
         name_labels = [
-            "Robustness",
-            "Privacy",
-            "Fairness",
-            "Explainability",
-            "Accountability",
-            "Architectural Soundness",
-            "Sustainability",
-            "Trust Score"
+            f"Robustness{label_suffix}",
+            f"Privacy{label_suffix}",
+            f"Fairness{label_suffix}",
+            f"Explainability{label_suffix}",
+            f"Accountability{label_suffix}",
+            f"Architectural Soundness{label_suffix}",
+            f"Sustainability{label_suffix}",
+            f"Trust Score{label_suffix}"
         ]
 
         ax.set_xticks(range(len(categories)))
         ax.set_xticklabels(name_labels, rotation=45)
 
-        self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/AllPillars")
+        self.nebulalogger.log_figure(ax.get_figure(), 0, all_pillars_tag)
         plt.close()
 
+    def graphics(self):
+        """Load the scenario's ``nebula_trust_results.json`` and log its figures under the ``Trust`` tag root."""
+        report_path = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self.scenario_name, "trustworthiness", "nebula_trust_results.json")
+        self._log_trust_report(self._load_trust_results(report_path), "Trust", "Trust/AllPillars")
+
     def graphics_dfl(self,participant_id):
             results_file = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self.scenario_name, "trustworthiness", f"nebula_trust_results_{participant_id}.json")
-            with open(results_file, 'r') as f:
-                results = json.load(f)
-
-            pillars_list = []
-            notion_names = []
-            notion_scores = []
-            metric_names = []
-            metric_scores = []
-
-            for pillar in results["pillars"]:
-                for key, value in pillar.items():
-                    pillar_name = key
-                    if "notions" in value:
-                        for notion in value["notions"]:
-                            for notion_key, notion_value in notion.items():
-                                notion_name = notion_key
-                                notion_score = notion_value["score"]
-                                for metric in notion_value["metrics"]:
-                                    for metric_key, metric_value in metric.items():
-                                        metric_name = metric_key
-                                        metric_score = metric_value["score"]
-
-                                        pillars_list.append(pillar_name)
-                                        notion_names.append(notion_name)
-                                        notion_scores.append(notion_score)
-                                        metric_names.append(metric_name)
-                                        metric_scores.append(metric_score)
-
-            df = pd.DataFrame({
-                "Pillar": pillars_list,
-                "Notion": notion_names,
-                "Notion Score": notion_scores,
-                "Metric": metric_names,
-                "Metric Score": metric_scores
-            })
-
-            self.__log_figure(df, 'robustness', "#F8D3DF")
-            self.__log_figure(df, "privacy", "#DA8D8B", -0.2)
-            self.__log_figure(df, "fairness", "#DDDDDD")
-            self.__log_figure(df, "explainability", "#FCEFC3")
-            self.__log_figure(df, "accountability", "#8FAADC", -0.3)
-            self.__log_figure(df, "architectural_soundness", "#DBB9FA", -0.3)
-            self.__log_figure(df, "sustainability", "#BBFDAF", -0.5, figsize=(12,8))
-
-            categories = [
-                "robustness",
-                "privacy",
-                "fairness",
-                "explainability",
-                "accountability",
-                "architectural_soundness",
-                "sustainability"
-            ]
-
-            scores = [results["pillars"][i][category]["score"] for i, category in enumerate(categories)]
-
-            trust_score = results["trust_score"]
-            categories.append("trust_score")
-            scores.append(trust_score)
-
-            palette = ["#F8D3DF", "#DA8D8B", "#DDDDDD", "#FCEFC3", "#8FAADC", "#DBB9FA", "#BBFDAF", "#BF9000"]
-
-            plt.figure(figsize=(10, 8))
-            ax = sns.barplot(x=categories, y=scores, palette=palette, hue=categories, legend=False)
-            ax.set_xlabel("Pillar")
-            ax.set_ylabel("Score")
-            ax.set_title("Pillars and trust scores")
-
-            for i, v in enumerate(scores):
-                ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10)
-
-            name_labels = [
-                f"Robustness_{participant_id}",
-                f"Privacy_{participant_id}",
-                f"Fairness_{participant_id}",
-                f"Explainability_{participant_id}",
-                f"Accountability_{participant_id}",
-                f"Architectural Soundness_{participant_id}",
-                f"Sustainability_{participant_id}",
-                f"Trust Score_{participant_id}"
-            ]
-
-            ax.set_xticks(range(len(categories)))
-            ax.set_xticklabels(name_labels, rotation=45)
-
-            self.nebulalogger.log_figure(ax.get_figure(), 0, f"Trust/AllPillars_{participant_id}")
-            plt.close()
+            results = self._load_trust_results(results_file)
+            self._log_trust_report(results, "Trust", f"Trust/AllPillars_{participant_id}", label_suffix=f"_{participant_id}")
+
+    def graphics_dfl_global(self, participant_id):
+        """Log a participant's ``*_global.json`` trust report under the ``TrustGlobal`` tag root.
+
+        Reads ``nebula_trust_results_<participant_id>_global.json`` from the scenario's
+        ``trustworthiness`` log directory; the id also suffixes the tag and bar labels.
+        """
+        report_path = os.path.join(
+            os.environ.get("NEBULA_LOGS_DIR"),
+            self.scenario_name,
+            "trustworthiness",
+            f"nebula_trust_results_{participant_id}_global.json",
+        )
+        suffix = f"_{participant_id}"
+        self._log_trust_report(self._load_trust_results(report_path), "TrustGlobal", f"TrustGlobal/AllPillars{suffix}", label_suffix=suffix)
+
+    def graphics_sdfl_global(self, participant_id):
+        """Log the shared ``nebula_trust_results.json`` report under the ``TrustGlobal`` tag root.
+
+        The file path carries no participant id; ``participant_id`` only suffixes the
+        ``AllPillars`` tag and the bar labels.
+        """
+        report_path = os.path.join(
+            os.environ.get("NEBULA_LOGS_DIR"),
+            self.scenario_name,
+            "trustworthiness",
+            "nebula_trust_results.json",
+        )
+        suffix = f"_{participant_id}"
+        self._log_trust_report(self._load_trust_results(report_path), "TrustGlobal", f"TrustGlobal/AllPillars{suffix}", label_suffix=suffix)
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 8bbf3d517..44c934ce9 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -16,6 +16,7 @@
 from nebula.addons.trustworthiness.factsheet import Factsheet
 from nebula.addons.trustworthiness.metric import TrustMetricManager
 from nebula.addons.trustworthiness.dfl_local import compute_trust_local_dfl
+from nebula.addons.trustworthiness.graphics import Graphics
 import json, os
 from nebula.core.network.communications import CommunicationsManager
 
@@ -53,9 +54,9 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
         raise NotImplementedError
 
 class TrustWorkloadTrainer(TrustWorkload):
-    TRUSTSCORES_WAIT_TIMEOUT_SECONDS = 10
+    TRUSTSCORES_WAIT_TIMEOUT_SECONDS = 20
     TRUSTSCORES_FORWARDING_GRACE_SECONDS = 1.0
-    TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS = 0.25
+    TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS = 1.0
 
     def __init__(self, engine, idx, trust_files_route):
         self._engine: Engine = engine
@@ -82,7 +83,7 @@ def __init__(self, engine, idx, trust_files_route):
 
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
-        #self._reset_trustscores_exchange_state()
+        self._reset_trustscores_exchange_state()
         self._trustscores_wait_event = asyncio.Event()
         await EventManager.get_instance().subscribe_node_event(RoundEndEvent, self._process_round_end_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
@@ -134,51 +135,9 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
         federation = trust_config.get("federation")  # "CFL" or "DFL"
 
         if federation == "DFL":
-            self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
-            await self._prepare_trustscores_exchange()
-            data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
-            with open(data_file_path, 'r') as data_file:
-                data = json.load(data_file)
-
-                weights = {
-                    "robustness": float(data["robustness_pillar"]),
-                    "resilience_to_attacks": float(data["resilience_to_attacks"]),
-                    "algorithm_robustness": float(data["algorithm_robustness"]),
-                    "client_reliability": float(data["client_reliability"]),
-                    "privacy": float(data["privacy_pillar"]),
-                    "technique": float(data["technique"]),
-                    "uncertainty": float(data["uncertainty"]),
-                    "indistinguishability": float(data["indistinguishability"]),
-                    "fairness": float(data["fairness_pillar"]),
-                    "class_distribution": float(data["class_distribution"]),
-                    "explainability": float(data["explainability_pillar"]),
-                    "interpretability": float(data["interpretability"]),
-                    "post_hoc_methods": float(data["post_hoc_methods"]),
-                    "accountability": float(data["accountability_pillar"]),
-                    "factsheet_completeness":  float(data["factsheet_completeness"]),
-                    "architectural_soundness": float(data["architectural_soundness_pillar"]),
-                    "client_management": float(data["client_management"]),
-                    "optimization": float(data["optimization"]),
-                    "sustainability": float(data["sustainability_pillar"]),
-                    "energy_source": float(data["energy_source"]),
-                    "federation_complexity": float(data["federation_complexity"])
-                }
-
-            json_dumped = await asyncio.to_thread(
-                self._compute_local_trustscores_report,
-                experiment_name,
-                trust_config,
-                weights,
-                federation,
-            )
-            logging.info("JSON_dumped=%s", json_dumped)
-            self._initialize_local_trustscores_aggregation(experiment_name)
-            await self._share_trustscores_report(json_dumped)
-            await self._wait_for_trustscores_reports()
-            await self._wait_for_trustscores_forwarding_drain()
-            self._finalize_trustscores_aggregation()
+            await self._finish_dfl_trustscores_exchange(trust_config, experiment_name)
         elif federation == "SDFL":
-            pass
+            await self._finish_sdfl_trustscores_exchange(trust_config, experiment_name)
         else:
             cm = CommunicationsManager.get_instance()
 
@@ -252,6 +211,56 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 allow_after_learning_finished=True,
             )
 
+    # -------------------------------------------------------------------------
+    # DFL trustscores flow
+    # -------------------------------------------------------------------------
+
+    async def _finish_dfl_trustscores_exchange(self, trust_config, experiment_name):
+        """Run the DFL end-of-experiment trustscores flow.
+
+        Computes the local report, shares it, waits for peer reports and for forwarding to drain, then finalizes.
+        """
+        self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        await self._prepare_trustscores_exchange()
+
+        local_weights = self._load_local_trustscores_weights(experiment_name)
+        report_json = await asyncio.to_thread(
+            self._compute_local_trustscores_report, experiment_name, trust_config, local_weights, "DFL"
+        )
+        logging.info("JSON_dumped=%s", report_json)
+        self._initialize_local_trustscores_aggregation(experiment_name)
+        await self._share_trustscores_report(report_json)
+        await self._wait_for_trustscores_reports()
+        await self._wait_for_trustscores_forwarding_drain()
+        self._finalize_trustscores_aggregation()
+
+    # -------------------------------------------------------------------------
+    # SDFL trustscores flow
+    # -------------------------------------------------------------------------
+
+    async def _finish_sdfl_trustscores_exchange(self, trust_config, experiment_name):
+        """Run the SDFL end-of-experiment trustscores flow.
+
+        Each node computes its local report and shares it with its direct neighbors;
+        only aggregator-role nodes initialize and finalize the global aggregation.
+        """
+        self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        await self._prepare_sdfl_trustscores_exchange()
+
+        # The local report is computed with the "DFL" federation label, as in the DFL flow.
+        local_weights = self._load_local_trustscores_weights(experiment_name)
+        report_json = await asyncio.to_thread(
+            self._compute_local_trustscores_report, experiment_name, trust_config, local_weights, "DFL"
+        )
+        if self._is_sdfl_aggregator_node():
+            self._initialize_sdfl_global_trustscores_aggregation(experiment_name)
+
+        await self._share_sdfl_trustscores_report(report_json)
+        await self._wait_for_sdfl_trustscores_reports()
+        await self._wait_for_sdfl_trustscores_forwarding_drain()
+        if self._is_sdfl_aggregator_node():
+            self._finalize_sdfl_global_trustscores_aggregation()
+
     def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
         compute_trust_local_dfl(experiment_name, self._idx, trust_config, self._start_time, self._end_time)
 
@@ -259,7 +268,36 @@ def _compute_local_trustscores_report(self, experiment_name, trust_config, weigh
         trust_metric_manager.evaluate_participant(experiment_name, weights, self._idx, use_weights=True)
 
         return load_trust_report_json_dumped(experiment_name, self._idx)
-    """
+
+    def _load_local_trustscores_weights(self, experiment_name: str) -> dict:
+        """Read the pillar and metric weights from the experiment's scenario.json.
+
+        Returns a dict of floats keyed by weight name. A missing field raises
+        KeyError; a non-numeric value raises from float().
+        """
+        scenario_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
+        with open(scenario_path, 'r') as scenario_file:
+            scenario = json.load(scenario_file)
+
+        # Pillar weights are stored under "<name>_pillar"; metric weights under their own name.
+        pillars = (
+            "robustness", "privacy", "fairness", "explainability",
+            "accountability", "architectural_soundness", "sustainability",
+        )
+        weight_names = (
+            "robustness", "resilience_to_attacks", "algorithm_robustness", "client_reliability",
+            "privacy", "technique", "uncertainty", "indistinguishability",
+            "fairness", "class_distribution",
+            "explainability", "interpretability", "post_hoc_methods",
+            "accountability", "factsheet_completeness",
+            "architectural_soundness", "client_management", "optimization",
+            "sustainability", "energy_source", "federation_complexity",
+        )
+        return {
+            name: float(scenario[f"{name}_pillar" if name in pillars else name])
+            for name in weight_names
+        }
+
     def _reset_trustscores_exchange_state(self):
         self._expected_trustscores_sources = set()
         self._received_trustscores_node_ids = set()
@@ -268,7 +306,7 @@ def _reset_trustscores_exchange_state(self):
         self._trustscores_template_report = None
         self._trustscores_local_copy_path = None
         self._trustscores_local_report_initialized = False
-    """
+
     def _is_reputation_enabled(self) -> bool:
         defense_args = self._engine.config.participant.get("defense_args", {})
         reputation_config = defense_args.get("reputation", {})
@@ -279,25 +317,25 @@ def _get_reputation_system(self):
 
     def _get_trustscores_weight_for_source(self, source: str, node_id: int | str) -> float:
         if not self._is_reputation_enabled():
-            return 1.0
+            return 0.5
 
         reputation_system = self._get_reputation_system()
         if reputation_system is None:
             logging.warning(
-                "[TW DFL] Reputation is enabled but the reputation system is not available. Using fallback weight=1.0 for node_id=%s source=%s",
+                "[TW DFL] Reputation is enabled but the reputation system is not available. Using fallback weight=0.5 for node_id=%s source=%s",
                 node_id,
                 source,
             )
-            return 1.0
+            return 0.5
 
         reputation_entry = reputation_system.reputation.get(source)
         if reputation_entry is None or reputation_entry.get("reputation") is None:
             logging.warning(
-                "[TW DFL] No reputation value available for node_id=%s source=%s. Using fallback weight=1.0",
+                "[TW DFL] No reputation value available for node_id=%s source=%s. Using fallback weight=0.5",
                 node_id,
                 source,
             )
-            return 1.0
+            return 0.5
 
         return float(reputation_entry["reputation"])
 
@@ -323,14 +361,14 @@ def _get_trustscores_self_weight(self) -> float:
     def _log_trustscores_node_weights(self):
         if not self._is_reputation_enabled():
             logging.info(
-                "[TW DFL] Reputation system disabled. trustscores weights fallback to 1.0 for all nodes"
+                "[TW DFL] Reputation system disabled. trustscores weights fallback to 0.5 for all nodes"
             )
             return
 
         peer_weight_map = self._get_trustscores_peer_weights_from_reputation()
         if not peer_weight_map:
             logging.info(
-                "[TW DFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 1.0 when needed"
+                "[TW DFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 0.5 when needed"
             )
             return
 
@@ -473,7 +511,187 @@ def _finalize_trustscores_aggregation(self):
             self._trustscores_local_copy_path,
         )
 
+        graphics = Graphics(self._start_time, self._experiment_name, self._idx)
+        graphics.graphics_dfl_global(self._idx)
+
+    def _is_sdfl_aggregator_node(self) -> bool:
+        """Return True when this node's current role is AGGREGATOR or TRAINER_AGGREGATOR."""
+        return self._engine.rb.get_role() in {Role.AGGREGATOR, Role.TRAINER_AGGREGATOR}
+
+    def _initialize_sdfl_global_trustscores_aggregation(self, experiment_name: str):
+        """Seed the global SDFL accumulators with this node's own trust report.
+
+        Writes the local report to nebula_trust_results.json and accumulates it
+        with weight 1.0. Does nothing if the local report was already initialized.
+        """
+        if self._trustscores_local_report_initialized:
+            return
+
+        local_report = json.loads(load_trust_report_json_dumped(experiment_name, self._idx))
+        results_path = os.path.join(
+            os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness", "nebula_trust_results.json"
+        )
+        # Persist the local report first; the finalize step later overwrites this same path.
+        save_trust_report_json(results_path, local_report)
+
+        self._trustscores_template_report = local_report
+        self._trustscores_local_copy_path = results_path
+        accumulate_weighted_trustscores(
+            report=local_report,
+            weight=1.0,
+            score_accumulator=self._trustscores_score_accumulator,
+            weight_accumulator=self._trustscores_weight_accumulator,
+        )
+        self._trustscores_local_report_initialized = True
+        logging.info("[TW SDFL] Global trustscores accumulator initialized at %s with local weight=1.0", results_path)
+
+    async def _prepare_sdfl_trustscores_exchange(self):
+        """Snapshot direct neighbors and arm the wait event for the SDFL trustscores exchange."""
+        # NOTE(review): _expected_trustscores_reports is only read here, never assigned;
+        # confirm it is set elsewhere before this runs.
+        comms = CommunicationsManager.get_instance()
+        self._expected_trustscores_sources = await comms.get_all_addrs_current_connections(only_direct=True)
+
+        if self._trustscores_wait_event is None:
+            self._trustscores_wait_event = asyncio.Event()
+        self._trustscores_wait_event.clear()
+        expected = self._expected_trustscores_reports
+        if len(self._received_trustscores_node_ids) >= expected:
+            self._trustscores_wait_event.set()
+        if expected <= 0:
+            self._trustscores_wait_event.set()
+            logging.info("[TW SDFL] No remote trustscores reports expected")
+            return
+
+        logging.info(
+            "[TW SDFL] Expecting %s trustscores reports. Initial neighbors=%s aggregator_mode=%s",
+            expected, sorted(self._expected_trustscores_sources), self._is_sdfl_aggregator_node(),
+        )
+        if self._is_sdfl_aggregator_node():
+            self._log_sdfl_trustscores_node_weights()
+
+    def _log_sdfl_trustscores_node_weights(self):
+        """Log the reputation-derived weights used for the SDFL trustscores aggregation.
+
+        The fallback value quoted in the messages matches _get_trustscores_weight_for_source (0.5).
+        """
+        if not self._is_reputation_enabled():
+            logging.info(
+                "[TW SDFL] Reputation system disabled. trustscores weights fallback to 0.5 for all nodes"
+            )
+            return
+
+        peer_weight_map = self._get_trustscores_peer_weights_from_reputation()
+        if not peer_weight_map:
+            logging.info(
+                "[TW SDFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 0.5 when needed"
+            )
+            return
+
+        logging.info(
+            "[TW SDFL] Global trustscores weights from reputation | self_node_id=%s self_weight=%s peer_weights_by_addr=%s",
+            self._idx, self._get_trustscores_self_weight(), peer_weight_map,
+        )
+
+        for addr, weight in sorted(peer_weight_map.items()):
+            logging.info(
+                "[TW SDFL] Global trustscores weight from reputation | self_node_id=%s target_addr=%s weight=%s",
+                self._idx, addr, weight,
+            )
+
+    async def _share_sdfl_trustscores_report(self, trust_report_json: str):
+        """Send this node's trust report to every direct neighbor recorded for the exchange.
+
+        Returns without sending anything when no neighbors were recorded.
+        """
+        comms = CommunicationsManager.get_instance()
+        targets = self._expected_trustscores_sources.copy()
+        if not targets:
+            logging.info("[TW SDFL] No direct neighbors available to share trustscores")
+            return
+
+        payload = comms.create_message(
+            "trustscores",
+            action="share",
+            node_id=str(self._idx), trust_report_json=trust_report_json,
+        )
+        logging.info("[TW SDFL] Sharing local trustscores report with neighbors=%s", sorted(targets))
+
+        # Neighbors are contacted one at a time; each send is awaited before the next.
+        for target in targets:
+            await comms.send_message(
+                target, payload, message_type="trustscores", allow_after_learning_finished=True
+            )
+
+    async def _wait_for_sdfl_trustscores_reports(self):
+        """Wait up to TRUSTSCORES_WAIT_TIMEOUT_SECONDS for the wait event; a timeout is logged, not raised."""
+        wait_event = self._trustscores_wait_event
+        if wait_event is None:
+            return
+
+        try:
+            await asyncio.wait_for(wait_event.wait(), timeout=self.TRUSTSCORES_WAIT_TIMEOUT_SECONDS)
+        except asyncio.TimeoutError:
+            received = len(self._received_trustscores_node_ids)
+            expected = self._expected_trustscores_reports
+            logging.warning(
+                "[TW SDFL] Timeout waiting trustscores reports. Received=%s/%s missing=%s",
+                received, expected, expected - received,
+            )
+        else:
+            logging.info(
+                "[TW SDFL] Trustscores exchange complete (%s/%s)",
+                len(self._received_trustscores_node_ids),
+                self._expected_trustscores_reports,
+            )
+
+    async def _wait_for_sdfl_trustscores_forwarding_drain(self):
+        """Pause before shutdown so forwarded trustscores messages can drain.
+
+        The pause is the larger of the fixed grace period and the forwarder's
+        interval + messages_interval + margin. Skipped when no sources were recorded.
+        """
+        if not self._expected_trustscores_sources:
+            return
+
+        forwarder = getattr(CommunicationsManager.get_instance(), "forwarder", None)
+        # A missing forwarder or missing attributes count as 0.
+        cycle = float(getattr(forwarder, "interval", 0)) + float(getattr(forwarder, "messages_interval", 0))
+        pause = max(self.TRUSTSCORES_FORWARDING_GRACE_SECONDS, cycle + self.TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS)
+        logging.info(
+            "[TW SDFL] Waiting %.2fs to drain forwarded trustscores messages before shutdown",
+            pause,
+        )
+        await asyncio.sleep(pause)
+
+    def _finalize_sdfl_global_trustscores_aggregation(self):
+        """Write the weighted global SDFL report and build its graphics; warn and skip if template/path is unset."""
+        template = self._trustscores_template_report
+        target_path = self._trustscores_local_copy_path
+        if template is None or target_path is None:
+            logging.warning("[TW SDFL] Skipping global trustscores write because the template/output is not available")
+            return
+
+        merged_report = build_weighted_trustscores_report(
+            template_report=template,
+            score_accumulator=self._trustscores_score_accumulator,
+            weight_accumulator=self._trustscores_weight_accumulator,
+        )
+        save_trust_report_json(target_path, merged_report)
+        logging.info("[TW SDFL] Global weighted trustscores written to %s", target_path)
+
+        graphics = Graphics(self._start_time, self._experiment_name, self._idx)
+        graphics.graphics_sdfl_global(self._idx)
+
     async def register_trustscores_report(self, source, message):
+        """Dispatch an incoming trustscores message to the SDFL handler or, otherwise, the DFL handler."""
+        federation = self._engine.config.participant["trust_args"]["scenario"].get("federation")
+        if federation != "SDFL":
+            await self._register_dfl_trustscores_report(source, message)
+            return
+        await self._register_sdfl_trustscores_report(source, message)
+
+    async def _register_dfl_trustscores_report(self, source, message):
         if str(message.node_id) == str(self._idx):
             logging.info("[TW DFL] Ignoring own trustscores report from %s", source)
             return
@@ -510,6 +728,50 @@ async def register_trustscores_report(self, source, message):
         if len(self._received_trustscores_node_ids) >= self._expected_trustscores_reports:
             self._trustscores_wait_event.set()
 
+    async def _register_sdfl_trustscores_report(self, source, message):
+        """Record a peer's SDFL trust report; aggregator-role nodes also accumulate it.
+
+        Own and duplicated reports are ignored. Once the number of distinct
+        reporters reaches the expected count, the wait event is set.
+        """
+        sender_id = str(message.node_id)
+        if sender_id == str(self._idx):
+            logging.info("[TW SDFL] Ignoring own trustscores report from %s", source)
+            return
+
+        if sender_id in self._received_trustscores_node_ids:
+            logging.info(
+                "[TW SDFL] Ignoring duplicated trustscores report from node_id=%s source=%s",
+                message.node_id,
+                source,
+            )
+            return
+
+        if not self._is_sdfl_aggregator_node():
+            logging.info(
+                "[TW SDFL] Trustscores report received from node_id=%s source=%s forwarding_only=True",
+                message.node_id, source,
+            )
+        else:
+            peer_report = json.loads(message.trust_report_json)
+            peer_weight = self._get_trustscores_weight_for_source(source, message.node_id)
+            accumulate_weighted_trustscores(
+                report=peer_report,
+                weight=peer_weight,
+                score_accumulator=self._trustscores_score_accumulator,
+                weight_accumulator=self._trustscores_weight_accumulator,
+            )
+            logging.info(
+                "[TW SDFL] Trustscores report received from node_id=%s source=%s accumulated_with_weight=%s",
+                message.node_id, source, peer_weight,
+            )
+
+        self._received_trustscores_node_ids.add(sender_id)
+        received_count = len(self._received_trustscores_node_ids)
+        logging.info("[TW SDFL] Trustscores progress %s/%s", received_count, self._expected_trustscores_reports)
+        if received_count >= self._expected_trustscores_reports:
+            self._trustscores_wait_event.set()
+
     async def _process_round_end_event(self, ree: RoundEndEvent):
         scenario_name = self._engine.config.participant["scenario_args"]["name"]
         train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"

From 91523c912d6b9e22bfd19c8a91274e7586ba6bcb Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 27 Mar 2026 12:41:13 +0100
Subject: [PATCH 13/66] SDFL Trust malicious nodes fixed, frontend weights
 limit implemented and factsheet background fixed

---
 nebula/addons/trustworthiness/dfl_local.py    | 49 ++++++----------
 nebula/addons/trustworthiness/factsheet.py    | 38 ++++++-------
 .../addons/trustworthiness/trustworthiness.py |  4 +-
 nebula/frontend/static/js/deployment/main.js  | 45 +++++++++++++++
 .../static/js/deployment/trustworthiness.js   | 56 ++++++++++++++-----
 5 files changed, 125 insertions(+), 67 deletions(-)

diff --git a/nebula/addons/trustworthiness/dfl_local.py b/nebula/addons/trustworthiness/dfl_local.py
index f1d7a06d4..ff68b93f4 100644
--- a/nebula/addons/trustworthiness/dfl_local.py
+++ b/nebula/addons/trustworthiness/dfl_local.py
@@ -48,43 +48,30 @@ def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time,
         aggregation_algorithm = data["agg_algorithm"]
         n_rounds = int(data["rounds"])
         attack = data["attack_params"]["attacks"]
-        if attack != "No Attack":
-            if attack == "Model Poisoning":
-                poisoned_node_percent = int(data["attack_params"]["poisoned_node_percent"])
-                poisoned_sample_percent = 0
-                poisoned_noise_percent = int(data["attack_params"]["poisoned_noise_percent"])
-            elif attack == "Model Poisoning":
-                poisoned_node_percent = int(data["attack_params"]["poisoned_node_percent"])
-                poisoned_sample_percent = int(data["attack_params"]["poisoned_sample_percent"])
-                poisoned_noise_percent = int(data["attack_params"]["poisoned_noise_percent"])
-            else:
-                poisoned_node_percent = 0
-                poisoned_sample_percent = 0
-                poisoned_noise_percent = 0
-        else:
-            poisoned_node_percent = 0
-            poisoned_sample_percent = 0
-            poisoned_noise_percent = 0
+
+        attack_params = data.get("attack_params", {})
+
+        poisoned_node_percent = int(attack_params.get("poisoned_node_percent", 0) or 0)
+        poisoned_sample_percent = int(attack_params.get("poisoned_sample_percent", 0) or 0)
+        poisoned_noise_percent = float(attack_params.get("poisoned_noise_percent", 0) or 0)
+
         with_reputation = data["reputation"]["enabled"]
-        is_dynamic_topology = False # data["is_dynamic_topology"]
-        is_dynamic_aggregation = False # data["is_dynamic_aggregation"]
-        target_aggregation = False # data["target_aggregation"]
-        """
-        if attack != "No Attack" and with_reputation == True and is_dynamic_aggregation == True:
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. A reputation-based defence with a dynamic aggregation based on the aggregation algorithm {target_aggregation} is used, and the trustworthiness of the project is desired."
+        topology = data["topology"]
 
-        elif attack != "No Attack" and with_reputation == True and is_dynamic_topology == True:
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. A reputation-based defence with a dynamic topology is used, and the trustworthiness of the project is desired."
+        if attack != "No Attack" and with_reputation == True:
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. A reputation-based defence is used, and the trustworthiness of the project is desired."
 
         elif attack != "No Attack" and with_reputation == False:
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. No defence mechanism is used, and the trustworthiness of the project is desired."
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. No defence mechanism is used, and the trustworthiness of the project is desired."
+
+        elif attack == "No Attack" and with_reputation == True:
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. A reputation-based defence is used, and the trustworthiness of the project is desired."
 
-        elif attack == "No Attack":
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks against clients are used, and the trustworthiness of the project is desired."
-        """
+        elif attack == "No Attack" and with_reputation == False:
+            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. No defence mechanism is used, and the trustworthiness of the project is desired."
 
-        #CAMBIAR
-        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks against clients are used, and the trustworthiness of the project is desired."
+        else:
+            background = f"This shouldn't be here xd"
 
         # Set project specifications
         factsheet["project"]["overview"] = data["scenario_title"]
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 201475a24..73cc3bb3b 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -63,30 +63,30 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     aggregation_algorithm = data["agg_algorithm"]
                     n_rounds = int(data["rounds"])
                     attack = data["attack_params"]["attacks"]
-                    if attack != "No Attack":
-                        poisoned_node_percent = int(data["attack_params"]["poisoned_node_percent"])
-                        poisoned_sample_percent = int(data["attack_params"]["poisoned_sample_percent"])
-                        poisoned_noise_percent = int(data["attack_params"]["poisoned_noise_percent"])
-                    else:
-                        poisoned_node_percent = 0
-                        poisoned_sample_percent = 0
-                        poisoned_noise_percent = 0
-                    with_reputation = data["reputation"]["enabled"]
-                    is_dynamic_topology = False # data["is_dynamic_topology"]
-                    is_dynamic_aggregation = False # data["is_dynamic_aggregation"]
-                    target_aggregation = False # data["target_aggregation"]
 
-                    if attack != "No Attack" and with_reputation == True and is_dynamic_aggregation == True:
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. A reputation-based defence with a dynamic aggregation based on the aggregation algorithm {target_aggregation} is used, and the trustworthiness of the project is desired."
+                    attack_params = data.get("attack_params", {})
+
+                    poisoned_node_percent = int(attack_params.get("poisoned_node_percent", 0) or 0)
+                    poisoned_sample_percent = int(attack_params.get("poisoned_sample_percent", 0) or 0)
+                    poisoned_noise_percent = float(attack_params.get("poisoned_noise_percent", 0) or 0)
+
+                    with_reputation = data["reputation"]["enabled"]
+                    topology = data["topology"]
 
-                    elif attack != "No Attack" and with_reputation == True and is_dynamic_topology == True:
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. A reputation-based defence with a dynamic topology is used, and the trustworthiness of the project is desired."
+                    if attack != "No Attack" and with_reputation == True:
+                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. A reputation-based defence is used, and the trustworthiness of the project is desired."
 
                     elif attack != "No Attack" and with_reputation == False:
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used against the clients is {attack}, where the percentage of attacked nodes is {poisoned_node_percent}, the percentage of attacked samples of each node is {poisoned_sample_percent}, and the percent of poisoned noise is {poisoned_noise_percent}. No defence mechanism is used, and the trustworthiness of the project is desired."
+                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. No defence mechanism is used, and the trustworthiness of the project is desired."
+
+                    elif attack == "No Attack" and with_reputation == True:
+                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. A reputation-based defence is used, and the trustworthiness of the project is desired."
 
-                    elif attack == "No Attack":
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks against clients are used, and the trustworthiness of the project is desired."
+                    elif attack == "No Attack" and with_reputation == False:
+                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. No defence mechanism is used, and the trustworthiness of the project is desired."
+
+                    else:
+                        background = f"This shouldn't be here xd"
 
                     # Set project specifications
                     factsheet["project"]["overview"] = data["scenario_title"]
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 44c934ce9..275bc95bc 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -515,8 +515,8 @@ def _finalize_trustscores_aggregation(self):
         graphics.graphics_dfl_global(self._idx)
 
     def _is_sdfl_aggregator_node(self) -> bool:
-        role = self._engine.rb.get_role()
-        return role in {Role.AGGREGATOR, Role.TRAINER_AGGREGATOR}
+        effective_role = self._engine.rb.get_role_name(True)
+        return effective_role in {Role.AGGREGATOR.value, Role.TRAINER_AGGREGATOR.value}
 
     def _initialize_sdfl_global_trustscores_aggregation(self, experiment_name: str):
         if self._trustscores_local_report_initialized:
diff --git a/nebula/frontend/static/js/deployment/main.js b/nebula/frontend/static/js/deployment/main.js
index 3f306f588..42fb11922 100644
--- a/nebula/frontend/static/js/deployment/main.js
+++ b/nebula/frontend/static/js/deployment/main.js
@@ -111,9 +111,54 @@ const DeploymentManager = (function() {
             return false;
         }
 
+        const trustWeightsValidationMessage = validateTrustworthinessWeights();
+        if (trustWeightsValidationMessage) {
+            Utils.showAlert('error', trustWeightsValidationMessage);
+            return false;
+        }
+
         return true;
     }
 
+    function validateTrustworthinessWeights() {
+        const manager = window.TrustworthinessManager || TrustworthinessManager;
+        if (manager && typeof manager.validateWeights === "function") {
+            return manager.validateWeights();
+        }
+
+        if (!manager || typeof manager.getTrustworthinessConfig !== "function") {
+            return null;
+        }
+
+        const config = manager.getTrustworthinessConfig();
+        if (!config?.enabled) {
+            return null;
+        }
+
+        const sumValues = (values) => values.reduce((sum, value) => sum + (parseFloat(value) || 0), 0);
+        const getWeightValidationMessage = (groupLabel, total) => {
+            if (total > 100) {
+                return `[Trustworthiness] ${groupLabel} weights exceed 100%. Please review the configuration.`;
+            }
+
+            if (total < 100) {
+                return `[Trustworthiness] ${groupLabel} weights are below 100%. Please review the configuration.`;
+            }
+
+            return null;
+        };
+
+        return (
+            getWeightValidationMessage("Pillars", sumValues(Object.values(config.pillars || {}))) ||
+            Object.entries(config.notions || {}).reduce((message, [groupName, weights]) => {
+                if (message) return message;
+
+                const label = `${groupName.charAt(0).toUpperCase()}${groupName.slice(1)} notions`;
+                return getWeightValidationMessage(label, sumValues(weights || []));
+            }, null)
+        );
+    }
+
     function setupDatasetListeners() {
         const datasetSelect = document.getElementById("datasetSelect");
         if (datasetSelect) {
diff --git a/nebula/frontend/static/js/deployment/trustworthiness.js b/nebula/frontend/static/js/deployment/trustworthiness.js
index 64cc721fe..3bdbadac5 100644
--- a/nebula/frontend/static/js/deployment/trustworthiness.js
+++ b/nebula/frontend/static/js/deployment/trustworthiness.js
@@ -1,5 +1,10 @@
 // Trustworthiness System Module
 const TrustworthinessManager = (function() {
+    function isTrustworthinessEnabled() {
+        const sw = document.getElementById("TrustworthinessSwitch");
+        return Boolean(sw?.checked);
+    }
+
     function initializeTrustworthinessSystem() {
         setupTrustworthinessSwitch();
         setupTrustworthinessFederationSwitch();
@@ -116,12 +121,28 @@ const TrustworthinessManager = (function() {
     }
 
     function validateWeights() {
+        if (!isTrustworthinessEnabled()) {
+            return null;
+        }
+
         if (isDFL()) {
             return validateWeightsDFL();
         }
         return validateWeightsCFL();
     }
 
+    function getWeightValidationMessage(groupLabel, total) {
+        if (total > 100) {
+            return `[Trustworthiness] ${groupLabel} weights exceed 100%. Please review the configuration.`;
+        }
+
+        if (total < 100) {
+            return `[Trustworthiness] ${groupLabel} weights are below 100%. Please review the configuration.`;
+        }
+
+        return null;
+    }
+
     function validateWeightsCFL() {
         const robustnessPercent = parseFloat(document.getElementById("cfl-robustness-pillar").value) || 0;
         const privacyPercent = parseFloat(document.getElementById("cfl-privacy-pillar").value) || 0;
@@ -164,13 +185,15 @@ const TrustworthinessManager = (function() {
         const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
         const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion2 + sustainabilityNotion3;
 
-        if (totalPillar !== 100) return "[Trustworthiness] Check pillars weights";
-        if (totalRobustnessNotion !== 100) return "[Trustworthiness] Check robustness notions weights";
-        if (totalPrivacyNotion !== 100) return "[Trustworthiness] Check privacy notions weights";
-        if (totalFairnessNotion !== 100) return "[Trustworthiness] Check fairness notions weights";
-        if (totalExplainabilityNotion !== 100) return "[Trustworthiness] Check explainability notions weights";
-        if (totalArchitecturalSoundnessNotion !== 100) return "[Trustworthiness] Check architectural soundness notions weights";
-        if (totalSustainabilityNotion !== 100) return "[Trustworthiness] Check sustainability notions weights";
+        return (
+            getWeightValidationMessage("Pillars", totalPillar) ||
+            getWeightValidationMessage("Robustness notions", totalRobustnessNotion) ||
+            getWeightValidationMessage("Privacy notions", totalPrivacyNotion) ||
+            getWeightValidationMessage("Fairness notions", totalFairnessNotion) ||
+            getWeightValidationMessage("Explainability notions", totalExplainabilityNotion) ||
+            getWeightValidationMessage("Architectural soundness notions", totalArchitecturalSoundnessNotion) ||
+            getWeightValidationMessage("Sustainability notions", totalSustainabilityNotion)
+        );
     }
 
     function validateWeightsDFL() {
@@ -212,13 +235,15 @@ const TrustworthinessManager = (function() {
         const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
         const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion3;
 
-        if (totalPillar !== 100) return "[Trustworthiness] Check pillars weights";
-        if (totalRobustnessNotion !== 100) return "[Trustworthiness] Check robustness notions weights";
-        if (totalPrivacyNotion !== 100) return "[Trustworthiness] Check privacy notions weights";
-        if (totalFairnessNotion !== 100) return "[Trustworthiness] Check fairness notions weights";
-        if (totalExplainabilityNotion !== 100) return "[Trustworthiness] Check explainability notions weights";
-        if (totalArchitecturalSoundnessNotion !== 100) return "[Trustworthiness] Check architectural soundness notions weights";
-        if (totalSustainabilityNotion !== 100) return "[Trustworthiness] Check sustainability notions weights";
+        return (
+            getWeightValidationMessage("Pillars", totalPillar) ||
+            getWeightValidationMessage("Robustness notions", totalRobustnessNotion) ||
+            getWeightValidationMessage("Privacy notions", totalPrivacyNotion) ||
+            getWeightValidationMessage("Fairness notions", totalFairnessNotion) ||
+            getWeightValidationMessage("Explainability notions", totalExplainabilityNotion) ||
+            getWeightValidationMessage("Architectural soundness notions", totalArchitecturalSoundnessNotion) ||
+            getWeightValidationMessage("Sustainability notions", totalSustainabilityNotion)
+        );
     }
 
     function getTrustworthinessConfig() {
@@ -490,7 +515,8 @@ const TrustworthinessManager = (function() {
         initializeTrustworthinessSystem,
         getTrustworthinessConfig,
         setTrustworthinessConfig,
-        resetTrustworthinessConfig
+        resetTrustworthinessConfig,
+        validateWeights
     };
 })();
 

From 262f7f8151a375325221e9290289952acc35c50a Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 27 Mar 2026 13:09:17 +0100
Subject: [PATCH 14/66] Names changed DFL trust

---
 .../addons/trustworthiness/{dfl_local.py => dfl_factsheet.py} | 4 ++--
 nebula/addons/trustworthiness/trustworthiness.py              | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename nebula/addons/trustworthiness/{dfl_local.py => dfl_factsheet.py} (99%)

diff --git a/nebula/addons/trustworthiness/dfl_local.py b/nebula/addons/trustworthiness/dfl_factsheet.py
similarity index 99%
rename from nebula/addons/trustworthiness/dfl_local.py
rename to nebula/addons/trustworthiness/dfl_factsheet.py
index ff68b93f4..e14dedf2d 100644
--- a/nebula/addons/trustworthiness/dfl_local.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -1,4 +1,4 @@
-# nebula/addons/trustworthiness/dfl_local.py
+# nebula/addons/trustworthiness/dfl_factsheet.py
 import json, os, shutil
 from datetime import datetime
 from nebula.addons.trustworthiness.metric import TrustMetricManager
@@ -24,7 +24,7 @@
 dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
-def compute_trust_local_dfl(experiment_name, participant_idx, data, start_time, end_time):
+def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time):
     trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
     os.makedirs(trust_dir, exist_ok=True)
 
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 275bc95bc..8cf4ab5a8 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -15,7 +15,7 @@
 from datetime import datetime
 from nebula.addons.trustworthiness.factsheet import Factsheet
 from nebula.addons.trustworthiness.metric import TrustMetricManager
-from nebula.addons.trustworthiness.dfl_local import compute_trust_local_dfl
+from nebula.addons.trustworthiness.dfl_factsheet import populate_factsheet
 from nebula.addons.trustworthiness.graphics import Graphics
 import json, os
 from nebula.core.network.communications import CommunicationsManager
@@ -262,7 +262,7 @@ async def _finish_sdfl_trustscores_exchange(self, trust_config, experiment_name)
             self._finalize_sdfl_global_trustscores_aggregation()
 
     def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
-        compute_trust_local_dfl(experiment_name, self._idx, trust_config, self._start_time, self._end_time)
+        populate_factsheet(experiment_name, self._idx, trust_config, self._start_time, self._end_time)
 
         trust_metric_manager = TrustMetricManager(self._start_time, federation, self._idx)
         trust_metric_manager.evaluate_participant(experiment_name, weights, self._idx, use_weights=True)

From f0f8129980a638d3da3d8ce5ed7d4ce4985dcace Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 27 Mar 2026 16:19:43 +0100
Subject: [PATCH 15/66] Flooding and Delayer attacks fixed and minor changes

---
 .../addons/trustworthiness/dfl_factsheet.py   | 13 +--
 nebula/addons/trustworthiness/factsheet.py    | 82 +---------------
 .../addons/trustworthiness/final_reports.py   | 93 -------------------
 nebula/addons/trustworthiness/metric.py       |  2 -
 .../addons/trustworthiness/trustworthiness.py | 40 +++-----
 nebula/addons/trustworthiness/utils.py        | 46 ---------
 nebula/core/engine.py                         | 17 ++--
 7 files changed, 25 insertions(+), 268 deletions(-)
 delete mode 100644 nebula/addons/trustworthiness/final_reports.py

diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index e14dedf2d..0ffca4c10 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -70,9 +70,6 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         elif attack == "No Attack" and with_reputation == False:
             background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. No defence mechanism is used, and the trustworthiness of the project is desired."
 
-        else:
-            background = f"This shouldn't be here xd"
-
         # Set project specifications
         factsheet["project"]["overview"] = data["scenario_title"]
         factsheet["project"]["purpose"] = data["scenario_description"]
@@ -127,7 +124,6 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
 
         train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
-        #train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_train_model.pk")
         test_dataloader_file = os.path.join(files_dir, f"participant_{participant_idx}_test_loader.pk")
         emissions_file = os.path.join(files_dir, f"emissions_{participant_idx}.csv")
 
@@ -189,33 +185,26 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         test_sample = next(iter(test_dataloader))
 
         lr = factsheet["configuration"]["learning_rate"]
-        value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
 
+        value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
         factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
 
         value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
-
         factsheet["performance"]["test_loss_sensitivity"] = 1 if value_loss_sensitivity > 1 else value_loss_sensitivity
 
         value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
-
         factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
 
         value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes_temp, lr)
-
         factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
 
         value_confidence_score = get_confidence_score(model, test_sample)
-
         factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
-        attack_success_rate
 
         value_attack_success_rate = attack_success_rate(model, test_sample)
-
         factsheet["performance"]["test_attack_success_rate"] = 1 if value_attack_success_rate > 1 else value_attack_success_rate
 
         feature_importance = get_feature_importance_cv(model, test_sample)
-
         factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
 
         f.seek(0)
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 73cc3bb3b..fc4b4808e 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -85,9 +85,6 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     elif attack == "No Attack" and with_reputation == False:
                         background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. No defence mechanism is used, and the trustworthiness of the project is desired."
 
-                    else:
-                        background = f"This shouldn't be here xd"
-
                     # Set project specifications
                     factsheet["project"]["overview"] = data["scenario_title"]
                     factsheet["project"]["purpose"] = data["scenario_description"]
@@ -134,10 +131,6 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     elif dataset == "BreastCancer" and algorithm == "MLP":
                         model = BreastCancerModelMLP()
                         num_classes_temp = 2
-                    # elif dataset == "Syscall" and algorithm == "MLP":
-                    #     model = SyscallModelMLP()
-                    # else:
-                    #     model = CIFAR10ModelCNN()
 
                     factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
                     factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
@@ -167,65 +160,15 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
             try:
                 factsheet = json.load(f)
 
-                """
-                expected_total = int(factsheet.get("participants", {}).get("client_num", 0) or 0)
-                logging.info(f"[Factsheet] expected_total_nodes = {expected_total}")
-
-                data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "confirmation.csv")
-
-                data = read_csv(data_file)
-
-                number_files = len(data)
-
-                logger.info(f"number_files={number_files}")
-
-                while (number_files != expected_total):
-                    logger.info("WAIT")
-                    time.sleep(5)
-                    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "confirmation.csv")
-                    data = read_csv(data_file)
-                    number_files = len(data)
-                    logger.info(f"number_files={number_files}")
-                    logger.info(f"expected_nodes={expected_total}")
-                """
-
-
-
                 dataset = factsheet["data"]["provenance"]
                 model = factsheet["configuration"]["training_model"]
 
                 files_dir = f"{os.environ.get('NEBULA_LOGS_DIR')}/{scenario_name}/trustworthiness"
 
-                #models_files = glob.glob(os.path.join(files_dir, "*final_model*")) # MANDAR MENSAJE
-                #dataloaders_files = glob.glob(os.path.join(files_dir, "*train_loader*"))
                 test_dataloader_file = f"{files_dir}/participant_{participant_idx}_test_loader.pk"
                 final_model_file = f"{files_dir}/participant_{participant_idx}_final_model.pk"
                 emissions_file = os.path.join(files_dir, "emissions.csv")
 
-                # # Entropy
-                # i = 0
-                # for file in dataloaders_files:
-                #     with open(file, "rb") as file:
-                #         dataloader = pickle.load(file)
-                #     get_entropy(i, scenario_name, dataloader)
-                #     i += 1
-
-
-                """
-                get_all_data_entropy(scenario_name)
-
-                with open(f"{files_dir}/entropy.json", "r") as file:
-                    entropy_distribution = json.load(file)
-
-                values = np.array(list(entropy_distribution.values()))
-
-                normalized_values = (values - np.min(values)) / (np.max(values) - np.min(values))
-
-                avg_entropy = np.mean(normalized_values)
-
-                factsheet["data"]["avg_entropy"] = avg_entropy
-                """
-
                 avg_class_imbalance, avg_model_size = get_avg_class_imbalance_model_size(scenario_name)
                 entropy_distribution = get_entropy_list (scenario_name)
 
@@ -255,16 +198,6 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 factsheet["fairness"]["selection_cv"] = 1
 
-                """
-                count_all_class_samples(scenario_name)
-
-                with open(f"{files_dir}/count_class.json", "r") as file:
-                    class_distribution = json.load(file)
-
-                class_samples_sizes = [x for x in class_distribution.values()]
-                class_imbalance = get_cv(list=class_samples_sizes)
-                factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance > 1 else class_imbalance
-                """
                 class_imbalance_score = 1 / (1+avg_class_imbalance)
                 factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance_score > 1 else class_imbalance_score
 
@@ -273,7 +206,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 if dataset == "MNIST" and model == "MLP":
                     model = MNISTModelMLP()
-                    num_classes_temp = 10 # CAMBIAR
+                    num_classes_temp = 10
                 elif dataset == "MNIST" and model == "CNN":
                     model = MNISTModelCNN()
                     num_classes_temp = 10
@@ -289,10 +222,6 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 elif dataset == "BreastCancer" and model == "MLP":
                     model = BreastCancerModelMLP()
                     num_classes_temp = 2
-                # elif dataset == "Syscall" and model == "MLP":
-                #     model = SyscallModelMLP()
-                # else:
-                #     model = CIFAR10ModelCNN()
 
                 model.load_state_dict(lightning_model.state_dict())
 
@@ -302,33 +231,26 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 test_sample = next(iter(test_dataloader))
 
                 lr = factsheet["configuration"]["learning_rate"]
-                value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
 
+                value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
                 factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
 
                 value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
-
                 factsheet["performance"]["test_loss_sensitivity"] = 1 if value_loss_sensitivity > 1 else value_loss_sensitivity
 
                 value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
-
                 factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
 
                 value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes_temp, lr)
-
                 factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
 
                 value_confidence_score = get_confidence_score(model, test_sample)
-
                 factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
-                attack_success_rate
 
                 value_attack_success_rate = attack_success_rate(model, test_sample)
-
                 factsheet["performance"]["test_attack_success_rate"] = 1 if value_attack_success_rate > 1 else value_attack_success_rate
 
                 feature_importance = get_feature_importance_cv(model, test_sample)
-
                 factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
 
                 # Set emissions metrics
diff --git a/nebula/addons/trustworthiness/final_reports.py b/nebula/addons/trustworthiness/final_reports.py
deleted file mode 100644
index eaff2cdef..000000000
--- a/nebula/addons/trustworthiness/final_reports.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import annotations
-
-import csv
-import os
-from typing import Any
-
-
-def ensure_trust_dir(base_logs_dir: str, experiment_name: str) -> str:
-    trust_dir = os.path.join(base_logs_dir, experiment_name, "trustworthiness")
-    os.makedirs(trust_dir, exist_ok=True)
-    return trust_dir
-
-
-def append_trust_report_to_csv(
-    base_logs_dir: str,
-    experiment_name: str,
-    report: dict[str, Any],
-) -> None:
-    """
-    Escribe la información del reporte en:
-      - data_results.csv
-      - emissions.csv
-    """
-
-    trust_dir = ensure_trust_dir(base_logs_dir, experiment_name)
-
-    data_results_path = os.path.join(trust_dir, "data_results.csv")
-    emissions_path = os.path.join(trust_dir, "emissions.csv")
-
-    _append_data_results(data_results_path, report)
-    _append_emissions(emissions_path, report)
-
-
-def _append_data_results(path: str, report: dict[str, Any]) -> None:
-    exists = os.path.exists(path)
-
-    with open(path, "a", newline="", encoding="utf-8") as f:
-        writer = csv.DictWriter(
-            f,
-            fieldnames=[
-                "node_id",
-                "round",
-                "bytes_sent",
-                "bytes_recv",
-                "loss",
-                "accuracy",
-            ],
-        )
-
-        if not exists:
-            writer.writeheader()
-
-        writer.writerow(
-            {
-                "node_id": report["node_id"],
-                "round": report["round"],
-                "bytes_sent": report["bytes_sent"],
-                "bytes_recv": report["bytes_recv"],
-                "loss": report["loss"],
-                "accuracy": report["accuracy"],
-            }
-        )
-
-
-def _append_emissions(path: str, report: dict[str, Any]) -> None:
-    exists = os.path.exists(path)
-
-    with open(path, "a", newline="", encoding="utf-8") as f:
-        writer = csv.DictWriter(
-            f,
-            fieldnames=[
-                "node_role",
-                "node_id",
-                "round",
-                "workload",
-                "sample_size",
-                "emissions",
-            ],
-        )
-
-        if not exists:
-            writer.writeheader()
-
-        writer.writerow(
-            {
-                "node_role": report["node_role"],
-                "node_id": report["node_id"],
-                "round": report["round"],
-                "workload": report["workload"],
-                "sample_size": report["sample_size"],
-                "emissions": report["emissions"],
-            }
-        )
diff --git a/nebula/addons/trustworthiness/metric.py b/nebula/addons/trustworthiness/metric.py
index da62568b7..8083dd6f0 100755
--- a/nebula/addons/trustworthiness/metric.py
+++ b/nebula/addons/trustworthiness/metric.py
@@ -99,8 +99,6 @@ def evaluate_participant(self, experiment_name, weights, participant_id, use_wei
 
         with open(factsheet_file, "r") as f, open(metrics_cfg_file, "r") as m:
             factsheet = json.load(f)
-            #metrics_cfg = json.load(m)
-            #metrics_cfg = replace_everywhere(metrics_cfg, "factsheet", f"factsheet_participant_{participant_id}")
 
             raw_metrics_cfg: str = m.read()
             raw_metrics_cfg = raw_metrics_cfg.replace("factsheet", f"factsheet_participant_{participant_id}")
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 8cf4ab5a8..8e8c54b68 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -9,7 +9,7 @@
 from nebula.core.engine import Engine
 import pickle
 from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_final_model_id, get_class_imbalance_local
-from nebula.addons.trustworthiness.utils import save_results_csv, save_confirmation_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy, load_trust_report_json_dumped, create_local_trust_report_copy, accumulate_weighted_trustscores, build_weighted_trustscores_report, save_trust_report_json
+from nebula.addons.trustworthiness.utils import save_results_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy, load_trust_report_json_dumped, create_local_trust_report_copy, accumulate_weighted_trustscores, build_weighted_trustscores_report, save_trust_report_json
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
 from datetime import datetime
@@ -17,7 +17,8 @@
 from nebula.addons.trustworthiness.metric import TrustMetricManager
 from nebula.addons.trustworthiness.dfl_factsheet import populate_factsheet
 from nebula.addons.trustworthiness.graphics import Graphics
-import json, os
+import json
+import os
 from nebula.core.network.communications import CommunicationsManager
 
 """                                                     ##############################
@@ -143,21 +144,18 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             server_addr = str(self._engine.config.participant["network_args"]["neighbors"]).strip()
 
-            logging.info("connections=%s", list(cm.connections.keys()))
-            logging.info("server in connections? %s", server_addr in cm.connections)
+            #logging.info("connections=%s", list(cm.connections.keys()))
+            #logging.info("server in connections? %s", server_addr in cm.connections)
 
             bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(experiment_name, self._idx)
 
             role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
-            logging.info("class_imbalance=%s", class_imbalance)
 
             model_size = get_bytes_final_model_id(self._idx, experiment_name)
-            logging.info("model_size=%s", model_size)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
-            logging.info("local_entropy=%s", local_entropy)
 
             message = cm.mm.create_message(
                 "trustworthiness",
@@ -181,11 +179,11 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 model_size=model_size,
                 local_entropy=local_entropy,
             )
-            """
+
             logging.info(
                 "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
                 "accuracy=%s loss=%s energy_grid=%s emissions=%s workload=%s"
-                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s",
+                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s",
                 server_addr,
                 str(self._idx),
                 bytes_sent,
@@ -202,8 +200,11 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 gpu_used,
                 energy_consumed,
                 sample_size,
+                class_imbalance,
+                model_size,
+                local_entropy,
             )
-            """
+
             await cm.send_message(
                 server_addr,
                 message,
@@ -573,14 +574,14 @@ async def _prepare_sdfl_trustscores_exchange(self):
     def _log_sdfl_trustscores_node_weights(self):
         if not self._is_reputation_enabled():
             logging.info(
-                "[TW SDFL] Reputation system disabled. trustscores weights fallback to 1.0 for all nodes"
+                "[TW SDFL] Reputation system disabled. trustscores weights fallback to 0.5 for all nodes"
             )
             return
 
         peer_weight_map = self._get_trustscores_peer_weights_from_reputation()
         if not peer_weight_map:
             logging.info(
-                "[TW SDFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 1.0 when needed"
+                "[TW SDFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 0.5 when needed"
             )
             return
 
@@ -857,8 +858,6 @@ async def finish_experiment_role_pre_actions(self):
         pass
 
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
-        from datetime  import datetime
-
         self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
         self._trust_config = trust_config
         self._experiment_name = experiment_name
@@ -881,13 +880,10 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             )
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
-            logging.info("class_imbalance=%s", class_imbalance)
 
             model_size = get_bytes_final_model_id(self._idx, experiment_name)
-            logging.info("model_size=%s", model_size)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
-            logging.info("local_entropy=%s", local_entropy)
 
             save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
@@ -914,13 +910,10 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             )
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
-            logging.info("class_imbalance=%s", class_imbalance)
 
             model_size = get_bytes_final_model_id(self._idx, experiment_name)
-            logging.info("model_size=%s", model_size)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
-            logging.info("local_entropy=%s", local_entropy)
 
             save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
@@ -965,11 +958,6 @@ async def register_trustworthiness_report(self, source, message):
 
 
     async def _generate_factsheet(self, trust_config, experiment_name):
-        from nebula.addons.trustworthiness.factsheet import Factsheet
-        from nebula.addons.trustworthiness.metric import TrustMetricManager
-        import json
-        import os
-
         factsheet = Factsheet()
         factsheet.populate_factsheet_pre_train(trust_config, experiment_name)
         factsheet.populate_factsheet_post_train(experiment_name, self._start_time, self._end_time, self._idx)
@@ -1063,7 +1051,6 @@ async def start(self):
         self._tracker.start()
 
     async def _create_trustworthiness_directory(self):
-        import os
         trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self._experiment_name, "trustworthiness")
         # Create a directory to store files used to compute trust
         os.makedirs(trust_dir, exist_ok=True)
@@ -1090,7 +1077,6 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         # Final operations
         save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss)
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
-        #save_confirmation_csv(self._experiment_name, self._idx)
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
     def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_route) -> TrustWorkload:
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index 789d4d6af..affff00a2 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -448,30 +448,6 @@ def save_emissions_csv_cfl(scenario_name: str, id: int, role: str, energy_grid:
 
 
 def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float):
-    """
-    try:
-        data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-    except:
-        data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "data_results.csv")
-
-    if exists(data_results_file):
-        df = pd.read_csv(data_results_file)
-    else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"])
-
-    try:
-        # Add new entry to DataFrame
-        new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
-                                    'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss]})
-        df = pd.concat([df, new_data], ignore_index=True)
-        logger.info(f"new_data={new_data}")
-
-        df.to_csv(data_results_file, encoding='utf-8', index=False)
-
-    except Exception as e:
-        logger.warning(e)
-    """
 
     try:
         data_results_id_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"data_results_{id}.csv")
@@ -496,28 +472,6 @@ def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: i
     except Exception as e:
         logger.warning(e)
 
-def save_confirmation_csv(scenario_name: str, id: int):
-    try:
-        data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "confirmation.csv")
-    except:
-        data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "confirmation.csv")
-
-    if exists(data_results_file):
-        df = pd.read_csv(data_results_file)
-    else:
-        df = pd.DataFrame(columns=["id", "OK"])
-
-    try:
-        # Add new entry to DataFrame
-        new_data = pd.DataFrame({'id': [id], 'OK': ["OK"]})
-        df = pd.concat([df, new_data], ignore_index=True)
-        logger.info(f"new_data={new_data}")
-
-        df.to_csv(data_results_file, encoding='utf-8', index=False)
-
-    except Exception as e:
-        logger.warning(e)
-
 def load_trust_report_json_dumped(scenario_name: str, participant_id: int) -> str:
     """
     Read a participant trustworthiness JSON file and return it
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 259f96580..85e767b61 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -118,14 +118,6 @@ def __init__(
         self._is_malicious = self.config.participant["adversarial_args"]["attack_params"]["attacks"] != "No Attack"
 
         role = config.participant["device_args"]["role"]
-        self._role_behavior: RoleBehavior = factory_role_behavior(role, self, config)
-        self._role_behavior_performance_lock = Locker("role_behavior_performance_lock", async_lock=True)
-
-        print_msg_box(
-            msg=f"Name {self.name}\nRole: {self._role_behavior.get_role_name()}",
-            indent=2,
-            title="Node information",
-        )
 
         msg = f"Trainer: {self._trainer.__class__.__name__}"
         msg += f"\nDataset: {self.config.participant['data_args']['dataset']}"
@@ -149,6 +141,15 @@ def __init__(
 
         self._cm = CommunicationsManager(engine=self)
 
+        self._role_behavior: RoleBehavior = factory_role_behavior(role, self, config)
+        self._role_behavior_performance_lock = Locker("role_behavior_performance_lock", async_lock=True)
+
+        print_msg_box(
+            msg=f"Name {self.name}\nRole: {self._role_behavior.get_role_name()}",
+            indent=2,
+            title="Node information",
+        )
+
         self._reporter = Reporter(config=self.config, trainer=self.trainer)
 
         self._sinchronized_status = True

From 49219b11b9c3aa349b2605a4c5a3af42027b163f Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 27 Mar 2026 17:30:44 +0100
Subject: [PATCH 16/66] Trust logs updated

---
 nebula/addons/trustworthiness/trustworthiness.py | 1 -
 nebula/addons/trustworthiness/utils.py           | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 8e8c54b68..c7f6f32e4 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -1058,7 +1058,6 @@ async def _create_trustworthiness_directory(self):
 
     async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         class_counter = self._engine.trainer.datamodule.get_samples_per_label()
-        logging.info("COUNTER=%s", class_counter)
 
         save_class_count_per_participant(self._experiment_name, class_counter, self._idx)
 
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index affff00a2..8aeb68562 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -415,7 +415,6 @@ def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_rec
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
                                     'loss': [loss], 'class_imbalance': [class_imbalance], 'model_size': [model_size], 'local_entropy': [local_entropy]})
         df = pd.concat([df, new_data], ignore_index=True)
-        logger.info(f"new_data={new_data}")
 
         df.to_csv(data_results_file, encoding='utf-8', index=False)
 
@@ -439,7 +438,6 @@ def save_emissions_csv_cfl(scenario_name: str, id: int, role: str, energy_grid:
                                     'emissions': [emissions], 'workload': [workload], 'CPU_model': [cpu_model], 'GPU_model': [gpu_model], 'CPU_used': [cpu_used], 'GPU_used': [gpu_used], 'energy_consumed': [energy_consumed],
                                     'sample_size': [sample_size]})
         df = pd.concat([df, new_data], ignore_index=True)
-        logger.info(f"new_data={new_data}")
 
         df.to_csv(data_results_file, encoding='utf-8', index=False)
 
@@ -465,7 +463,6 @@ def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: i
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
                                     'loss': [loss]})
         df = pd.concat([df, new_data], ignore_index=True)
-        logger.info(f"new_data={new_data}")
 
         df.to_csv(data_results_id_file, encoding='utf-8', index=False)
 

From f6deb02823c71b626875067cfadab3b13a82b721 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 8 Apr 2026 12:22:44 +0200
Subject: [PATCH 17/66] Minor changes and new fairness and explainability
 metrics implemented

---
 nebula/addons/trustworthiness/calculation.py  | 624 +++++++++++++++++-
 .../configs/factsheet_template.json           |  16 +-
 .../configs/factsheet_template_dfl.json       |  16 +-
 .../addons/trustworthiness/dfl_factsheet.py   |  57 +-
 nebula/addons/trustworthiness/factsheet.py    |  56 +-
 .../addons/trustworthiness/trustworthiness.py |   6 +
 6 files changed, 747 insertions(+), 28 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 96a1b2844..92b36d31d 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -11,11 +11,13 @@
 import numpy as np
 import pandas as pd
 import shap
+import torch
 import torch.nn
 from art.estimators.classification import PyTorchClassifier
 from art.metrics import clever_u, loss_sensitivity, empirical_robustness
 from codecarbon import EmissionsTracker
-from scipy.stats import variation
+from scipy.spatial.distance import jensenshannon
+from scipy.stats import entropy, variation
 from torch import nn, optim
 import torch.nn.functional as F
 import time
@@ -393,6 +395,367 @@ def get_avg_loss_accuracy(scenario_name):
     return avg_loss, avg_accuracy, std_accuracy
 
 
+def get_participant_loss_accuracy(scenario_name, participant_id):
+    """
+    Reads one participant's loss and accuracy from the scenario's data_results.csv.
+
+    Args:
+        scenario_name (str): Scenario folder name under NEBULA_LOGS_DIR.
+        participant_id (int | str): Value matched against the `id` column.
+
+    Returns:
+        tuple[float, float]: (loss, accuracy) taken from the first matching row.
+    """
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
+    data = read_csv(data_file)
+    row = data[data["id"] == participant_id]
+
+    if row.empty:
+        row = data[data["id"] == int(participant_id)]  # retry with the id cast to int
+
+    loss = float(row["loss"].iloc[0])  # NOTE(review): .iloc[0] fails (IndexError in pandas) if no row matched
+    accuracy = float(row["accuracy"].iloc[0])
+    return loss, accuracy
+
+
+def _get_model_accuracy(model, dataloader):
+    """
+    Calculates top-1 accuracy (argmax over dim 1) of a model over a dataloader.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        dataloader (DataLoader): Dataloader with (x, y) batches.
+
+    Returns:
+        float: Accuracy in [0, 1]; 0.0 if the model is not a Module or no samples are seen.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return 0.0
+
+    try:
+        device = next(model.parameters()).device
+    except Exception:  # e.g. a model without parameters: fall back to CPU
+        device = torch.device("cpu")
+
+    model.eval()  # NOTE: the model is left in eval mode when this function returns
+    correct = 0
+    total = 0
+
+    with torch.no_grad():
+        for x, y in dataloader:
+            x = x.to(device)
+            y = y.to(device)
+
+            out = model(x)
+            logits = out[0] if isinstance(out, (tuple, list)) else out  # tuple/list output: use element 0
+            preds = logits.argmax(dim=1)
+
+            correct += (preds == y).sum().item()
+            total += y.size(0)
+
+    return correct / total if total > 0 else 0.0
+
+def _extract_model_logits(model_output):
+    """
+    Return the classification output of a forward pass: element 0 of a tuple/list, else the output itself.
+
+    Some models may return tuples/lists; for trust metrics we always consume the
+    first element as the classification output.
+    """
+    return model_output[0] if isinstance(model_output, (tuple, list)) else model_output
+
+
+def _prepare_class_targets(y):
+    """
+    Convert targets (tensor or array-like, any rank) into a flat 1-D int64 class-index tensor.
+    """
+    if not torch.is_tensor(y):
+        y = torch.as_tensor(y)
+
+    if y.ndim > 1:
+        if y.size(-1) > 1:  # several values per sample: class index is the argmax of the last axis
+            y = y.argmax(dim=-1)
+        else:  # trailing singleton axis: just flatten
+            y = y.view(-1)
+
+    return y.long().view(-1)
+
+
+def _logits_to_probabilities(logits):
+    """
+    Convert model outputs into a probability matrix of shape (N, C).
+
+    Supports:
+    - multiclass logits/log-probabilities with shape (N, C), passed through softmax
+    - binary logits with shape (N,) or (N, 1), expanded to [1 - p, p] via sigmoid
+    - already-normalized probability matrices (values in [0, 1], rows summing to ~1)
+    """
+    if not torch.is_tensor(logits):
+        logits = torch.as_tensor(logits)
+
+    if logits.ndim == 0:
+        logits = logits.view(1, 1)
+    elif logits.ndim == 1:
+        logits = logits.view(-1, 1)
+    elif logits.ndim > 2:
+        logits = logits.reshape(logits.shape[0], -1)  # flatten every extra axis into the class axis
+
+    if logits.size(1) == 1:
+        pos_prob = torch.sigmoid(logits[:, 0])
+        probs = torch.stack([1.0 - pos_prob, pos_prob], dim=1)
+    else:
+        row_sums = logits.sum(dim=1)
+        looks_like_probs = (
+            torch.all(logits >= 0)
+            and torch.all(logits <= 1.0 + 1e-6)
+            and torch.allclose(row_sums, torch.ones_like(row_sums), atol=1e-4, rtol=1e-4)
+        )
+        probs = logits if looks_like_probs else torch.softmax(logits, dim=1)
+
+    probs = torch.clamp(probs, min=0.0, max=1.0)
+    probs = probs / probs.sum(dim=1, keepdim=True).clamp_min(1e-12)  # renormalize; clamp avoids dividing by zero
+    return probs
+
+
+def _collect_classification_statistics(model, dataloader):
+    """
+    Collect per-sample prediction statistics required by calibration and inequality metrics.
+
+    Returns:
+        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        predicted labels, true labels, prediction confidences, correctness flags,
+        and probability assigned to the true class (five empty arrays for a non-Module model).
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        empty = np.array([], dtype=float)
+        return empty, empty, empty, empty, empty
+
+    try:
+        device = next(model.parameters()).device
+    except Exception:  # e.g. a model without parameters: fall back to CPU
+        device = torch.device("cpu")
+
+    preds_all = []
+    targets_all = []
+    confidences_all = []
+    correct_all = []
+    true_probs_all = []
+
+    model.eval()  # NOTE: the model is left in eval mode when this function returns
+    with torch.no_grad():
+        for batch in dataloader:
+            if not isinstance(batch, (tuple, list)) or len(batch) < 2:  # skip batches that are not (x, y, ...)
+                continue
+
+            x, y = batch[0], batch[1]
+            if not (torch.is_tensor(x) and torch.is_tensor(y)):
+                continue
+
+            x = x.to(device)
+            y = _prepare_class_targets(y).to(device)
+
+            out = model(x)
+            logits = _extract_model_logits(out)
+            probs = _logits_to_probabilities(logits)
+
+            if probs.ndim != 2 or probs.size(0) == 0:
+                continue
+
+            if y.numel() != probs.size(0):  # length mismatch: truncate both to the shorter length
+                n = min(int(y.numel()), int(probs.size(0)))
+                if n == 0:
+                    continue
+                y = y[:n]
+                probs = probs[:n]
+
+            valid_mask = (y >= 0) & (y < probs.size(1))  # drop targets outside the predicted class range
+            if not torch.any(valid_mask):
+                continue
+
+            y = y[valid_mask]
+            probs = probs[valid_mask]
+
+            conf, preds = probs.max(dim=1)
+            true_probs = probs.gather(1, y.view(-1, 1)).squeeze(1)
+            correct = preds.eq(y).float()
+
+            preds_all.extend(preds.detach().cpu().numpy().tolist())
+            targets_all.extend(y.detach().cpu().numpy().tolist())
+            confidences_all.extend(conf.detach().cpu().numpy().tolist())
+            correct_all.extend(correct.detach().cpu().numpy().tolist())
+            true_probs_all.extend(true_probs.detach().cpu().numpy().tolist())
+
+    return (
+        np.asarray(preds_all, dtype=int),
+        np.asarray(targets_all, dtype=int),
+        np.asarray(confidences_all, dtype=float),
+        np.asarray(correct_all, dtype=float),
+        np.asarray(true_probs_all, dtype=float),
+    )
+
+
+def get_underfitting_score(test_accuracy):
+    """
+    Uses test accuracy as a proxy for underfitting.
+
+    Args:
+        test_accuracy (float): Test accuracy in [0, 1].
+
+    Returns:
+        float: test_accuracy converted to float, or 0.0 if the conversion fails.
+    """
+    try:
+        return float(test_accuracy)
+    except Exception:
+        logger.warning("Could not compute underfitting score")
+        return 0.0
+
+
+def get_overfitting_score(model, train_dataloader, test_accuracy):
+    """
+    Calculates overfitting as the positive train-test accuracy gap.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate on training data.
+        train_dataloader (DataLoader): Training dataloader.
+        test_accuracy (float): Test accuracy in [0, 1].
+
+    Returns:
+        float: max(0, train accuracy - test accuracy); 0.0 if the computation raises.
+    """
+    try:
+        train_accuracy = _get_model_accuracy(model, train_dataloader)
+        return max(0.0, float(train_accuracy) - float(test_accuracy))
+    except Exception as exc:
+        logger.warning("Could not compute overfitting score")
+        logger.warning(exc)
+        return 0.0
+
+
+def get_well_calibration_error(model, test_dataloader, n_bins=10):
+    """
+    Calculates an expected-calibration-error style metric using prediction confidence.
+
+    For multiclass models, confidence is taken as the max softmax probability and
+    the observed outcome is whether the prediction is correct.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        test_dataloader (DataLoader): Test dataloader.
+        n_bins (int): Number of equal-width confidence bins over [0, 1] (minimum 2).
+
+    Returns:
+        float: Calibration error in [0, 1]; 0.0 for a non-Module model or when no samples are collected.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return 0.0
+
+    try:
+        n_bins = max(2, int(n_bins))
+    except Exception:  # n_bins not convertible to int: use the default
+        n_bins = 10
+
+    _, _, confidences, correct, _ = _collect_classification_statistics(model, test_dataloader)
+
+    if len(confidences) == 0 or len(correct) == 0:
+        return 0.0
+
+    confidences = np.clip(np.asarray(confidences, dtype=float), 0.0, 1.0)
+    correct = np.clip(np.asarray(correct, dtype=float), 0.0, 1.0)
+
+    bin_edges = np.linspace(0.0, 1.0, n_bins + 1)
+    ece = 0.0
+    total = float(len(confidences))
+
+    for idx in range(n_bins):
+        left = bin_edges[idx]
+        right = bin_edges[idx + 1]
+        if idx == n_bins - 1:  # last bin is closed on the right so confidence 1.0 is counted
+            mask = (confidences >= left) & (confidences <= right)
+        else:
+            mask = (confidences >= left) & (confidences < right)
+
+        if not np.any(mask):
+            continue
+
+        bin_weight = float(mask.sum()) / total
+        bin_accuracy = float(correct[mask].mean())
+        bin_confidence = float(confidences[mask].mean())
+        ece += bin_weight * abs(bin_accuracy - bin_confidence)
+
+    return float(np.clip(ece, 0.0, 1.0))
+
+
+def get_generalized_entropy_index(model, test_dataloader, alpha=2):
+    """
+    Calculates generalized entropy index over the true-class probabilities predicted by the model.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        test_dataloader (DataLoader): Test dataloader.
+        alpha (float): GEI alpha parameter; 0, 1 and 2 are handled as special cases.
+
+    Returns:
+        float: Non-negative GEI value; 0.0 when no samples are collected, the value is NaN/inf, or an error occurs.
+    """
+    try:
+        _, _, _, _, true_class_probs = _collect_classification_statistics(model, test_dataloader)
+        if len(true_class_probs) == 0:
+            return 0.0
+
+        # Use the probability assigned to the true class as a continuous, positive
+        # benefit. This works consistently for multiclass neural models on both
+        # images and tabular data, and avoids collapsing the metric to a coarse
+        # correct/incorrect indicator.
+        eps = 1e-12
+        b = np.clip(np.asarray(true_class_probs, dtype=float), eps, 1.0)
+        mu = float(np.mean(b))
+        if mu <= 0:
+            return 0.0
+
+        ratio = np.clip(b / mu, eps, None)  # benefit relative to the mean; floor keeps log() finite
+
+        if alpha == 0:
+            val = float(np.mean(-np.log(ratio)))
+        elif alpha == 1:
+            val = float(np.mean(ratio * np.log(ratio)))
+        elif alpha == 2:
+            val = float(np.mean((ratio - 1.0) ** 2) / 2.0)
+        else:
+            val = float(np.mean(ratio**alpha - 1.0) / (alpha * (alpha - 1.0)))
+
+        if math.isnan(val) or math.isinf(val):
+            return 0.0
+        return max(0.0, val)
+    except Exception as exc:
+        logger.warning("Could not compute generalized entropy index")
+        logger.warning(exc)
+        return 0.0
+
+
+def get_theil_index(model, test_dataloader):
+    """
+    Theil index: convenience wrapper for generalized entropy index with alpha=1.
+    """
+    return get_generalized_entropy_index(model, test_dataloader, alpha=1)
+
+
+def get_coefficient_of_variation(model, test_dataloader):
+    """
+    Calculates coefficient of variation as sqrt(2 * GEI(alpha=2)); returns 0.0 on error.
+    """
+    try:
+        gei = get_generalized_entropy_index(model, test_dataloader, alpha=2)
+        return float(np.sqrt(2 * gei))
+    except Exception as exc:
+        logger.warning("Could not compute coefficient of variation")
+        logger.warning(exc)
+        return 0.0
+
+
 def get_avg_class_imbalance_model_size(scenario_name):
     """
     Calculates the mean class imbalance and model size of the nodes.
@@ -457,31 +820,248 @@ def get_feature_importance_cv(model, test_sample):
     """
 
     try:
-        cv = 0
-        batch_size = 10
-        device = "cpu"
+        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
+        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
+        vals = vals[vals > 0]
 
-        if isinstance(model, torch.nn.Module):
-            batched_data, _ = test_sample
+        if len(vals) <= 1:
+            return 0.0
+
+        cv = float(variation(vals))
+        if math.isnan(cv) or math.isinf(cv):
+            return 1.0
+        return max(0.0, cv)
+    except Exception as exc:
+        logger.warning("Could not compute feature importance CV with shap")
+        logger.warning(exc)
+        return 1.0
 
-            n = batch_size
-            m = math.floor(0.8 * n)
 
-            background = batched_data[:m].to(device)
-            test_data = batched_data[m:n].to(device)
+def _get_feature_importances(model, test_sample):
+    """
+    Computes global feature importances from SHAP values.
 
-            e = shap.DeepExplainer(model, background)
-            shap_values = e.shap_values(test_data)
-            if shap_values is not None and len(shap_values) > 0:
-                sums = np.array([shap_values[i].sum() for i in range(len(shap_values))])
-                abs_sums = np.absolute(sums)
-                cv = variation(abs_sums)
-    except Exception as e:
-        logger.warning("Could not compute feature importance CV with shap")
-        cv = 1
-    if math.isnan(cv):
-        cv = 1
-    return cv
+    Args:
+        model (object): The model.
+        test_sample (object): One test sample batch.
+
+    Returns:
+        np.ndarray: Global importances per feature.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return np.array([])
+
+    def _prepare_shap_inputs(sample):
+        if not (isinstance(sample, (tuple, list)) and len(sample) >= 1):
+            return None, None, None
+
+        batched_data = sample[0]
+        if not torch.is_tensor(batched_data) or batched_data.ndim == 0 or batched_data.size(0) == 0:
+            return None, None, None
+
+        if not torch.is_floating_point(batched_data):
+            batched_data = batched_data.float()
+
+        batch_size = int(batched_data.size(0))
+        input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
+
+        if batch_size == 1:
+            return batched_data[:1], batched_data[:1], input_shape
+
+        background_size = min(max(8, batch_size // 4), 32, batch_size - 1)
+        explainable = batch_size - background_size
+        explain_size = min(max(4, explainable), 32, explainable)
+
+        background = batched_data[:background_size]
+        test_data = batched_data[background_size:background_size + explain_size]
+
+        if test_data.size(0) == 0:
+            test_data = batched_data[: min(batch_size, 32)]
+
+        return background, test_data, input_shape
+
+    def _compute_shap_values(model_ref, background, test_data):
+        explainer_errors = []
+
+        for explainer_name in ("DeepExplainer", "GradientExplainer"):
+            try:
+                if explainer_name == "DeepExplainer":
+                    explainer = shap.DeepExplainer(model_ref, background)
+                    return explainer.shap_values(test_data, check_additivity=False)
+
+                explainer = shap.GradientExplainer(model_ref, background)
+                return explainer.shap_values(test_data)
+            except Exception as exc:
+                explainer_errors.append(f"{explainer_name}: {exc}")
+
+        raise RuntimeError("; ".join(explainer_errors))
+
+    def _feature_axes_from_shape(arr_shape, input_shape, n_samples):
+        input_shape = tuple(input_shape)
+        input_rank = len(input_shape)
+
+        if input_rank == 0 or len(arr_shape) < input_rank:
+            return None
+
+        if len(arr_shape) >= input_rank + 1 and tuple(arr_shape[1:1 + input_rank]) == input_shape:
+            return tuple(range(1, 1 + input_rank))
+
+        if len(arr_shape) >= input_rank + 2 and arr_shape[1] == n_samples and tuple(arr_shape[2:2 + input_rank]) == input_shape:
+            return tuple(range(2, 2 + input_rank))
+
+        candidates = []
+        for start in range(len(arr_shape) - input_rank + 1):
+            if tuple(arr_shape[start:start + input_rank]) == input_shape:
+                candidates.append(start)
+
+        if not candidates:
+            return None
+
+        # Prefer matches that do not consume the leading sample/output axes.
+        non_leading = [start for start in candidates if start > 0]
+        if non_leading:
+            candidates = non_leading
+
+        if len(arr_shape) > 1 and arr_shape[1] == n_samples:
+            non_output_sample = [start for start in candidates if start > 1]
+            if non_output_sample:
+                candidates = non_output_sample
+
+        start = candidates[0]
+        return tuple(range(start, start + input_rank))
+
+    try:
+        try:
+            device = next(model.parameters()).device
+        except Exception:
+            device = torch.device("cpu")
+
+        background, test_data, input_shape = _prepare_shap_inputs(test_sample)
+        if background is None or test_data is None or input_shape is None:
+            return np.array([])
+
+        background = background.to(device)
+        test_data = test_data.to(device)
+
+        model.eval()
+        shap_values = _compute_shap_values(model, background, test_data)
+
+        if shap_values is None:
+            return np.array([])
+
+        if isinstance(shap_values, (list, tuple)):
+            arrays = [np.asarray(val, dtype=float) for val in shap_values if val is not None]
+            if not arrays:
+                return np.array([])
+            shap_arr = np.stack(arrays, axis=0)
+        else:
+            shap_arr = np.asarray(shap_values, dtype=float)
+
+        if shap_arr.size == 0:
+            return np.array([])
+
+        shap_arr = np.nan_to_num(shap_arr, nan=0.0, posinf=0.0, neginf=0.0)
+        feature_axes = _feature_axes_from_shape(tuple(shap_arr.shape), input_shape, int(test_data.size(0)))
+
+        if feature_axes is None:
+            # Conservative fallback: treat the first axis as samples when possible and
+            # flatten the remaining dimensions into features.
+            if shap_arr.ndim == 1:
+                importances = np.abs(shap_arr)
+            else:
+                aggregate_axes = (0,)
+                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
+        else:
+            aggregate_axes = tuple(idx for idx in range(shap_arr.ndim) if idx not in feature_axes)
+            if aggregate_axes:
+                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
+            else:
+                importances = np.abs(shap_arr)
+
+        importances = np.asarray(importances, dtype=float).reshape(-1)
+        importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
+        return np.maximum(importances, 0.0)
+    except Exception as exc:
+        logger.warning("Could not compute feature importances with shap")
+        logger.warning(exc)
+        return np.array([])
+
+
+def get_alpha_score(model, test_sample, alpha=0.8):
+    """
+    Computes alpha score from global feature importances.
+    """
+    try:
+        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
+        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
+        vals = np.maximum(vals, 0.0)
+        total_features = len(vals)
+        if total_features == 0 or np.sum(vals) <= 1e-12:
+            return 1.0
+
+        try:
+            alpha = float(alpha)
+        except Exception:
+            alpha = 0.8
+        alpha = min(max(alpha, 0.0), 1.0)
+
+        vals_sorted = np.sort(vals)[::-1]
+        cum_sum = np.cumsum(vals_sorted)
+        threshold = float(alpha) * np.sum(vals_sorted)
+        idx = np.searchsorted(cum_sum, threshold)
+        return float(min(total_features, idx + 1) / total_features)
+    except Exception as exc:
+        logger.warning("Could not compute alpha score")
+        logger.warning(exc)
+        return 1.0
+
+
+def _get_spread_base(model, test_sample, divergence=True):
+    vals = _get_feature_importances(model, test_sample)
+    tol = 1e-8
+
+    if len(vals) == 0 or np.sum(vals) < tol:
+        return 0.0 if divergence else 1.0
+    if len(vals) == 1:
+        return 0.0 if divergence else 1.0
+
+    weights = vals / np.sum(vals)
+    equal_weights = np.ones(len(vals)) / len(vals)
+
+    if divergence:
+        metric = jensenshannon(weights, equal_weights, base=2)
+    else:
+        denom = entropy(equal_weights)
+        metric = 0.0 if denom <= tol else entropy(weights) / denom
+
+    if math.isnan(metric) or math.isinf(metric):
+        return 0.0 if divergence else 1.0
+    return float(np.clip(metric, 0.0, 1.0))
+
+
+def get_spread_ratio(model, test_sample):
+    """
+    Computes spread ratio from global feature importances.
+    """
+    try:
+        return _get_spread_base(model, test_sample, divergence=False)
+    except Exception as exc:
+        logger.warning("Could not compute spread ratio")
+        logger.warning(exc)
+        return 1.0
+
+
+def get_spread_divergence(model, test_sample):
+    """
+    Computes spread divergence from global feature importances.
+    """
+    try:
+        return _get_spread_base(model, test_sample, divergence=True)
+    except Exception as exc:
+        logger.warning("Could not compute spread divergence")
+        logger.warning(exc)
+        return 0.0
 
 
 def get_clever_score(model, test_sample, nb_classes, learning_rate):
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template.json
index b2369d7ea..63dbf86a7 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template.json
@@ -27,6 +27,14 @@
 		"learning_rate": "",
 		"local_update_steps": ""
 	},
+	"privacy": {
+		"privacy_risk": ""
+	},
+	"explainability": {
+		"alpha_score": "",
+		"spread_ratio": "",
+		"spread_divergence": ""
+	},
 	"performance": {
 		"test_loss_avg": "",
 		"test_acc_avg": "",
@@ -41,7 +49,13 @@
 	"fairness": {
 		"test_acc_cv": "",
 		"selection_cv": "",
-		"class_imbalance": ""
+		"class_imbalance": "",
+		"underfitting": "",
+		"overfitting": "",
+		"well_calibration_error": "",
+		"generalized_entropy_index": "",
+		"theil_index": "",
+		"coefficient_of_variation": ""
 	},
 	"system": {
 		"avg_time_minutes": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index e2efbce7d..91d49c194 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -27,6 +27,14 @@
 		"learning_rate": "",
 		"local_update_steps": ""
 	},
+	"privacy": {
+		"privacy_risk": ""
+	},
+	"explainability": {
+		"alpha_score": "",
+		"spread_ratio": "",
+		"spread_divergence": ""
+	},
 	"performance": {
 		"test_loss": "",
 		"test_acc": "",
@@ -39,7 +47,13 @@
 		"test_attack_success_rate": ""
 	},
 	"fairness": {
-		"class_imbalance": ""
+		"class_imbalance": "",
+		"underfitting": "",
+		"overfitting": "",
+		"well_calibration_error": "",
+		"generalized_entropy_index": "",
+		"theil_index": "",
+		"coefficient_of_variation": ""
 	},
 	"system": {
 		"time_minutes": "",
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 0ffca4c10..2605a3c84 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -18,7 +18,7 @@
 from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 
 dirname = os.path.dirname(__file__)
@@ -91,12 +91,17 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["configuration"]["visualization"] = True
         factsheet["configuration"]["total_round_num"] = n_rounds
 
+        """
         if poisoned_noise_percent != 0:
             factsheet["configuration"]["differential_privacy"] = True
             factsheet["configuration"]["dp_epsilon"] = poisoned_noise_percent
         else:
             factsheet["configuration"]["differential_privacy"] = False
             factsheet["configuration"]["dp_epsilon"] = ""
+        """
+
+        factsheet["configuration"]["differential_privacy"] = False
+        factsheet["configuration"]["dp_epsilon"] = ""
 
         if dataset == "MNIST" and algorithm == "MLP":
             model = MNISTModelMLP()
@@ -124,6 +129,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
 
         train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
+        train_dataloader_file = os.path.join(files_dir, f"participant_{participant_idx}_train_loader.pk")
         test_dataloader_file = os.path.join(files_dir, f"participant_{participant_idx}_test_loader.pk")
         emissions_file = os.path.join(files_dir, f"emissions_{participant_idx}.csv")
 
@@ -179,10 +185,57 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         model.load_state_dict(lightning_model.state_dict())
 
+        with open(train_dataloader_file, "rb") as d_file:
+            train_dataloader = pickle.load(d_file)
+
         with open(test_dataloader_file, "rb") as d_file:
             test_dataloader = pickle.load(d_file)
 
         test_sample = next(iter(test_dataloader))
+        factsheet["fairness"]["underfitting"] = get_underfitting_score(
+            factsheet["performance"]["test_acc"]
+        )
+        overfitting_value = get_overfitting_score(
+            model,
+            train_dataloader,
+            factsheet["performance"]["test_acc"],
+        )
+
+        factsheet["fairness"]["overfitting"] = 1/(1 + overfitting_value)
+
+        well_calibration_error_value = get_well_calibration_error(
+            model,
+            test_dataloader,
+        )
+
+        factsheet["fairness"]["well_calibration_error"] = 1/(1 + well_calibration_error_value)
+        generalized_entropy_index_value = get_generalized_entropy_index(
+            model,
+            test_dataloader,
+        )
+        factsheet["fairness"]["generalized_entropy_index"] = 1/(1 + generalized_entropy_index_value)
+        theil_index_value = get_theil_index(
+            model,
+            test_dataloader,
+        )
+        factsheet["fairness"]["theil_index"] = 1/(1 + theil_index_value)
+        coefficient_of_variation_value = get_coefficient_of_variation(
+            model,
+            test_dataloader,
+        )
+        factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
+        factsheet["explainability"]["alpha_score"] = get_alpha_score(
+            model,
+            test_sample,
+        )
+        factsheet["explainability"]["spread_ratio"] = get_spread_ratio(
+            model,
+            test_sample,
+        )
+        factsheet["explainability"]["spread_divergence"] = get_spread_divergence(
+            model,
+            test_sample,
+        )
 
         lr = factsheet["configuration"]["learning_rate"]
 
@@ -202,7 +255,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
 
         value_attack_success_rate = attack_success_rate(model, test_sample)
-        factsheet["performance"]["test_attack_success_rate"] = 1 if value_attack_success_rate > 1 else value_attack_success_rate
+        factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
 
         feature_importance = get_feature_importance_cv(model, test_sample)
         factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index fc4b4808e..cb377295d 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -16,7 +16,7 @@
 from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
@@ -106,12 +106,17 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     factsheet["configuration"]["visualization"] = True
                     factsheet["configuration"]["total_round_num"] = n_rounds
 
+                    """
                     if poisoned_noise_percent != 0:
                         factsheet["configuration"]["differential_privacy"] = True
                         factsheet["configuration"]["dp_epsilon"] = poisoned_noise_percent
                     else:
                         factsheet["configuration"]["differential_privacy"] = False
                         factsheet["configuration"]["dp_epsilon"] = ""
+                    """
+
+                    factsheet["configuration"]["differential_privacy"] = False
+                    factsheet["configuration"]["dp_epsilon"] = ""
 
                     if dataset == "MNIST" and algorithm == "MLP":
                         model = MNISTModelMLP()
@@ -165,6 +170,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 files_dir = f"{os.environ.get('NEBULA_LOGS_DIR')}/{scenario_name}/trustworthiness"
 
+                train_dataloader_file = f"{files_dir}/participant_{participant_idx}_train_loader.pk"
                 test_dataloader_file = f"{files_dir}/participant_{participant_idx}_test_loader.pk"
                 final_model_file = f"{files_dir}/participant_{participant_idx}_final_model.pk"
                 emissions_file = os.path.join(files_dir, "emissions.csv")
@@ -186,6 +192,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["performance"]["test_acc_avg"] = result_avg_loss_accuracy[1]
                 test_acc_cv = get_cv(std=result_avg_loss_accuracy[2], mean=result_avg_loss_accuracy[1])
                 factsheet["fairness"]["test_acc_cv"] = 1 if test_acc_cv > 1 else test_acc_cv
+                _, participant_test_acc = get_participant_loss_accuracy(scenario_name, participant_idx)
 
                 factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
                 factsheet["system"]["avg_model_size"] = avg_model_size
@@ -225,10 +232,55 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 model.load_state_dict(lightning_model.state_dict())
 
+                with open(train_dataloader_file, "rb") as file:
+                    train_dataloader = pickle.load(file)
+
                 with open(test_dataloader_file, "rb") as file:
                     test_dataloader = pickle.load(file)
 
                 test_sample = next(iter(test_dataloader))
+                factsheet["fairness"]["underfitting"] = get_underfitting_score(
+                    participant_test_acc
+                )
+                overfitting_value = get_overfitting_score(
+                    model,
+                    train_dataloader,
+                    participant_test_acc,
+                )
+                factsheet["fairness"]["overfitting"] = 1/(1 + overfitting_value)
+                well_calibration_error_value = get_well_calibration_error(
+                    model,
+                    test_dataloader,
+                )
+
+                factsheet["fairness"]["well_calibration_error"] = 1/(1 + well_calibration_error_value)
+                generalized_entropy_index_value = get_generalized_entropy_index(
+                    model,
+                    test_dataloader,
+                )
+                factsheet["fairness"]["generalized_entropy_index"] = 1/(1 + generalized_entropy_index_value)
+                theil_index_value = get_theil_index(
+                    model,
+                    test_dataloader,
+                )
+                factsheet["fairness"]["theil_index"] = 1/(1 + theil_index_value)
+                coefficient_of_variation_value = get_coefficient_of_variation(
+                    model,
+                    test_dataloader,
+                )
+                factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
+                factsheet["explainability"]["alpha_score"] = get_alpha_score(
+                    model,
+                    test_sample,
+                )
+                factsheet["explainability"]["spread_ratio"] = get_spread_ratio(
+                    model,
+                    test_sample,
+                )
+                factsheet["explainability"]["spread_divergence"] = get_spread_divergence(
+                    model,
+                    test_sample,
+                )
 
                 lr = factsheet["configuration"]["learning_rate"]
 
@@ -248,7 +300,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
 
                 value_attack_success_rate = attack_success_rate(model, test_sample)
-                factsheet["performance"]["test_attack_success_rate"] = 1 if value_attack_success_rate > 1 else value_attack_success_rate
+                factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
 
                 feature_importance = get_feature_importance_cv(model, test_sample)
                 factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index c7f6f32e4..a85820946 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -836,10 +836,16 @@ async def init(self, experiment_name):
 
     async def _create_pk_files(self, experiment_name):
         # Save data to local files to compute trustworthiness
+        train_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_train_loader.pk"
         test_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_test_loader.pk"
+        self._engine.trainer.datamodule.setup(stage="fit")
+        train_loader = self._engine.trainer.datamodule.train_dataloader()
         self._engine.trainer.datamodule.setup(stage="test")
         test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
 
+        with open(train_loader_filename, 'wb') as f:
+            pickle.dump(train_loader, f)
+            f.close()
         with open(test_loader_filename, 'wb') as f:
             pickle.dump(test_loader, f)
             f.close()

From 66d39e9f24c12ba10de575523fc53d778488f4b1 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 8 Apr 2026 17:03:32 +0200
Subject: [PATCH 18/66] All models updated, trustworthiness now accepts all
 models and datasets, minor errors fixed, SHAP being fixed

---
 nebula/addons/trustworthiness/calculation.py  | 69 ++++++++++++++-
 .../addons/trustworthiness/dfl_factsheet.py   | 44 ++++++++++
 nebula/addons/trustworthiness/factsheet.py    | 83 ++++++++++++++++++-
 nebula/core/models/cifar10/cnn.py             |  6 ++
 nebula/core/models/cifar10/cnnV2.py           |  6 ++
 nebula/core/models/cifar10/cnnV3.py           |  6 ++
 nebula/core/models/cifar10/fastermobilenet.py |  6 ++
 nebula/core/models/cifar10/resnet.py          |  6 ++
 nebula/core/models/cifar10/simplemobilenet.py |  6 ++
 nebula/core/models/cifar100/cnn.py            |  6 ++
 nebula/core/models/emnist/cnn.py              |  6 ++
 nebula/core/models/emnist/mlp.py              |  6 ++
 nebula/core/models/fashionmnist/cnn.py        |  6 ++
 nebula/core/models/fashionmnist/mlp.py        |  6 ++
 nebula/core/models/mnist/cnn.py               |  6 ++
 15 files changed, 261 insertions(+), 7 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 92b36d31d..2d978b173 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -3,6 +3,8 @@
 import numbers
 import os.path
 import statistics
+import copy
+import gc
 from datetime import datetime
 from math import e
 from os.path import exists
@@ -852,6 +854,18 @@ def _get_feature_importances(model, test_sample):
         logger.warning("Model is not a torch.nn.Module")
         return np.array([])
 
+    def _clone_model(model_ref, device):
+        try:
+            model_clone = copy.deepcopy(model_ref)
+            model_clone.to(device)
+            model_clone.eval()
+            return model_clone
+        except Exception as exc:
+            logger.warning("Could not clone model for SHAP, using original model")
+            logger.warning(exc)
+            model_ref.eval()
+            return model_ref
+
     def _prepare_shap_inputs(sample):
         if not (isinstance(sample, (tuple, list)) and len(sample) >= 1):
             return None, None, None
@@ -864,7 +878,15 @@ def _prepare_shap_inputs(sample):
             batched_data = batched_data.float()
 
         batch_size = int(batched_data.size(0))
-        input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
+        if batched_data.ndim == 4:
+            # SHAP image explainers operate more naturally on channel-last images.
+            input_shape = (
+                int(batched_data.shape[2]),
+                int(batched_data.shape[3]),
+                int(batched_data.shape[1]),
+            )
+        else:
+            input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
 
         if batch_size == 1:
             return batched_data[:1], batched_data[:1], input_shape
@@ -884,7 +906,41 @@ def _prepare_shap_inputs(sample):
     def _compute_shap_values(model_ref, background, test_data):
         explainer_errors = []
 
+        if test_data.ndim == 4:
+            def predict_fn(images):
+                if isinstance(images, list):
+                    images = np.asarray(images)
+
+                image_tensor = torch.as_tensor(images, dtype=test_data.dtype, device=background.device)
+                if image_tensor.ndim == 3:
+                    image_tensor = image_tensor.unsqueeze(0)
+
+                if image_tensor.ndim != 4:
+                    raise ValueError(f"Expected 4D image batch for SHAP, got shape {tuple(image_tensor.shape)}")
+
+                # SHAP image maskers provide NHWC arrays; convert back to NCHW for the model.
+                image_tensor = image_tensor.permute(0, 3, 1, 2).contiguous()
+
+                with torch.no_grad():
+                    logits = _extract_model_logits(model_ref(image_tensor))
+                    probs = _logits_to_probabilities(logits)
+                return probs.detach().cpu().numpy()
+
+            try:
+                test_images = test_data.detach().cpu().numpy().transpose(0, 2, 3, 1)
+                masker = shap.maskers.Image("blur(8,8)", test_images[0].shape)
+                explainer = shap.Explainer(predict_fn, masker)
+                explanation = explainer(
+                    test_images[: min(int(test_images.shape[0]), 4)],
+                    max_evals=128,
+                    batch_size=8,
+                )
+                return explanation.values
+            except Exception as exc:
+                explainer_errors.append(f"ImageExplainer: {exc}")
+
         for explainer_name in ("DeepExplainer", "GradientExplainer"):
+            explainer = None
             try:
                 if explainer_name == "DeepExplainer":
                     explainer = shap.DeepExplainer(model_ref, background)
@@ -894,6 +950,11 @@ def _compute_shap_values(model_ref, background, test_data):
                 return explainer.shap_values(test_data)
             except Exception as exc:
                 explainer_errors.append(f"{explainer_name}: {exc}")
+            finally:
+                # SHAP explainers may register autograd hooks. If we explain on the
+                # original model, those hooks can leak into later ART metrics.
+                del explainer
+                gc.collect()
 
         raise RuntimeError("; ".join(explainer_errors))
 
@@ -944,8 +1005,10 @@ def _feature_axes_from_shape(arr_shape, input_shape, n_samples):
         background = background.to(device)
         test_data = test_data.to(device)
 
-        model.eval()
-        shap_values = _compute_shap_values(model, background, test_data)
+        shap_model = _clone_model(model, device)
+        shap_values = _compute_shap_values(shap_model, background, test_data)
+        del shap_model
+        gc.collect()
 
         if shap_values is None:
             return np.array([])
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 2605a3c84..4b742a662 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -18,6 +18,17 @@
 from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
+from nebula.core.models.fashionmnist.mlp import FashionMNISTModelMLP
+from nebula.core.models.fashionmnist.cnn import FashionMNISTModelCNN
+from nebula.core.models.emnist.mlp import EMNISTModelMLP
+from nebula.core.models.emnist.cnn import EMNISTModelCNN
+from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
+from nebula.core.models.cifar10.cnnV2 import CIFAR10ModelCNN_V2
+from nebula.core.models.cifar10.cnnV3 import CIFAR10ModelCNN_V3
+from nebula.core.models.cifar10.fastermobilenet import FasterMobileNet
+from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
+from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
+from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
 from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 
@@ -109,6 +120,12 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         elif dataset == "MNIST" and algorithm == "CNN":
             model = MNISTModelCNN()
             num_classes_temp = 10
+        elif dataset == "FashionMNIST" and algorithm == "MLP":
+            model = FashionMNISTModelMLP()
+            num_classes_temp = 10
+        elif dataset == "FashionMNIST" and algorithm == "CNN":
+            model = FashionMNISTModelCNN()
+            num_classes_temp = 10
         elif dataset == "Covtype" and algorithm == "MLP":
             model = CovtypeModelMLP()
             num_classes_temp = 7
@@ -121,6 +138,33 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         elif dataset == "BreastCancer" and algorithm == "MLP":
             model = BreastCancerModelMLP()
             num_classes_temp = 2
+        elif dataset == "EMNIST" and algorithm == "MLP":
+            model = EMNISTModelMLP()
+            num_classes_temp = 47
+        elif dataset == "EMNIST" and algorithm == "CNN":
+            model = EMNISTModelCNN()
+            num_classes_temp = 47
+        elif dataset == "CIFAR10" and algorithm == "ResNet9":
+            model = CIFAR10ModelResNet(classifier="resnet9")
+            num_classes_temp = 10
+        elif dataset == "CIFAR10" and algorithm == "fastermobilenet":
+            model = FasterMobileNet()
+            num_classes_temp = 10
+        elif dataset == "CIFAR10" and algorithm == "simplemobilenet":
+            model = SimpleMobileNetV1()
+            num_classes_temp = 10
+        elif dataset == "CIFAR10" and algorithm == "CNN":
+            model = CIFAR10ModelCNN()
+            num_classes_temp = 10
+        elif dataset == "CIFAR10" and algorithm == "CNNv2":
+            model = CIFAR10ModelCNN_V2()
+            num_classes_temp = 10
+        elif dataset == "CIFAR10" and algorithm == "CNNv3":
+            model = CIFAR10ModelCNN_V3()
+            num_classes_temp = 10
+        elif dataset == "CIFAR100" and algorithm == "CNN":
+            model = CIFAR100ModelCNN()
+            num_classes_temp = 100
 
         factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
         factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index cb377295d..4577dc1c4 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -16,6 +16,17 @@
 from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
 from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
 from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
+from nebula.core.models.fashionmnist.mlp import FashionMNISTModelMLP
+from nebula.core.models.fashionmnist.cnn import FashionMNISTModelCNN
+from nebula.core.models.emnist.mlp import EMNISTModelMLP
+from nebula.core.models.emnist.cnn import EMNISTModelCNN
+from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
+from nebula.core.models.cifar10.cnnV2 import CIFAR10ModelCNN_V2
+from nebula.core.models.cifar10.cnnV3 import CIFAR10ModelCNN_V3
+from nebula.core.models.cifar10.fastermobilenet import FasterMobileNet
+from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
+from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
+from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
 from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
@@ -124,18 +135,51 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     elif dataset == "MNIST" and algorithm == "CNN":
                         model = MNISTModelCNN()
                         num_classes_temp = 10
+                    elif dataset == "FashionMNIST" and algorithm == "MLP":
+                        model = FashionMNISTModelMLP()
+                        num_classes_temp = 10
+                    elif dataset == "FashionMNIST" and algorithm == "CNN":
+                        model = FashionMNISTModelCNN()
+                        num_classes_temp = 10
                     elif dataset == "Covtype" and algorithm == "MLP":
                         model = CovtypeModelMLP()
                         num_classes_temp = 7
                     elif dataset == "KDDCUP99" and algorithm == "MLP":
                         model = KDDCUP99ModelMLP()
-                        num_classes_temp = 2
+                        num_classes_temp = 23
                     elif dataset == "AdultCensus" and algorithm == "MLP":
                         model = AdultCensusModelMLP()
                         num_classes_temp = 2
                     elif dataset == "BreastCancer" and algorithm == "MLP":
                         model = BreastCancerModelMLP()
                         num_classes_temp = 2
+                    elif dataset == "EMNIST" and algorithm == "MLP":
+                        model = EMNISTModelMLP()
+                        num_classes_temp = 47
+                    elif dataset == "EMNIST" and algorithm == "CNN":
+                        model = EMNISTModelCNN()
+                        num_classes_temp = 47
+                    elif dataset == "CIFAR10" and algorithm == "ResNet9":
+                        model = CIFAR10ModelResNet(classifier="resnet9")
+                        num_classes_temp = 10
+                    elif dataset == "CIFAR10" and algorithm == "fastermobilenet":
+                        model = FasterMobileNet()
+                        num_classes_temp = 10
+                    elif dataset == "CIFAR10" and algorithm == "simplemobilenet":
+                        model = SimpleMobileNetV1()
+                        num_classes_temp = 10
+                    elif dataset == "CIFAR10" and algorithm == "CNN":
+                        model = CIFAR10ModelCNN()
+                        num_classes_temp = 10
+                    elif dataset == "CIFAR10" and algorithm == "CNNv2":
+                        model = CIFAR10ModelCNN_V2()
+                        num_classes_temp = 10
+                    elif dataset == "CIFAR10" and algorithm == "CNNv3":
+                        model = CIFAR10ModelCNN_V3()
+                        num_classes_temp = 10
+                    elif dataset == "CIFAR100" and algorithm == "CNN":
+                        model = CIFAR100ModelCNN()
+                        num_classes_temp = 100
 
                     factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
                     factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
@@ -217,6 +261,12 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 elif dataset == "MNIST" and model == "CNN":
                     model = MNISTModelCNN()
                     num_classes_temp = 10
+                elif dataset == "FashionMNIST" and model == "MLP":
+                    model = FashionMNISTModelMLP()
+                    num_classes_temp = 10
+                elif dataset == "FashionMNIST" and model == "CNN":
+                    model = FashionMNISTModelCNN()
+                    num_classes_temp = 10
                 elif dataset == "Covtype" and model == "MLP":
                     model = CovtypeModelMLP()
                     num_classes_temp = 7
@@ -229,6 +279,33 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 elif dataset == "BreastCancer" and model == "MLP":
                     model = BreastCancerModelMLP()
                     num_classes_temp = 2
+                elif dataset == "EMNIST" and model == "MLP":
+                    model = EMNISTModelMLP()
+                    num_classes_temp = 47
+                elif dataset == "EMNIST" and model == "CNN":
+                    model = EMNISTModelCNN()
+                    num_classes_temp = 47
+                elif dataset == "CIFAR10" and model == "ResNet9":
+                    model = CIFAR10ModelResNet(classifier="resnet9")
+                    num_classes_temp = 10
+                elif dataset == "CIFAR10" and model == "fastermobilenet":
+                    model = FasterMobileNet()
+                    num_classes_temp = 10
+                elif dataset == "CIFAR10" and model == "simplemobilenet":
+                    model = SimpleMobileNetV1()
+                    num_classes_temp = 10
+                elif dataset == "CIFAR10" and model == "CNN":
+                    model = CIFAR10ModelCNN()
+                    num_classes_temp = 10
+                elif dataset == "CIFAR10" and model == "CNNv2":
+                    model = CIFAR10ModelCNN_V2()
+                    num_classes_temp = 10
+                elif dataset == "CIFAR10" and model == "CNNv3":
+                    model = CIFAR10ModelCNN_V3()
+                    num_classes_temp = 10
+                elif dataset == "CIFAR100" and model == "CNN":
+                    model = CIFAR100ModelCNN()
+                    num_classes_temp = 100
 
                 model.load_state_dict(lightning_model.state_dict())
 
@@ -239,9 +316,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                     test_dataloader = pickle.load(file)
 
                 test_sample = next(iter(test_dataloader))
-                factsheet["fairness"]["underfitting"] = get_underfitting_score(
-                    participant_test_acc
-                )
+                factsheet["fairness"]["underfitting"] = factsheet["performance"]["test_acc_avg"]
                 overfitting_value = get_overfitting_score(
                     model,
                     train_dataloader,
diff --git a/nebula/core/models/cifar10/cnn.py b/nebula/core/models/cifar10/cnn.py
index 473ff3b93..16b9cc70a 100755
--- a/nebula/core/models/cifar10/cnn.py
+++ b/nebula/core/models/cifar10/cnn.py
@@ -45,3 +45,9 @@ def configure_optimizers(self):
         )
         self._optimizer = optimizer
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/cifar10/cnnV2.py b/nebula/core/models/cifar10/cnnV2.py
index d10a81996..c8f658c92 100755
--- a/nebula/core/models/cifar10/cnnV2.py
+++ b/nebula/core/models/cifar10/cnnV2.py
@@ -49,3 +49,9 @@ def configure_optimizers(self):
             amsgrad=self.config["amsgrad"],
         )
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/cifar10/cnnV3.py b/nebula/core/models/cifar10/cnnV3.py
index 94389385c..47d69a553 100755
--- a/nebula/core/models/cifar10/cnnV3.py
+++ b/nebula/core/models/cifar10/cnnV3.py
@@ -76,3 +76,9 @@ def configure_optimizers(self):
             amsgrad=self.config["amsgrad"],
         )
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/cifar10/fastermobilenet.py b/nebula/core/models/cifar10/fastermobilenet.py
index 185587a6c..91f9b89d3 100755
--- a/nebula/core/models/cifar10/fastermobilenet.py
+++ b/nebula/core/models/cifar10/fastermobilenet.py
@@ -65,3 +65,9 @@ def configure_optimizers(self):
             amsgrad=self.config["amsgrad"],
         )
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/cifar10/resnet.py b/nebula/core/models/cifar10/resnet.py
index 98ff9cf9f..09af6e467 100755
--- a/nebula/core/models/cifar10/resnet.py
+++ b/nebula/core/models/cifar10/resnet.py
@@ -149,3 +149,9 @@ def configure_optimizers(self):
         else:
             optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=1e-4)
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/cifar10/simplemobilenet.py b/nebula/core/models/cifar10/simplemobilenet.py
index d4643a79e..7a40bc4d7 100755
--- a/nebula/core/models/cifar10/simplemobilenet.py
+++ b/nebula/core/models/cifar10/simplemobilenet.py
@@ -67,3 +67,9 @@ def forward(self, x):
     def configure_optimizers(self):
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/cifar100/cnn.py b/nebula/core/models/cifar100/cnn.py
index fef6a4375..685fc0531 100755
--- a/nebula/core/models/cifar100/cnn.py
+++ b/nebula/core/models/cifar100/cnn.py
@@ -100,3 +100,9 @@ def configure_optimizers(self):
             betas=(self.config["beta1"], self.config["beta2"]),
             amsgrad=self.config["amsgrad"],
         )
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/emnist/cnn.py b/nebula/core/models/emnist/cnn.py
index ea4277acb..f07e6b61f 100755
--- a/nebula/core/models/emnist/cnn.py
+++ b/nebula/core/models/emnist/cnn.py
@@ -56,3 +56,9 @@ def configure_optimizers(self):
             amsgrad=self.config["amsgrad"],
         )
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/emnist/mlp.py b/nebula/core/models/emnist/mlp.py
index b5f93f56a..20e8017d6 100755
--- a/nebula/core/models/emnist/mlp.py
+++ b/nebula/core/models/emnist/mlp.py
@@ -35,6 +35,12 @@ def forward(self, x):
         x = self.l3(x)
         return x
 
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
     def configure_optimizers(self):
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/fashionmnist/cnn.py b/nebula/core/models/fashionmnist/cnn.py
index 5e1471f93..3c2427bc8 100755
--- a/nebula/core/models/fashionmnist/cnn.py
+++ b/nebula/core/models/fashionmnist/cnn.py
@@ -56,3 +56,9 @@ def configure_optimizers(self):
             amsgrad=self.config["amsgrad"],
         )
         return optimizer
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
diff --git a/nebula/core/models/fashionmnist/mlp.py b/nebula/core/models/fashionmnist/mlp.py
index bd4159b03..3ebfa9cfa 100755
--- a/nebula/core/models/fashionmnist/mlp.py
+++ b/nebula/core/models/fashionmnist/mlp.py
@@ -35,6 +35,12 @@ def forward(self, x):
         x = self.l3(x)
         return x
 
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
+
     def configure_optimizers(self):
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
diff --git a/nebula/core/models/mnist/cnn.py b/nebula/core/models/mnist/cnn.py
index 7cec6b6c3..dd9c4131a 100755
--- a/nebula/core/models/mnist/cnn.py
+++ b/nebula/core/models/mnist/cnn.py
@@ -54,3 +54,9 @@ def configure_optimizers(self):
         )
         self._optimizer = optimizer
         return optimizer
+
+    def count_parameters(self):
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)  # trainable (requires_grad) elements only
+
+    def get_learning_rate(self):
+        return self.learning_rate  # read by the trustworthiness factsheet for configuration/learning_rate

From ea49fc7a3e167c83baca25395adef4f8952eff6b Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 10 Apr 2026 12:00:41 +0200
Subject: [PATCH 19/66] Privacy metrics implemented: MIA AUC and Epsilon Star,
 all new metrics added to eval_metrics and trustworthiness calculation, new
 notion added: outcome_fairness, other minor changes

---
 nebula/addons/trustworthiness/calculation.py  | 193 ++++++++++++++----
 .../trustworthiness/configs/eval_metrics.json | 153 +++++++++++++-
 .../configs/eval_metrics_dfl.json             | 149 +++++++++++++-
 .../configs/factsheet_template.json           |   6 +-
 .../configs/factsheet_template_dfl.json       |   6 +-
 .../addons/trustworthiness/dfl_factsheet.py   |  15 +-
 nebula/addons/trustworthiness/factsheet.py    |  14 +-
 .../addons/trustworthiness/trustworthiness.py |   2 +
 nebula/controller/scenarios.py                |   3 +
 .../frontend/static/js/deployment/scenario.js |   3 +
 .../static/js/deployment/trustworthiness.js   |  22 +-
 nebula/frontend/templates/deployment.html     |  22 +-
 12 files changed, 520 insertions(+), 68 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 2d978b173..48e5f326e 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -20,6 +20,7 @@
 from codecarbon import EmissionsTracker
 from scipy.spatial.distance import jensenshannon
 from scipy.stats import entropy, variation
+from sklearn.metrics import roc_auc_score, roc_curve
 from torch import nn, optim
 import torch.nn.functional as F
 import time
@@ -263,6 +264,155 @@ def get_global_privacy_risk(dp, epsilon, n):
         return 1
 
 
+def _collect_per_sample_losses(model, dataloader, max_samples=5000):
+    """
+    Compute per-sample cross-entropy losses for a dataloader.
+
+    Args:
+        model (torch.nn.Module): The model to evaluate.
+        dataloader: DataLoader providing (samples, labels).
+        max_samples (int): Maximum number of samples to process.
+
+    Returns:
+        np.ndarray: Losses per sample.
+    """
+    if not isinstance(model, torch.nn.Module) or dataloader is None:  # invalid input is not an error here
+        return np.array([])  # callers detect this case through `.size == 0`
+
+    try:
+        device = next(model.parameters()).device  # evaluate on the device of the first parameter
+    except Exception:
+        device = torch.device("cpu")  # fall back to CPU when the parameter device cannot be read
+
+    criterion = nn.CrossEntropyLoss(reduction="none")  # "none" keeps one loss value per sample
+    losses = []  # one NumPy array per processed batch
+    collected = 0  # running count of samples already evaluated
+
+    model.eval()  # NOTE: the previous train/eval mode is not restored afterwards
+    with torch.no_grad():
+        for batch in dataloader:
+            if not isinstance(batch, (tuple, list)) or len(batch) < 2:  # need at least (samples, labels)
+                continue
+
+            samples, labels = batch[0], batch[1]  # any extra batch elements are ignored
+            if not torch.is_tensor(samples) or not torch.is_tensor(labels):  # silently skip non-tensor batches
+                continue
+
+            remaining = max_samples - collected
+            if remaining <= 0:  # sample budget exhausted
+                break
+
+            samples = samples[:remaining].to(device)  # truncate so the total never exceeds max_samples
+            labels = labels[:remaining]
+
+            if labels.ndim > 1:  # presumably one-hot or soft labels; TODO confirm against the datasets
+                labels = torch.argmax(labels, dim=1)
+
+            labels = labels.long().to(device)
+
+            outputs = model(samples)
+            logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs  # tuple/list output: first item is used as logits
+            batch_losses = criterion(logits, labels)
+
+            losses.append(batch_losses.detach().cpu().numpy())
+            collected += int(batch_losses.shape[0])
+
+    if not losses:  # no usable batch was found
+        return np.array([])
+
+    return np.concatenate(losses, axis=0)
+
+
+def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000):
+    """
+    Compute empirical epsilon* from train/test loss distributions.
+
+    This follows the same core structure as privacy_metrics_core.epsilon_star,
+    adapted to PyTorch models and DataLoaders used in Nebula.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        train_dataloader: Training DataLoader.
+        test_dataloader: Test DataLoader.
+        max_samples (int): Maximum samples to evaluate per split.
+
+    Returns:
+        float: Empirical epsilon* value. Returns 0.0 on failure.
+    """
+    try:
+        loss_train = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
+        loss_test = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)
+
+        if loss_train.size == 0 or loss_test.size == 0:  # either split produced no losses
+            return 0.0
+
+        scores = np.concatenate([-loss_train, -loss_test])  # attack score is the negated loss
+        y_true = np.concatenate([np.ones(len(loss_train)), np.zeros(len(loss_test))])  # 1 = train sample, 0 = test sample
+
+        fpr, tpr, _ = roc_curve(y_true, scores)
+
+        fpr = np.clip(fpr, 1e-10, 1 - 1e-10)  # keep rates strictly inside (0, 1) so no ratio below divides by zero
+        tpr = np.clip(tpr, 1e-10, 1 - 1e-10)  # NOTE(review): with fpr clipped to 1e-10, m1 becomes huge whenever tpr > delta; confirm intended
+        fnr = 1 - tpr
+
+        delta = 1.0 / len(loss_train) if len(loss_train) > 0 else 1e-5  # else-branch looks unreachable: empty loss_train returned above
+
+        m1 = (1 - delta - fnr) / fpr  # four ratios built from (fpr, fnr, delta) at every ROC threshold
+        m2 = (1 - delta - fpr) / fnr
+        m3 = (fnr - delta) / (1 - fpr)
+        m4 = (fpr - delta) / (1 - fnr)
+
+        epsilon_star_val = np.log(
+            np.nanmax(np.maximum.reduce([m1, m2, m3, m4, np.ones_like(m1)]))  # ones_like floors the ratio at 1, so the log is >= 0
+        )
+
+        if np.isnan(epsilon_star_val) or np.isinf(epsilon_star_val):  # non-finite result is reported as 0.0
+            return 0.0
+
+        return float(max(0.0, epsilon_star_val))
+    except Exception as exc:  # best-effort metric: log and return the 0.0 fallback
+        logger.warning("Could not compute epsilon_star")
+        logger.warning(exc)
+        return 0.0
+
+
+def get_mia_auc(model, train_dataloader, test_dataloader, max_samples=5000):
+    """
+    Compute membership inference attack AUC using per-sample loss as the attack score.
+
+    Lower loss suggests a sample is more likely to be a training member, so the
+    attack score is defined as negative loss.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        train_dataloader: Training DataLoader.
+        test_dataloader: Test DataLoader.
+        max_samples (int): Maximum samples to evaluate per split.
+
+    Returns:
+        float: ROC-AUC of the loss-threshold membership attack. Returns 0.5 on failure.
+    """
+    try:
+        loss_train = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
+        loss_test = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)
+
+        if loss_train.size == 0 or loss_test.size == 0:  # either split produced no losses
+            return 0.5
+
+        scores = np.concatenate([-loss_train, -loss_test])  # negated loss: higher score = more member-like
+        y_true = np.concatenate([np.ones(len(loss_train)), np.zeros(len(loss_test))])  # 1 = train sample, 0 = test sample
+        mia_auc = roc_auc_score(y_true, scores)
+
+        if np.isnan(mia_auc) or np.isinf(mia_auc):  # non-finite result is reported as 0.5
+            return 0.5
+
+        return float(np.clip(mia_auc, 0.0, 1.0))
+    except Exception as exc:  # best-effort metric: log and return the 0.5 fallback
+        logger.warning("Could not compute mia_auc")
+        logger.warning(exc)
+        return 0.5
+
+
 def get_elapsed_time(start_time, end_time):
     """
     Calculates the elapsed time during the execution of the scenario.
@@ -878,15 +1028,7 @@ def _prepare_shap_inputs(sample):
             batched_data = batched_data.float()
 
         batch_size = int(batched_data.size(0))
-        if batched_data.ndim == 4:
-            # SHAP image explainers operate more naturally on channel-last images.
-            input_shape = (
-                int(batched_data.shape[2]),
-                int(batched_data.shape[3]),
-                int(batched_data.shape[1]),
-            )
-        else:
-            input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
+        input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
 
         if batch_size == 1:
             return batched_data[:1], batched_data[:1], input_shape
@@ -906,39 +1048,6 @@ def _prepare_shap_inputs(sample):
     def _compute_shap_values(model_ref, background, test_data):
         explainer_errors = []
 
-        if test_data.ndim == 4:
-            def predict_fn(images):
-                if isinstance(images, list):
-                    images = np.asarray(images)
-
-                image_tensor = torch.as_tensor(images, dtype=test_data.dtype, device=background.device)
-                if image_tensor.ndim == 3:
-                    image_tensor = image_tensor.unsqueeze(0)
-
-                if image_tensor.ndim != 4:
-                    raise ValueError(f"Expected 4D image batch for SHAP, got shape {tuple(image_tensor.shape)}")
-
-                # SHAP image maskers provide NHWC arrays; convert back to NCHW for the model.
-                image_tensor = image_tensor.permute(0, 3, 1, 2).contiguous()
-
-                with torch.no_grad():
-                    logits = _extract_model_logits(model_ref(image_tensor))
-                    probs = _logits_to_probabilities(logits)
-                return probs.detach().cpu().numpy()
-
-            try:
-                test_images = test_data.detach().cpu().numpy().transpose(0, 2, 3, 1)
-                masker = shap.maskers.Image("blur(8,8)", test_images[0].shape)
-                explainer = shap.Explainer(predict_fn, masker)
-                explanation = explainer(
-                    test_images[: min(int(test_images.shape[0]), 4)],
-                    max_evals=128,
-                    batch_size=8,
-                )
-                return explanation.values
-            except Exception as exc:
-                explainer_errors.append(f"ImageExplainer: {exc}")
-
         for explainer_name in ("DeepExplainer", "GradientExplainer"):
             explainer = None
             try:
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index 642efb262..cbf05879f 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -42,7 +42,7 @@
             "description": "",
             "weight": 0.1
           },
-          "emprical_robustness": {
+          "empirical_robustness": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -190,14 +190,38 @@
             "type": "true_score",
             "direction": "desc",
             "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
-            "weight": 1
+            "weight": 0.2
+          },
+          "epsilon_star": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Empirical privacy leakage estimated from the separability of train and test loss distributions.",
+            "weight": 0.4
+          },
+          "mia_auc_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Trust-oriented score derived from the ROC-AUC of a loss-based membership inference attack.",
+            "weight": 0.4
           }
         }
       }
     },
     "fairness": {
       "selection_fairness": {
-        "weight": 0.3333,
+        "weight": 0.25,
         "metrics": {
           "selection_variation": {
             "inputs": [
@@ -215,7 +239,7 @@
         }
       },
       "performance_fairness": {
-        "weight": 0.3333,
+        "weight": 0.25,
         "metrics": {
           "accuracy_variation": {
             "inputs": [
@@ -233,7 +257,7 @@
         }
       },
       "class_distribution": {
-        "weight": 0.3333,
+        "weight": 0.25,
         "metrics": {
           "class_imbalance": {
             "inputs": [
@@ -249,6 +273,83 @@
             "weight": 1
           }
         }
+      },
+      "outcome_fairness": {
+        "weight": 0.25,
+        "metrics": {
+          "underfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Held-out performance proxy used as an outcome-level fairness signal.",
+            "weight": 0.1667
+          },
+          "overfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/overfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Generalization quality proxy transformed so higher is better.",
+            "weight": 0.1667
+          },
+          "well_calibration_error": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/well_calibration_error"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
+            "weight": 0.1667
+          },
+          "generalized_entropy_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/generalized_entropy_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "theil_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/theil_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "coefficient_of_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/coefficient_of_variation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Coefficient-of-variation-based outcome fairness score.",
+            "weight": 0.1665
+          }
+        }
       }
     },
     "explainability": {
@@ -311,7 +412,45 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Variation of feature importance scores of all the features.",
-            "weight": 0.5
+            "weight": 0.2
+          },
+          "alpha_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of features needed to explain most of the attribution mass; lower values indicate sparser and more focused explanations.",
+            "weight": 0.2
+          },
+          "spread_ratio": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Normalized entropy of the attribution distribution; lower values indicate explanations concentrated on fewer features.",
+            "weight": 0.2
+          },
+          "spread_divergence": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Jensen-Shannon divergence between the attribution distribution and a uniform distribution; higher values indicate more selective explanations.",
+            "weight": 0.2
           },
           "visualization": {
             "inputs": [
@@ -323,7 +462,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of graphical capabilities to show the explainability.",
-            "weight": 0.5
+            "weight": 0.2
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index fea2f70d3..6b8a9cf4f 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -42,7 +42,7 @@
             "description": "",
             "weight": 0.1
           },
-          "emprical_robustness": {
+          "empirical_robustness": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -177,14 +177,38 @@
             "type": "true_score",
             "direction": "desc",
             "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
-            "weight": 1
+            "weight": 0.2
+          },
+          "epsilon_star": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Trust-oriented score derived from the empirical privacy leakage (epsilon*) estimated from the separability of train and test loss distributions; higher values indicate less leakage.",
+            "weight": 0.4
+          },
+          "mia_auc_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Trust-oriented score derived from the ROC-AUC of a loss-based membership inference attack.",
+            "weight": 0.4
           }
         }
       }
     },
     "fairness": {
       "class_distribution": {
-        "weight": 1,
+        "weight": 0.5,
         "metrics": {
           "class_imbalance": {
             "inputs": [
@@ -200,6 +224,83 @@
             "weight": 1
           }
         }
+      },
+      "outcome_fairness": {
+        "weight": 0.5,
+        "metrics": {
+          "underfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Held-out performance proxy used as an outcome-level fairness signal.",
+            "weight": 0.1667
+          },
+          "overfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/overfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Generalization quality proxy transformed so higher is better.",
+            "weight": 0.1667
+          },
+          "well_calibration_error": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/well_calibration_error"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
+            "weight": 0.1667
+          },
+          "generalized_entropy_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/generalized_entropy_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "theil_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/theil_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "coefficient_of_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/coefficient_of_variation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Coefficient-of-variation-based outcome fairness score.",
+            "weight": 0.1665
+          }
+        }
       }
     },
     "explainability": {
@@ -262,7 +363,45 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Variation of feature importance scores of all the features.",
-            "weight": 0.5
+            "weight": 0.2
+          },
+          "alpha_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of features needed to explain most of the attribution mass; lower values indicate sparser and more focused explanations.",
+            "weight": 0.2
+          },
+          "spread_ratio": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Normalized entropy of the attribution distribution; lower values indicate explanations concentrated on fewer features.",
+            "weight": 0.2
+          },
+          "spread_divergence": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Jensen-Shannon divergence between the attribution distribution and a uniform distribution; higher values indicate more selective explanations.",
+            "weight": 0.2
           },
           "visualization": {
             "inputs": [
@@ -274,7 +413,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of graphical capabilities to show the explainability.",
-            "weight": 0.5
+            "weight": 0.2
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template.json
index 63dbf86a7..7e210646a 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template.json
@@ -28,7 +28,11 @@
 		"local_update_steps": ""
 	},
 	"privacy": {
-		"privacy_risk": ""
+		"privacy_risk": "",
+		"epsilon_star": "",
+		"epsilon_star_score": "",
+		"mia_auc": "",
+		"mia_auc_score": ""
 	},
 	"explainability": {
 		"alpha_score": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index 91d49c194..4724d14f7 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -28,7 +28,11 @@
 		"local_update_steps": ""
 	},
 	"privacy": {
-		"privacy_risk": ""
+		"privacy_risk": "",
+		"epsilon_star": "",
+		"epsilon_star_score": "",
+		"mia_auc": "",
+		"mia_auc_score": ""
 	},
 	"explainability": {
 		"alpha_score": "",
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 4b742a662..fbcf98c15 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -29,7 +29,7 @@
 from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
 from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
 from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 
 dirname = os.path.dirname(__file__)
@@ -236,6 +236,19 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
             test_dataloader = pickle.load(d_file)
 
         test_sample = next(iter(test_dataloader))
+        factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
+            model,
+            train_dataloader,
+            test_dataloader,
+        )
+        factsheet["privacy"]["epsilon_star_score"] = 1/(1 + factsheet["privacy"]["epsilon_star"])
+        factsheet["privacy"]["mia_auc"] = get_mia_auc(
+            model,
+            train_dataloader,
+            test_dataloader,
+        )
+
+        factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
         factsheet["fairness"]["underfitting"] = get_underfitting_score(
             factsheet["performance"]["test_acc"]
         )
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 4577dc1c4..0149893ac 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -27,7 +27,7 @@
 from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
 from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
 from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
@@ -316,6 +316,18 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                     test_dataloader = pickle.load(file)
 
                 test_sample = next(iter(test_dataloader))
+                factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
+                    model,
+                    train_dataloader,
+                    test_dataloader,
+                )
+                factsheet["privacy"]["epsilon_star_score"] = 1/(1 + factsheet["privacy"]["epsilon_star"])
+                factsheet["privacy"]["mia_auc"] = get_mia_auc(
+                    model,
+                    train_dataloader,
+                    test_dataloader,
+                )
+                factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
                 factsheet["fairness"]["underfitting"] = factsheet["performance"]["test_acc_avg"]
                 overfitting_value = get_overfitting_score(
                     model,
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index a85820946..48fb12bf1 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -286,6 +286,7 @@ def _load_local_trustscores_weights(self, experiment_name: str) -> dict:
                 "indistinguishability": float(data["indistinguishability"]),
                 "fairness": float(data["fairness_pillar"]),
                 "class_distribution": float(data["class_distribution"]),
+                "outcome_fairness": float(data["outcome_fairness"]),
                 "explainability": float(data["explainability_pillar"]),
                 "interpretability": float(data["interpretability"]),
                 "post_hoc_methods": float(data["post_hoc_methods"]),
@@ -985,6 +986,7 @@ async def _generate_factsheet(self, trust_config, experiment_name):
                 "selection_fairness": float(data["selection_fairness"]),
                 "performance_fairness": float(data["performance_fairness"]),
                 "class_distribution": float(data["class_distribution"]),
+                "outcome_fairness": float(data["outcome_fairness"]),
                 "explainability": float(data["explainability_pillar"]),
                 "interpretability": float(data["interpretability"]),
                 "post_hoc_methods": float(data["post_hoc_methods"]),
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index 1aba47f35..9e0d04dc5 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -91,6 +91,7 @@ def __init__(
         selection_fairness,
         performance_fairness,
         class_distribution,
+        outcome_fairness,
         explainability_pillar,
         interpretability,
         post_hoc_methods,
@@ -215,6 +216,7 @@ def __init__(
         self.selection_fairness = selection_fairness,
         self.performance_fairness = performance_fairness,
         self.class_distribution = class_distribution,
+        self.outcome_fairness = outcome_fairness,
         self.explainability_pillar = explainability_pillar,
         self.interpretability = interpretability,
         self.post_hoc_methods = post_hoc_methods,
@@ -747,6 +749,7 @@ def __init__(self, scenario, user=None):
                     "selection_fairness": self.scenario.selection_fairness,
                     "performance_fairness": self.scenario.performance_fairness,
                     "class_distribution": self.scenario.class_distribution,
+                    "outcome_fairness": self.scenario.outcome_fairness,
                     "explainability_pillar": self.scenario.explainability_pillar,
                     "interpretability": self.scenario.interpretability,
                     "post_hoc_methods": self.scenario.post_hoc_methods,
diff --git a/nebula/frontend/static/js/deployment/scenario.js b/nebula/frontend/static/js/deployment/scenario.js
index 02d1daa3f..5213e39e7 100644
--- a/nebula/frontend/static/js/deployment/scenario.js
+++ b/nebula/frontend/static/js/deployment/scenario.js
@@ -125,6 +125,7 @@ const ScenarioManager = (function () {
                             selection_fairness: "0",
                             performance_fairness: "0",
                             class_distribution: document.getElementById("dfl-fairness-notion-3")?.value || "0",
+                            outcome_fairness: document.getElementById("dfl-fairness-notion-4")?.value || "0",
 
                             explainability_pillar: document.getElementById("dfl-explainability-pillar")?.value || "0",
                             interpretability: document.getElementById("dfl-explainability-notion-1")?.value || "0",
@@ -160,6 +161,7 @@ const ScenarioManager = (function () {
                         selection_fairness: document.getElementById("cfl-fairness-notion-1")?.value || "0",
                         performance_fairness: document.getElementById("cfl-fairness-notion-2")?.value || "0",
                         class_distribution: document.getElementById("cfl-fairness-notion-3")?.value || "0",
+                        outcome_fairness: document.getElementById("cfl-fairness-notion-4")?.value || "0",
 
                         explainability_pillar: document.getElementById("cfl-explainability-pillar")?.value || "0",
                         interpretability: document.getElementById("cfl-explainability-notion-1")?.value || "0",
@@ -191,6 +193,7 @@ const ScenarioManager = (function () {
                     selection_fairness: "0",
                     performance_fairness: "0",
                     class_distribution: "0",
+                    outcome_fairness: "0",
                     explainability_pillar: "0",
                     interpretability: "0",
                     post_hoc_methods: "0",
diff --git a/nebula/frontend/static/js/deployment/trustworthiness.js b/nebula/frontend/static/js/deployment/trustworthiness.js
index 3bdbadac5..1bae3efc4 100644
--- a/nebula/frontend/static/js/deployment/trustworthiness.js
+++ b/nebula/frontend/static/js/deployment/trustworthiness.js
@@ -78,6 +78,7 @@ const TrustworthinessManager = (function() {
             "cfl-fairness-notion-1",
             "cfl-fairness-notion-2",
             "cfl-fairness-notion-3",
+            "cfl-fairness-notion-4",
             "cfl-explainability-notion-1",
             "cfl-explainability-notion-2",
             "cfl-accountability-notion-1",
@@ -105,6 +106,7 @@ const TrustworthinessManager = (function() {
             "dfl-privacy-notion-2",
             "dfl-privacy-notion-3",
             "dfl-fairness-notion-3",
+            "dfl-fairness-notion-4",
             "dfl-explainability-notion-1",
             "dfl-explainability-notion-2",
             "dfl-accountability-notion-1",
@@ -163,6 +165,7 @@ const TrustworthinessManager = (function() {
         const fairnessNotion1 = parseFloat(document.getElementById("cfl-fairness-notion-1").value) || 0;
         const fairnessNotion2 = parseFloat(document.getElementById("cfl-fairness-notion-2").value) || 0;
         const fairnessNotion3 = parseFloat(document.getElementById("cfl-fairness-notion-3").value) || 0;
+        const fairnessNotion4 = parseFloat(document.getElementById("cfl-fairness-notion-4").value) || 0;
 
         const explainabilityNotion1 = parseFloat(document.getElementById("cfl-explainability-notion-1").value) || 0;
         const explainabilityNotion2 = parseFloat(document.getElementById("cfl-explainability-notion-2").value) || 0;
@@ -180,7 +183,7 @@ const TrustworthinessManager = (function() {
 
         const totalRobustnessNotion = robustnessNotion1 + robustnessNotion2 + robustnessNotion3;
         const totalPrivacyNotion = privacyNotion1 + privacyNotion2 + privacyNotion3;
-        const totalFairnessNotion = fairnessNotion1 + fairnessNotion2 + fairnessNotion3;
+        const totalFairnessNotion = fairnessNotion1 + fairnessNotion2 + fairnessNotion3 + fairnessNotion4;
         const totalExplainabilityNotion = explainabilityNotion1 + explainabilityNotion2;
         const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
         const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion2 + sustainabilityNotion3;
@@ -214,6 +217,7 @@ const TrustworthinessManager = (function() {
         const privacyNotion3 = parseFloat(document.getElementById("dfl-privacy-notion-3").value) || 0;
 
         const fairnessNotion3 = parseFloat(document.getElementById("dfl-fairness-notion-3").value) || 0;
+        const fairnessNotion4 = parseFloat(document.getElementById("dfl-fairness-notion-4").value) || 0;
 
         const explainabilityNotion1 = parseFloat(document.getElementById("dfl-explainability-notion-1").value) || 0;
         const explainabilityNotion2 = parseFloat(document.getElementById("dfl-explainability-notion-2").value) || 0;
@@ -230,7 +234,7 @@ const TrustworthinessManager = (function() {
 
         const totalRobustnessNotion = robustnessNotion1 + robustnessNotion2 + robustnessNotion3;
         const totalPrivacyNotion = privacyNotion1 + privacyNotion2 + privacyNotion3;
-        const totalFairnessNotion = fairnessNotion3;
+        const totalFairnessNotion = fairnessNotion3 + fairnessNotion4;
         const totalExplainabilityNotion = explainabilityNotion1 + explainabilityNotion2;
         const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
         const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion3;
@@ -279,7 +283,8 @@ const TrustworthinessManager = (function() {
             fairness: [
                 parseFloat(document.getElementById("cfl-fairness-notion-1").value) || 0,
                 parseFloat(document.getElementById("cfl-fairness-notion-2").value) || 0,
-                parseFloat(document.getElementById("cfl-fairness-notion-3").value) || 0
+                parseFloat(document.getElementById("cfl-fairness-notion-3").value) || 0,
+                parseFloat(document.getElementById("cfl-fairness-notion-4").value) || 0
             ],
             explainability: [
                 parseFloat(document.getElementById("cfl-explainability-notion-1").value) || 0,
@@ -325,7 +330,8 @@ const TrustworthinessManager = (function() {
                 parseFloat(document.getElementById("dfl-privacy-notion-3").value) || 0
             ],
             fairness: [
-                parseFloat(document.getElementById("dfl-fairness-notion-3").value) || 0
+                parseFloat(document.getElementById("dfl-fairness-notion-3").value) || 0,
+                parseFloat(document.getElementById("dfl-fairness-notion-4").value) || 0
             ],
             explainability: [
                 parseFloat(document.getElementById("dfl-explainability-notion-1").value) || 0,
@@ -378,10 +384,11 @@ const TrustworthinessManager = (function() {
             document.getElementById("cfl-privacy-notion-2").value = p[1];
             document.getElementById("cfl-privacy-notion-3").value = p[2];
 
-            const f = config.notions.fairness || [0, 0, 0];
+            const f = config.notions.fairness || [0, 0, 0, 0];
             document.getElementById("cfl-fairness-notion-1").value = f[0];
             document.getElementById("cfl-fairness-notion-2").value = f[1];
             document.getElementById("cfl-fairness-notion-3").value = f[2];
+            document.getElementById("cfl-fairness-notion-4").value = f[3];
 
             const e = config.notions.explainability || [0, 0];
             document.getElementById("cfl-explainability-notion-1").value = e[0];
@@ -420,8 +427,9 @@ const TrustworthinessManager = (function() {
             document.getElementById("dfl-privacy-notion-2").value = p[1];
             document.getElementById("dfl-privacy-notion-3").value = p[2];
 
-            const f = config.notions.fairness || [0];
+            const f = config.notions.fairness || [0, 0];
             document.getElementById("dfl-fairness-notion-3").value = f[0];
+            document.getElementById("dfl-fairness-notion-4").value = f[1];
 
             const e = config.notions.explainability || [0, 0];
             document.getElementById("dfl-explainability-notion-1").value = e[0];
@@ -470,6 +478,7 @@ const TrustworthinessManager = (function() {
         document.getElementById("cfl-fairness-notion-1").value = "0";
         document.getElementById("cfl-fairness-notion-2").value = "0";
         document.getElementById("cfl-fairness-notion-3").value = "0";
+        document.getElementById("cfl-fairness-notion-4").value = "0";
 
         document.getElementById("cfl-explainability-notion-1").value = "0";
         document.getElementById("cfl-explainability-notion-2").value = "0";
@@ -500,6 +509,7 @@ const TrustworthinessManager = (function() {
         document.getElementById("dfl-privacy-notion-3").value = "0";
 
         document.getElementById("dfl-fairness-notion-3").value = "0";
+        document.getElementById("dfl-fairness-notion-4").value = "0";
 
         document.getElementById("dfl-explainability-notion-1").value = "0";
         document.getElementById("dfl-explainability-notion-2").value = "0";
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 83717c656..19c32050b 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -984,21 +984,28 @@ <h5 class="step-title">Fairness pillar</h5>
                                     <div class="notion">
                                         <h5 class="step-title">Selection fairness notion</h5>
                                         <input type="number" class="form-control" id="cfl-fairness-notion-1"
-                                            placeholder="Selection fairness notion" min="20" value="30"
+                                            placeholder="Selection fairness notion" min="20" value="25"
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
                                     <div class="notion">
                                         <h5 class="step-title">Performance fairness  notion</h5>
                                         <input type="number" class="form-control" id="cfl-fairness-notion-2"
-                                            placeholder="Performance fairness notion" min="20" value="35"
+                                            placeholder="Performance fairness notion" min="20" value="25"
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
                                     <div class="notion">
                                         <h5 class="step-title">Class distribution notion</h5>
                                         <input type="number" class="form-control" id="cfl-fairness-notion-3"
-                                            placeholder="Class distribution notion" min="20" value="35"
+                                            placeholder="Class distribution notion" min="20" value="25"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Outcome fairness notion</h5>
+                                        <input type="number" class="form-control" id="cfl-fairness-notion-4"
+                                            placeholder="Outcome fairness notion" min="20" value="25"
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
@@ -1182,7 +1189,14 @@ <h5 class="step-title">Fairness pillar</h5>
                                     <div class="notion">
                                         <h5 class="step-title">Class distribution notion</h5>
                                         <input type="number" class="form-control" id="dfl-fairness-notion-3"
-                                            placeholder="Class distribution notion" min="20" value="100"
+                                            placeholder="Class distribution notion" min="20" value="50"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Outcome fairness notion</h5>
+                                        <input type="number" class="form-control" id="dfl-fairness-notion-4"
+                                            placeholder="Outcome fairness notion" min="20" value="50"
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>

From d2d7022a118a42aa9c5eaa2b986669ad6e7b4b60 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Tue, 14 Apr 2026 10:16:44 +0200
Subject: [PATCH 20/66] Some metrics changed and fixed, and macro F1,
 Reputation Enabled and Average Reputation added, other minor changes

---
 nebula/addons/trustworthiness/calculation.py  | 233 ++++++++++++++----
 .../trustworthiness/configs/eval_metrics.json |  42 +++-
 .../configs/eval_metrics_dfl.json             |  53 +++-
 .../configs/factsheet_template.json           |   5 +-
 .../configs/factsheet_template_dfl.json       |   5 +-
 .../addons/trustworthiness/dfl_factsheet.py   |  30 ++-
 nebula/addons/trustworthiness/factsheet.py    |  30 ++-
 .../addons/trustworthiness/trustworthiness.py |  86 ++++++-
 8 files changed, 390 insertions(+), 94 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 48e5f326e..80a4f2ce4 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -20,7 +20,7 @@
 from codecarbon import EmissionsTracker
 from scipy.spatial.distance import jensenshannon
 from scipy.stats import entropy, variation
-from sklearn.metrics import roc_auc_score, roc_curve
+from sklearn.metrics import f1_score, roc_auc_score, roc_curve
 from torch import nn, optim
 import torch.nn.functional as F
 import time
@@ -608,6 +608,48 @@ def _get_model_accuracy(model, dataloader):
 
     return correct / total if total > 0 else 0.0
 
+
+def get_macro_f1_score(model, dataloader):
+    """
+    Calculates macro F1 score over a dataloader.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        dataloader (DataLoader): Dataloader with (x, y) batches.
+
+    Returns:
+        float: Macro F1 score in [0, 1].
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return 0.0
+
+    try:
+        device = next(model.parameters()).device
+    except Exception:
+        device = torch.device("cpu")
+
+    model.eval()
+    y_true = []
+    y_pred = []
+
+    with torch.no_grad():
+        for x, y in dataloader:
+            x = x.to(device)
+            y = y.to(device)
+
+            out = model(x)
+            logits = out[0] if isinstance(out, (tuple, list)) else out
+            preds = logits.argmax(dim=1)
+
+            y_true.extend(y.detach().cpu().numpy().tolist())
+            y_pred.extend(preds.detach().cpu().numpy().tolist())
+
+    if not y_true:
+        return 0.0
+
+    return float(f1_score(y_true, y_pred, average="macro", zero_division=0))
+
 def _extract_model_logits(model_output):
     """
     Normalize the output returned by a model forward pass into a logits tensor.
@@ -1236,49 +1278,97 @@ def get_spread_divergence(model, test_sample):
         return 0.0
 
 
-def get_clever_score(model, test_sample, nb_classes, learning_rate):
+def get_explainability_metrics_summary(model, test_dataloader, max_batches=4):
     """
-    Calculates the CLEVER score.
+    Computes explainability metrics over multiple test batches and returns
+    their mean values.
 
     Args:
         model (object): The model.
-        test_sample (object): One test sample to calculate the CLEVER score.
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
+        test_dataloader: Test dataloader providing batches.
+        max_batches (int): Maximum number of batches to use.
 
     Returns:
-        float: The CLEVER score.
+        dict: Mean values for feature_importance_cv, alpha_score,
+        spread_ratio and spread_divergence.
     """
+    summary = {
+        "feature_importance_cv": 1.0,
+        "alpha_score": 1.0,
+        "spread_ratio": 1.0,
+        "spread_divergence": 0.0,
+    }
 
+    if test_dataloader is None:
+        return summary
 
-    samples, _ = test_sample
-    input_shape = None
+    try:
+        max_batches = max(1, int(max_batches))
+    except Exception:
+        max_batches = 4
 
-    if torch.is_tensor(samples) and samples.dim() >= 1 and samples.shape[0] != 0:
-        pass
-    else:
-        raise ValueError("`test_sample[0]` must be a non-empty torch.Tensor.")
+    fi_values = []
+    alpha_values = []
+    spread_ratio_values = []
+    spread_divergence_values = []
 
-    if input_shape is None:
-        if samples.dim() >= 2:
-            # (B, ...) -> input_shape = (...)
-            input_shape = tuple(samples.shape[1:])
-        else:
-            # (...) without batch
-            input_shape = tuple(samples.shape)
+    try:
+        for batch_idx, test_sample in enumerate(test_dataloader):
+            if batch_idx >= max_batches:
+                break
 
-    background = samples[-1] if samples.dim() >= 2 else samples
+            fi_values.append(float(get_feature_importance_cv(model, test_sample)))
+            alpha_values.append(float(get_alpha_score(model, test_sample)))
+            spread_ratio_values.append(float(get_spread_ratio(model, test_sample)))
+            spread_divergence_values.append(float(get_spread_divergence(model, test_sample)))
+    except Exception as exc:
+        logger.warning("Could not compute explainability metrics summary")
+        logger.warning(exc)
+
+    if fi_values:
+        summary["feature_importance_cv"] = float(np.mean(fi_values))
+    if alpha_values:
+        summary["alpha_score"] = float(np.mean(alpha_values))
+    if spread_ratio_values:
+        summary["spread_ratio"] = float(np.mean(spread_ratio_values))
+    if spread_divergence_values:
+        summary["spread_divergence"] = float(np.mean(spread_divergence_values))
 
-    x = background.detach().cpu().numpy()
+    return summary
 
-    if tuple(x.shape) == tuple(input_shape):
-        x = x.reshape((1,) + tuple(input_shape))
 
+def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
+    """
+    Calculates the CLEVER score as the mean score over multiple samples.
+
+    Args:
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader.
+        nb_classes (int): The nb_classes of the model.
+        learning_rate (float): The learning rate of the model.
+        max_samples (int): Maximum number of samples from the batch to evaluate.
+
+    Returns:
+        float: Mean CLEVER score across the selected samples.
+    """
+    samples, _ = test_sample
+
+    if not (torch.is_tensor(samples) and samples.dim() >= 1 and samples.shape[0] != 0):
+        raise ValueError("`test_sample[0]` must be a non-empty torch.Tensor.")
+
+    input_shape = tuple(samples.shape[1:]) if samples.dim() >= 2 else tuple(samples.shape)
+
+    try:
+        max_samples = max(1, int(max_samples))
+    except Exception:
+        max_samples = 8
+
+    n_samples = min(int(samples.shape[0]), max_samples)
 
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), learning_rate)
 
-    # Create the ART classifier
+    # Create the ART classifier once and reuse it for all selected samples.
     classifier = PyTorchClassifier(
         model=model,
         loss=criterion,
@@ -1287,17 +1377,32 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate):
         nb_classes=nb_classes,
     )
 
-    score_untargeted = clever_u(
-        classifier,
-        background.numpy(),
-        10,
-        5,
-        R_L2,
-        norm=2,
-        pool_factor=3,
-        verbose=False,
-    )
-    return score_untargeted
+    clever_scores = []
+    for idx in range(n_samples):
+        background = samples[idx].detach().cpu()
+        sample_np = background.numpy()
+
+        try:
+            score_untargeted = clever_u(
+                classifier,
+                sample_np,
+                10,
+                5,
+                R_L2,
+                norm=2,
+                pool_factor=3,
+                verbose=False,
+            )
+            if score_untargeted is not None and not math.isnan(float(score_untargeted)):
+                clever_scores.append(float(score_untargeted))
+        except Exception as exc:
+            logger.warning("Could not compute CLEVER score for sample index %s", idx)
+            logger.warning(exc)
+
+    if not clever_scores:
+        return 0.0
+
+    return float(np.mean(clever_scores))
 
 
 def stop_emissions_tracking_and_save(
@@ -1390,45 +1495,67 @@ def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: fl
 
     return total_bytes / acc
 
-def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate):
+def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
 
     """
-    Calculates the loss sensitivity score.
+    Calculates the loss sensitivity score as the mean score over multiple samples.
 
     Args:
         model (object): The model.
-        test_sample (object): One test sample to calculate the loss sensitivity score.
+        test_sample (object): A batch from the test dataloader.
         nb_classes (int): The nb_classes of the model.
         learning_rate (float): The learning rate of the model.
+        max_samples (int): Maximum number of samples from the batch to evaluate.
 
     Returns:
-        float: The loss sensitivity score.
+        float: Mean loss sensitivity score across the selected samples.
     """
-
     samples, labels = test_sample
-    sample = samples[-1].unsqueeze(0)
-    label = labels[-1].unsqueeze(0)
 
-    label = F.one_hot(label, num_classes=nb_classes).float()
+    if not (torch.is_tensor(samples) and torch.is_tensor(labels) and samples.shape[0] > 0):
+        raise ValueError("`test_sample` must contain non-empty tensors for samples and labels.")
+
+    try:
+        max_samples = max(1, int(max_samples))
+    except Exception:
+        max_samples = 8
+
+    n_samples = min(int(samples.shape[0]), max_samples)
 
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), learning_rate)
 
-    # Create the ART classifier
+    # Create the ART classifier once and reuse it for all selected samples.
     classifier = PyTorchClassifier(
         model=model,
         loss=criterion,
         optimizer=optimizer,
-        input_shape=sample.shape[1:],
+        input_shape=samples.shape[1:],
         nb_classes=nb_classes,
     )
 
-    score = loss_sensitivity(
-        classifier,
-        sample.numpy(),
-        label.numpy(),
-    )
-    return float(score)
+    sensitivity_scores = []
+    for idx in range(n_samples):
+        sample = samples[idx].detach().cpu().unsqueeze(0)
+        label = labels[idx].detach().cpu().unsqueeze(0)
+        label = F.one_hot(label, num_classes=nb_classes).float()
+
+        try:
+            score = loss_sensitivity(
+                classifier,
+                sample.numpy(),
+                label.numpy(),
+            )
+            if score is not None and not math.isnan(float(score)):
+                sensitivity_scores.append(float(score))
+        except Exception as exc:
+            logger.warning("Could not compute loss sensitivity for sample index %s", idx)
+            logger.warning(exc)
+
+    if not sensitivity_scores:
+        return 0.0
+
+    return float(np.mean(sensitivity_scores))
 
 def compute_adversarial_accuracy_art(
     model,
@@ -1495,7 +1622,7 @@ def get_empirical_robustness_score(
     learning_rate,
     attack_name = "fgsm",
     attack_params = None,
-    max_samples = 32,
+    max_samples = 128,
 ):
     """
     Calculates the Empirical Robustness score using Adversarial Robustness Toolbox (ART).
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index cbf05879f..db70c2000 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -97,7 +97,19 @@
             "score_function": "get_true_score",
             "type": "true_score",
             "description": "Average test accuracy of the global model on clients test data.",
-            "weight": 0.5
+            "weight": 0.3
+          },
+          "macro_f1": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Macro F1 score of the final model on test data.",
+            "weight": 0.3
           },
           "personalization": {
             "inputs": [
@@ -109,7 +121,19 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 0.5
+            "weight": 0.2
+          },
+          "reputation_enabled": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of an active reputation-based defense mechanism.",
+            "weight": 0.2
           }
         }
       },
@@ -128,7 +152,19 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 1
+            "weight": 0.5
+          },
+          "average_neighbor_reputation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Average reputation score of the neighbors associated with the node or federation.",
+            "weight": 0.5
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index 6b8a9cf4f..0dd0f650d 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -86,6 +86,31 @@
       "algorithm_robustness": {
         "weight": 0.4,
         "metrics": {
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_true_score",
+            "type": "true_score",
+            "description": "Local clean test accuracy of the final model.",
+            "weight": 0.3
+          },
+          "macro_f1": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Macro F1 score of the final local model on test data.",
+            "weight": 0.3
+          },
           "personalization": {
             "inputs": [
               {
@@ -96,7 +121,19 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 1.0
+            "weight": 0.2
+          },
+          "reputation_enabled": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of an active reputation-based defense mechanism.",
+            "weight": 0.2
           }
         }
       },
@@ -115,7 +152,19 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 1
+            "weight": 0.5
+          },
+          "average_neighbor_reputation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Average reputation score of the neighbors associated with the node.",
+            "weight": 0.5
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template.json
index 7e210646a..ee26f5ce3 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template.json
@@ -13,12 +13,14 @@
 		"client_num": "",
 		"sample_client_rate": "",
 		"client_selector": "",
-		"avg_dataset_size": ""
+		"avg_dataset_size": "",
+		"avg_neighbor_reputation": ""
 	},
 	"configuration": {
 		"aggregation_algorithm": "",
 		"training_model": "",
 		"personalization": "",
+		"reputation_enabled": "",
 		"visualization": "",
 		"differential_privacy": "",
 		"dp_epsilon": "",
@@ -42,6 +44,7 @@
 	"performance": {
 		"test_loss_avg": "",
 		"test_acc_avg": "",
+		"test_macro_f1": "",
 		"test_feature_importance_cv": "",
 		"test_clever": "",
 		"test_loss_sensitivity": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index 4724d14f7..fb4c5d26d 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -13,12 +13,14 @@
 		"client_num": "",
 		"sample_client_rate": "",
 		"client_selector": "",
-		"local_dataset_size": ""
+		"local_dataset_size": "",
+		"avg_neighbor_reputation": ""
 	},
 	"configuration": {
 		"aggregation_algorithm": "",
 		"training_model": "",
 		"personalization": "",
+		"reputation_enabled": "",
 		"visualization": "",
 		"differential_privacy": "",
 		"dp_epsilon": "",
@@ -42,6 +44,7 @@
 	"performance": {
 		"test_loss": "",
 		"test_acc": "",
+		"test_macro_f1": "",
 		"test_feature_importance_cv": "",
 		"test_clever": "",
 		"test_loss_sensitivity": "",
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index fbcf98c15..5d1a0fc52 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -29,13 +29,13 @@
 from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
 from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
 from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 
 dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
-def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time):
+def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time, reputation_summary=None):
     trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
     os.makedirs(trust_dir, exist_ok=True)
 
@@ -99,6 +99,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
         factsheet["configuration"]["training_model"] = data["model"] or ""
         factsheet["configuration"]["personalization"] = False
+        factsheet["configuration"]["reputation_enabled"] = bool(data.get("reputation", {}).get("enabled", False))
         factsheet["configuration"]["visualization"] = True
         factsheet["configuration"]["total_round_num"] = n_rounds
 
@@ -224,6 +225,10 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["sustainability"]["emissions_training_local"] = emissions_training_local
         factsheet["sustainability"]["energy_consumed_local"] = energy_consumed_local
         factsheet["participants"]["local_dataset_size"] = sample_size
+        if reputation_summary is not None:
+            factsheet["participants"]["avg_neighbor_reputation"] = reputation_summary.get("avg_neighbor_reputation", "")
+        else:
+            factsheet["participants"]["avg_neighbor_reputation"] = 0
 
         factsheet["sustainability"]["emissions_communication_local"] = (bytes_sent * 2.24e-10 * carbon_intensity_local)+(bytes_recv * 2.24e-10 * carbon_intensity_local)
 
@@ -236,6 +241,8 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
             test_dataloader = pickle.load(d_file)
 
         test_sample = next(iter(test_dataloader))
+        explainability_metrics = get_explainability_metrics_summary(model, test_dataloader)
+        factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_dataloader)
         factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
             model,
             train_dataloader,
@@ -281,18 +288,9 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
             test_dataloader,
         )
         factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
-        factsheet["explainability"]["alpha_score"] = get_alpha_score(
-            model,
-            test_sample,
-        )
-        factsheet["explainability"]["spread_ratio"] = get_spread_ratio(
-            model,
-            test_sample,
-        )
-        factsheet["explainability"]["spread_divergence"] = get_spread_divergence(
-            model,
-            test_sample,
-        )
+        factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
+        factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
+        factsheet["explainability"]["spread_divergence"] = explainability_metrics["spread_divergence"]
 
         lr = factsheet["configuration"]["learning_rate"]
 
@@ -300,7 +298,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
 
         value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
-        factsheet["performance"]["test_loss_sensitivity"] = 1 if value_loss_sensitivity > 1 else value_loss_sensitivity
+        factsheet["performance"]["test_loss_sensitivity"] = 1 / (1 + value_loss_sensitivity)
 
         value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
         factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
@@ -314,7 +312,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         value_attack_success_rate = attack_success_rate(model, test_sample)
         factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
 
-        feature_importance = get_feature_importance_cv(model, test_sample)
+        feature_importance = explainability_metrics["feature_importance_cv"]
         factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
 
         f.seek(0)
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 0149893ac..f415451b7 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -27,7 +27,7 @@
 from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
 from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
 from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
@@ -114,6 +114,7 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
                     factsheet["configuration"]["training_model"] = data["model"] or ""
                     factsheet["configuration"]["personalization"] = False
+                    factsheet["configuration"]["reputation_enabled"] = bool(data.get("reputation", {}).get("enabled", False))
                     factsheet["configuration"]["visualization"] = True
                     factsheet["configuration"]["total_round_num"] = n_rounds
 
@@ -193,7 +194,7 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                 logging.warning(f"{factsheet_file} is invalid")
                 logging.error(e)
 
-    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx):
+    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx, reputation_summary=None):
         """
         Populates the factsheet with values after the training.
 
@@ -251,6 +252,10 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 class_imbalance_score = 1 / (1+avg_class_imbalance)
                 factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance_score > 1 else class_imbalance_score
+                if reputation_summary is not None:
+                    factsheet["participants"]["avg_neighbor_reputation"] = reputation_summary.get("avg_neighbor_reputation", "")
+                else:
+                    factsheet["participants"]["avg_neighbor_reputation"] = 0
 
                 with open(final_model_file, "rb") as file:
                     lightning_model = pickle.load(file)
@@ -316,6 +321,8 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                     test_dataloader = pickle.load(file)
 
                 test_sample = next(iter(test_dataloader))
+                explainability_metrics = get_explainability_metrics_summary(model, test_dataloader)
+                factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_dataloader)
                 factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
                     model,
                     train_dataloader,
@@ -356,18 +363,9 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                     test_dataloader,
                 )
                 factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
-                factsheet["explainability"]["alpha_score"] = get_alpha_score(
-                    model,
-                    test_sample,
-                )
-                factsheet["explainability"]["spread_ratio"] = get_spread_ratio(
-                    model,
-                    test_sample,
-                )
-                factsheet["explainability"]["spread_divergence"] = get_spread_divergence(
-                    model,
-                    test_sample,
-                )
+                factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
+                factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
+                factsheet["explainability"]["spread_divergence"] = explainability_metrics["spread_divergence"]
 
                 lr = factsheet["configuration"]["learning_rate"]
 
@@ -375,7 +373,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
 
                 value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
-                factsheet["performance"]["test_loss_sensitivity"] = 1 if value_loss_sensitivity > 1 else value_loss_sensitivity
+                factsheet["performance"]["test_loss_sensitivity"] = 1 / (1 + value_loss_sensitivity)
 
                 value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
                 factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
@@ -389,7 +387,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 value_attack_success_rate = attack_success_rate(model, test_sample)
                 factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
 
-                feature_importance = get_feature_importance_cv(model, test_sample)
+                feature_importance = explainability_metrics["feature_importance_cv"]
                 factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
 
                 # Set emissions metrics
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 48fb12bf1..6d2c293ef 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -263,7 +263,14 @@ async def _finish_sdfl_trustscores_exchange(self, trust_config, experiment_name)
             self._finalize_sdfl_global_trustscores_aggregation()
 
     def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
-        populate_factsheet(experiment_name, self._idx, trust_config, self._start_time, self._end_time)
+        populate_factsheet(
+            experiment_name,
+            self._idx,
+            trust_config,
+            self._start_time,
+            self._end_time,
+            reputation_summary=self._get_reputation_trust_summary(),
+        )
 
         trust_metric_manager = TrustMetricManager(self._start_time, federation, self._idx)
         trust_metric_manager.evaluate_participant(experiment_name, weights, self._idx, use_weights=True)
@@ -317,6 +324,38 @@ def _is_reputation_enabled(self) -> bool:
     def _get_reputation_system(self):
         return getattr(self._engine, "_reputation", None)
 
+    def _get_reputation_trust_summary(self) -> dict:  # Summarize reputation state as {"reputation_enabled", "avg_neighbor_reputation"}.
+        if not self._is_reputation_enabled():
+            return {
+                "reputation_enabled": False,
+                "avg_neighbor_reputation": 0.0,  # fixed value reported when reputation is disabled
+            }
+
+        reputation_system = self._get_reputation_system()
+        reputation_values = []
+
+        if reputation_system is not None:
+            for addr, data in reputation_system.reputation.items():
+                if addr == self._engine.addr:  # leave this node's own entry out of the average
+                    continue
+
+                reputation_value = data.get("reputation")
+                if reputation_value is None:  # entries without a "reputation" value are skipped
+                    continue
+
+                reputation_values.append(float(reputation_value))
+
+        if reputation_values:
+            avg_neighbor_reputation = sum(reputation_values) / len(reputation_values)  # arithmetic mean of collected values
+        else:  # nothing collected: fall back to the configured initial reputation (0.0 if missing or falsy)
+            reputation_config = self._engine.config.participant.get("defense_args", {}).get("reputation", {})
+            avg_neighbor_reputation = float(reputation_config.get("initial_reputation", 0.0) or 0.0)
+
+        return {
+            "reputation_enabled": True,
+            "avg_neighbor_reputation": avg_neighbor_reputation,
+        }
+
     def _get_trustscores_weight_for_source(self, source: str, node_id: int | str) -> float:
         if not self._is_reputation_enabled():
             return 0.5
@@ -967,7 +1006,13 @@ async def register_trustworthiness_report(self, source, message):
     async def _generate_factsheet(self, trust_config, experiment_name):
         factsheet = Factsheet()
         factsheet.populate_factsheet_pre_train(trust_config, experiment_name)
-        factsheet.populate_factsheet_post_train(experiment_name, self._start_time, self._end_time, self._idx)
+        factsheet.populate_factsheet_post_train(
+            experiment_name,
+            self._start_time,
+            self._end_time,
+            self._idx,
+            reputation_summary=self._get_reputation_trust_summary(),
+        )
 
         data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
         with open(data_file_path, 'r') as data_file:
@@ -1005,6 +1050,43 @@ async def _generate_factsheet(self, trust_config, experiment_name):
             trust_metric_manager = TrustMetricManager(self._start_time, federation)
             trust_metric_manager.evaluate(experiment_name, weights, use_weights=True)
 
+    def _is_reputation_enabled(self) -> bool:  # True when participant config has a truthy defense_args -> reputation -> enabled.
+        defense_args = self._engine.config.participant.get("defense_args", {})
+        reputation_config = defense_args.get("reputation", {})
+        return bool(reputation_config.get("enabled", False))  # a missing key at any level yields False
+
+    def _get_reputation_system(self):  # Return the engine's `_reputation` attribute, or None when the engine has none.
+        return getattr(self._engine, "_reputation", None)
+
+    def _get_reputation_trust_summary(self) -> dict:  # Summarize reputation state as {"reputation_enabled", "avg_neighbor_reputation"}.
+        if not self._is_reputation_enabled():
+            return {
+                "reputation_enabled": False,
+                "avg_neighbor_reputation": 0.0,  # fixed value reported when reputation is disabled
+            }
+
+        reputation_system = self._get_reputation_system()
+        reputation_values = []
+
+        if reputation_system is not None:
+            for _, data in reputation_system.reputation.items():  # every entry is averaged; no address is excluded here
+                reputation_value = data.get("reputation")
+                if reputation_value is None:  # entries without a "reputation" value are skipped
+                    continue
+
+                reputation_values.append(float(reputation_value))
+
+        if reputation_values:
+            avg_neighbor_reputation = sum(reputation_values) / len(reputation_values)  # arithmetic mean of collected values
+        else:  # nothing collected: fall back to the configured initial reputation (0.0 if missing or falsy)
+            reputation_config = self._engine.config.participant.get("defense_args", {}).get("reputation", {})
+            avg_neighbor_reputation = float(reputation_config.get("initial_reputation", 0.0) or 0.0)
+
+        return {
+            "reputation_enabled": True,
+            "avg_neighbor_reputation": avg_neighbor_reputation,
+        }
+
     async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         cur_loss, cur_acc = await tme.get_event_data()
         if cur_loss and cur_acc:

From ee84a74c735f875f95467b5cb94252fcda958481 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 15 Apr 2026 17:49:53 +0200
Subject: [PATCH 21/66] Dropout rate, timeout rate and topology type
 implemented, client selector and selection variation fixed, other minor
 errors fixed

---
 nebula/addons/trustworthiness/calculation.py  |  26 ++++
 .../trustworthiness/configs/eval_metrics.json |  62 ++++++++--
 .../configs/eval_metrics_dfl.json             |  76 ++++++++++--
 .../configs/factsheet_template.json           |   4 +-
 .../configs/factsheet_template_dfl.json       |   5 +-
 .../addons/trustworthiness/dfl_factsheet.py   |  19 ++-
 nebula/addons/trustworthiness/factsheet.py    |  18 ++-
 .../addons/trustworthiness/trustworthiness.py | 112 +++++++++++++++++-
 8 files changed, 298 insertions(+), 24 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 80a4f2ce4..94aee8afc 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -245,6 +245,32 @@ def get_cv(list=None, std=None, mean=None):
     return 0
 
 
+def get_participation_variation_score(participation_counts):
+    """
+    Convert participation-count dispersion into a trust-oriented score.
+
+    Args:
+        participation_counts (list[float | int]): Number of participations per client.
+
+    Returns:
+        float: Score in [0, 1] where 1 means equal participation.
+    """
+    if not participation_counts:  # empty or None input is scored as equal participation
+        return 1.0
+
+    counts = np.asarray(participation_counts, dtype=float)
+    mean_count = float(np.mean(counts))
+
+    if mean_count <= 0:  # non-positive mean participation gets the lowest score
+        return 0.0
+
+    cv = get_cv(list=counts)  # dispersion value from the module's get_cv helper
+    if not np.isfinite(cv):  # NaN or infinite dispersion is mapped to the lowest score
+        return 0.0
+
+    return float(1 / (1 + cv))  # cv == 0 gives 1.0; the score decreases as a non-negative cv grows
+
+
 def get_global_privacy_risk(dp, epsilon, n):
     """
     Calculates the global privacy risk by epsilon and the number of clients.
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index db70c2000..e5a4d3eb3 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -152,7 +152,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.5
+            "weight": 0.3
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -164,7 +164,33 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Average reputation score of the neighbors associated with the node or federation.",
-            "weight": 0.5
+            "weight": 0.3
+          },
+          "dropout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of expected client updates that were not received across rounds.",
+            "weight": 0.2
+          },
+          "timeout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of aggregation rounds that finished with missing expected client updates.",
+            "weight": 0.2
           }
         }
       }
@@ -571,7 +597,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "configuration/optimization_algorithm"
+                "field_path": "configuration/aggregation_algorithm"
               },
               {
                 "source": "factsheet",
@@ -713,10 +739,32 @@
                 "field_path": "participants/client_selector"
               }
             ],
-            "operation": "check_properties",
-            "type": "property_check",
-            "description": "The use of a client selector.",
-            "weight": 1
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Reputation Based": 1.0,
+              "Full Participation": 0.5
+            },
+            "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
+            "weight": 0.5
+          },
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 0.5
           }
         }
       },
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index 0dd0f650d..8f2618163 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -152,7 +152,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.5
+            "weight": 0.3
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -164,7 +164,33 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Average reputation score of the neighbors associated with the node.",
-            "weight": 0.5
+            "weight": 0.3
+          },
+          "dropout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of expected neighbor updates that were not received across rounds.",
+            "weight": 0.2
+          },
+          "timeout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of aggregation rounds that finished with missing expected neighbor updates.",
+            "weight": 0.2
           }
         }
       }
@@ -259,6 +285,18 @@
       "class_distribution": {
         "weight": 0.5,
         "metrics": {
+          "selection_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Local variation in neighbor participation across rounds, transformed so higher values mean more stable participation.",
+            "weight": 0.5
+          },
           "class_imbalance": {
             "inputs": [
               {
@@ -270,7 +308,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Variation of the sample size per class.",
-            "weight": 1
+            "weight": 0.5
           }
         }
       },
@@ -535,7 +573,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "configuration/optimization_algorithm"
+                "field_path": "configuration/aggregation_algorithm"
               },
               {
                 "source": "factsheet",
@@ -669,10 +707,32 @@
                 "field_path": "participants/client_selector"
               }
             ],
-            "operation": "check_properties",
-            "type": "property_check",
-            "description": "The use of a client selector.",
-            "weight": 1
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Reputation Based": 1.0,
+              "Full Participation": 0.5
+            },
+            "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
+            "weight": 0.5
+          },
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 0.5
           }
         }
       },
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template.json
index ee26f5ce3..1b4e781b8 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template.json
@@ -70,7 +70,9 @@
 		"total_upload_bytes": "",
 		"total_download_bytes":"",
 		"avg_upload_bytes": "",
-		"avg_download_bytes": ""
+		"avg_download_bytes": "",
+		"dropout_rate": "",
+		"timeout_rate": ""
 	},
 	"sustainability": {
 		"avg_carbon_intensity_server": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index fb4c5d26d..a37432392 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -54,6 +54,7 @@
 		"test_attack_success_rate": ""
 	},
 	"fairness": {
+		"selection_cv": "",
 		"class_imbalance": "",
 		"underfitting": "",
 		"overfitting": "",
@@ -66,7 +67,9 @@
 		"time_minutes": "",
 		"model_size": "",
 		"upload_bytes": "",
-		"download_bytes":""
+		"download_bytes":"",
+		"dropout_rate": "",
+		"timeout_rate": ""
 	},
 	"sustainability": {
 		"carbon_intensity_local": "",
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 5d1a0fc52..dac9645de 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -35,7 +35,7 @@
 dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
-def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time, reputation_summary=None):
+def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time, reputation_summary=None, participation_summary=None, reliability_summary=None):
     trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
     os.makedirs(trust_dir, exist_ok=True)
 
@@ -93,7 +93,11 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         # Set participants
         factsheet["participants"]["client_num"] = data["n_nodes"] or ""
         factsheet["participants"]["sample_client_rate"] = 1
-        factsheet["participants"]["client_selector"] = ""
+
+        if with_reputation == True:
+            factsheet["participants"]["client_selector"] = "Reputation Based"
+        else:
+            factsheet["participants"]["client_selector"] = "Full Participation"
 
         # Set configuration
         factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
@@ -206,6 +210,12 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         factsheet["system"]["upload_bytes"] = int(bytes_sent)
         factsheet["system"]["download_bytes"] = int(bytes_recv)
+        if reliability_summary is not None:
+            factsheet["system"]["dropout_rate"] = reliability_summary.get("dropout_rate", 0.0)
+            factsheet["system"]["timeout_rate"] = reliability_summary.get("timeout_rate", 0.0)
+        else:
+            factsheet["system"]["dropout_rate"] = 0.0
+            factsheet["system"]["timeout_rate"] = 0.0
 
         factsheet["system"]["time_minutes"] = get_elapsed_time(start_time, end_time)
 
@@ -219,6 +229,11 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         else:
             factsheet["fairness"]["class_imbalance"] = factsheet["fairness"].get("class_imbalance", 0.0)
 
+        if participation_summary is not None:
+            factsheet["fairness"]["selection_cv"] = participation_summary.get("selection_cv", 1)
+        else:
+            factsheet["fairness"]["selection_cv"] = 1
+
         carbon_intensity_local, emissions_training_local, energy_consumed_local, sample_size = get_emissions(emissions_file, participant_idx)
 
         factsheet["sustainability"]["carbon_intensity_local"] = carbon_intensity_local
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index f415451b7..a302cf4bb 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -108,7 +108,10 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     # Set participants
                     factsheet["participants"]["client_num"] = data["n_nodes"] or ""
                     factsheet["participants"]["sample_client_rate"] = 1
-                    factsheet["participants"]["client_selector"] = ""
+                    if with_reputation == True:
+                        factsheet["participants"]["client_selector"] = "Reputation Based"
+                    else:
+                        factsheet["participants"]["client_selector"] = "Full Participation"
 
                     # Set configuration
                     factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
@@ -194,7 +197,7 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                 logging.warning(f"{factsheet_file} is invalid")
                 logging.error(e)
 
-    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx, reputation_summary=None):
+    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx, reputation_summary=None, participation_summary=None, reliability_summary=None):
         """
         Populates the factsheet with values after the training.
 
@@ -247,8 +250,17 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["system"]["total_download_bytes"] = result_bytes_sent_recv[1]
                 factsheet["system"]["avg_upload_bytes"] = result_bytes_sent_recv[2]
                 factsheet["system"]["avg_download_bytes"] = result_bytes_sent_recv[3]
+                if reliability_summary is not None:
+                    factsheet["system"]["dropout_rate"] = reliability_summary.get("dropout_rate", 0.0)
+                    factsheet["system"]["timeout_rate"] = reliability_summary.get("timeout_rate", 0.0)
+                else:
+                    factsheet["system"]["dropout_rate"] = 0.0
+                    factsheet["system"]["timeout_rate"] = 0.0
 
-                factsheet["fairness"]["selection_cv"] = 1
+                if participation_summary is not None:
+                    factsheet["fairness"]["selection_cv"] = participation_summary.get("selection_cv", 1)
+                else:
+                    factsheet["fairness"]["selection_cv"] = 1
 
                 class_imbalance_score = 1 / (1+avg_class_imbalance)
                 factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance_score > 1 else class_imbalance_score
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 6d2c293ef..eaa5a1fb3 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -1,14 +1,14 @@
 import logging
 import asyncio
 from nebula.addons.functions import print_msg_box
-from nebula.core.nebulaevents import ExperimentFinishEvent, RoundEndEvent, TestMetricsEvent
+from nebula.core.nebulaevents import AggregationEvent, ExperimentFinishEvent, RoundEndEvent, RoundStartEvent, TestMetricsEvent
 from nebula.core.eventmanager import EventManager
 from nebula.core.noderole import Role, ServerRoleBehavior
 from abc import ABC, abstractmethod
 from nebula.config.config import Config
 from nebula.core.engine import Engine
 import pickle
-from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_final_model_id, get_class_imbalance_local
+from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_final_model_id, get_class_imbalance_local, get_participation_variation_score
 from nebula.addons.trustworthiness.utils import save_results_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy, load_trust_report_json_dumped, create_local_trust_report_copy, accumulate_weighted_trustscores, build_weighted_trustscores_report, save_trust_report_json
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
@@ -81,11 +81,18 @@ def __init__(self, engine, idx, trust_files_route):
         self._trustscores_template_report = None
         self._trustscores_local_copy_path = None
         self._trustscores_local_report_initialized = False
+        self._round_participation_counts = {}
+        self._dropout_expected_total = 0
+        self._dropout_missing_total = 0
+        self._aggregation_rounds_total = 0
+        self._timed_out_rounds_total = 0
 
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
         self._reset_trustscores_exchange_state()
         self._trustscores_wait_event = asyncio.Event()
+        await EventManager.get_instance().subscribe_node_event(AggregationEvent, self._process_aggregation_event)
+        await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
         await EventManager.get_instance().subscribe_node_event(RoundEndEvent, self._process_round_end_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
@@ -270,6 +277,8 @@ def _compute_local_trustscores_report(self, experiment_name, trust_config, weigh
             self._start_time,
             self._end_time,
             reputation_summary=self._get_reputation_trust_summary(),
+            participation_summary=self._get_participation_trust_summary(),
+            reliability_summary=self._get_system_reliability_summary(),
         )
 
         trust_metric_manager = TrustMetricManager(self._start_time, federation, self._idx)
@@ -356,6 +365,33 @@ def _get_reputation_trust_summary(self) -> dict:
             "avg_neighbor_reputation": avg_neighbor_reputation,
         }
 
+    def _get_participation_trust_summary(self) -> dict:  # Build {"selection_cv": score} from the per-node participation counters.
+        total_clients = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1  # n_nodes minus one; presumably excludes this node, TODO confirm
+        counts = list(self._round_participation_counts.values())
+
+        if len(counts) < total_clients:  # pad with zero counts up to total_clients entries
+            counts.extend([0] * (total_clients - len(counts)))
+
+        return {
+            "selection_cv": get_participation_variation_score(counts),
+        }
+
+    def _get_system_reliability_summary(self) -> dict:  # Return {"dropout_rate", "timeout_rate"} as floats computed from the instance counters.
+        if self._dropout_expected_total <= 0:  # nothing expected so far: report 0.0 instead of dividing by zero
+            dropout_rate = 0.0
+        else:
+            dropout_rate = self._dropout_missing_total / self._dropout_expected_total  # missing / expected
+
+        if self._aggregation_rounds_total <= 0:  # no aggregation counted so far: report 0.0 instead of dividing by zero
+            timeout_rate = 0.0
+        else:
+            timeout_rate = self._timed_out_rounds_total / self._aggregation_rounds_total  # timed-out / total aggregations
+
+        return {
+            "dropout_rate": float(dropout_rate),
+            "timeout_rate": float(timeout_rate),
+        }
+
     def _get_trustscores_weight_for_source(self, source: str, node_id: int | str) -> float:
         if not self._is_reputation_enabled():
             return 0.5
@@ -820,6 +856,24 @@ async def _process_round_end_event(self, ree: RoundEndEvent):
         with open(train_model, 'wb') as f:
             pickle.dump(self._engine.trainer.model, f)
 
+    async def _process_round_start_event(self, rse: RoundStartEvent):  # Increment the participation counter of every node listed in the event.
+        _, _, expected_nodes = await rse.get_event_data()  # only the third element of the event data is used
+        for node_addr in expected_nodes:  # NOTE(review): own address is not filtered here, unlike _process_aggregation_event; confirm intended
+            self._round_participation_counts[node_addr] = self._round_participation_counts.get(node_addr, 0) + 1  # unseen nodes start from 0
+
+    async def _process_aggregation_event(self, age: AggregationEvent):  # Update the dropout/timeout counters from one aggregation event.
+        _, expected_nodes, missing_nodes = await age.get_event_data()  # first element of the event data is ignored
+        self_addr = self._engine.addr
+
+        expected_without_self = {node for node in expected_nodes if node != self_addr}  # set: duplicates collapse, own address removed
+        missing_without_self = {node for node in missing_nodes if node != self_addr}  # same filtering for the missing nodes
+
+        self._aggregation_rounds_total += 1
+        self._dropout_expected_total += len(expected_without_self)
+        self._dropout_missing_total += len(missing_without_self)
+        if missing_without_self:  # at least one missing peer counts this aggregation as timed out
+            self._timed_out_rounds_total += 1
+
     async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         cur_loss, cur_acc = await tme.get_event_data()
         if cur_loss and cur_acc:
@@ -857,10 +911,17 @@ def __init__(self, engine: Engine, idx, trust_files_route):
         self._trust_config = None
         self._csv_completed = False
         self._finish_post = False
+        self._round_participation_counts = {}
+        self._dropout_expected_total = 0
+        self._dropout_missing_total = 0
+        self._aggregation_rounds_total = 0
+        self._timed_out_rounds_total = 0
 
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
+        await EventManager.get_instance().subscribe_node_event(AggregationEvent, self._process_aggregation_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
+        await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
         await self._create_pk_files(experiment_name)
 
@@ -1002,6 +1063,24 @@ async def register_trustworthiness_report(self, source, message):
             self._csv_completed = True
             logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
 
+    async def _process_round_start_event(self, rse: RoundStartEvent):  # Increment the participation counter of every node listed in the event.
+        _, _, expected_nodes = await rse.get_event_data()  # only the third element of the event data is used
+        for node_addr in expected_nodes:  # NOTE(review): own address is not filtered here, unlike _process_aggregation_event; confirm intended
+            self._round_participation_counts[node_addr] = self._round_participation_counts.get(node_addr, 0) + 1  # unseen nodes start from 0
+
+    async def _process_aggregation_event(self, age: AggregationEvent):  # Update the dropout/timeout counters from one aggregation event.
+        _, expected_nodes, missing_nodes = await age.get_event_data()  # first element of the event data is ignored
+        self_addr = self._engine.addr
+
+        expected_without_self = {node for node in expected_nodes if node != self_addr}  # set: duplicates collapse, own address removed
+        missing_without_self = {node for node in missing_nodes if node != self_addr}  # same filtering for the missing nodes
+
+        self._aggregation_rounds_total += 1
+        self._dropout_expected_total += len(expected_without_self)
+        self._dropout_missing_total += len(missing_without_self)
+        if missing_without_self:  # at least one missing peer counts this aggregation as timed out
+            self._timed_out_rounds_total += 1
+
 
     async def _generate_factsheet(self, trust_config, experiment_name):
         factsheet = Factsheet()
@@ -1012,6 +1091,8 @@ async def _generate_factsheet(self, trust_config, experiment_name):
             self._end_time,
             self._idx,
             reputation_summary=self._get_reputation_trust_summary(),
+            participation_summary=self._get_participation_trust_summary(),
+            reliability_summary=self._get_system_reliability_summary(),
         )
 
         data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
@@ -1087,6 +1168,33 @@ def _get_reputation_trust_summary(self) -> dict:
             "avg_neighbor_reputation": avg_neighbor_reputation,
         }
 
+    def _get_participation_trust_summary(self) -> dict:
+        total_clients = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
+        counts = list(self._round_participation_counts.values())
+
+        if len(counts) < total_clients:
+            counts.extend([0] * (total_clients - len(counts)))
+
+        return {
+            "selection_cv": get_participation_variation_score(counts),
+        }
+
+    def _get_system_reliability_summary(self) -> dict:
+        if self._dropout_expected_total <= 0:
+            dropout_rate = 0.0
+        else:
+            dropout_rate = self._dropout_missing_total / self._dropout_expected_total
+
+        if self._aggregation_rounds_total <= 0:
+            timeout_rate = 0.0
+        else:
+            timeout_rate = self._timed_out_rounds_total / self._aggregation_rounds_total
+
+        return {
+            "dropout_rate": float(dropout_rate),
+            "timeout_rate": float(timeout_rate),
+        }
+
     async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         cur_loss, cur_acc = await tme.get_event_data()
         if cur_loss and cur_acc:

From 700383e476f5fcc2099da51c81933a13c95af55f Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 15 Apr 2026 18:32:27 +0200
Subject: [PATCH 22/66] Accountability updated: Factsheet Completeness

---
 .../trustworthiness/configs/eval_metrics.json | 154 +++++++++++++++++-
 .../configs/eval_metrics_dfl.json             | 134 ++++++++++++++-
 2 files changed, 282 insertions(+), 6 deletions(-)

diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index e5a4d3eb3..49cefc092 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -566,6 +566,14 @@
               {
                 "source": "factsheet",
                 "field_path": "participants/client_selector"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_dataset_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
               }
             ],
             "operation": "check_properties",
@@ -591,7 +599,7 @@
             "operation": "check_properties",
             "type": "property_check",
             "description": "Meta data about the data.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "configuration": {
             "inputs": [
@@ -607,6 +615,14 @@
                 "source": "factsheet",
                 "field_path": "configuration/personalization"
               },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              },
               {
                 "source": "factsheet",
                 "field_path": "configuration/differential_privacy"
@@ -635,7 +651,7 @@
             "operation": "check_properties",
             "type": "property_check",
             "description": "FL model configurations.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "performance": {
             "inputs": [
@@ -647,6 +663,10 @@
                 "source": "factsheet",
                 "field_path": "performance/test_acc_avg"
               },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              },
               {
                 "source": "factsheet",
                 "field_path": "performance/test_feature_importance_cv"
@@ -679,7 +699,7 @@
             "operation": "check_properties",
             "type": "property_check",
             "description": "Performance evaluation results.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "fairness": {
             "inputs": [
@@ -694,6 +714,30 @@
               {
                 "source": "factsheet",
                 "field_path": "fairness/class_imbalance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/overfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/well_calibration_error"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/generalized_entropy_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/theil_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/coefficient_of_variation"
               }
             ],
             "operation": "check_properties",
@@ -711,6 +755,14 @@
                 "source": "factsheet",
                 "field_path": "system/avg_model_size"
               },
+              {
+                "source": "factsheet",
+                "field_path": "system/total_upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/total_download_bytes"
+              },
               {
                 "source": "factsheet",
                 "field_path": "system/avg_upload_bytes"
@@ -718,6 +770,102 @@
               {
                 "source": "factsheet",
                 "field_path": "system/avg_download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "System usage information.",
+            "weight": 0.1
+          },
+          "privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/privacy_risk"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Privacy evaluation results.",
+            "weight": 0.1
+          },
+          "explainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Explainability evaluation results.",
+            "weight": 0.1
+          },
+          "sustainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_server"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_clients"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_clients"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_server"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_training"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_aggregation"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_uplink"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_downlink"
               }
             ],
             "operation": "check_properties",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index 8f2618163..0f06f138e 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -542,6 +542,14 @@
               {
                 "source": "factsheet",
                 "field_path": "participants/client_selector"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/local_dataset_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
               }
             ],
             "operation": "check_properties",
@@ -567,7 +575,7 @@
             "operation": "check_properties",
             "type": "property_check",
             "description": "Meta data about the data.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "configuration": {
             "inputs": [
@@ -583,6 +591,14 @@
                 "source": "factsheet",
                 "field_path": "configuration/personalization"
               },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              },
               {
                 "source": "factsheet",
                 "field_path": "configuration/differential_privacy"
@@ -611,7 +627,7 @@
             "operation": "check_properties",
             "type": "property_check",
             "description": "FL model configurations.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "performance": {
             "inputs": [
@@ -623,6 +639,10 @@
                 "source": "factsheet",
                 "field_path": "performance/test_acc"
               },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              },
               {
                 "source": "factsheet",
                 "field_path": "performance/test_feature_importance_cv"
@@ -655,13 +675,41 @@
             "operation": "check_properties",
             "type": "property_check",
             "description": "Performance evaluation results.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "fairness": {
             "inputs": [
               {
                 "source": "factsheet",
                 "field_path": "fairness/class_imbalance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/overfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/well_calibration_error"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/generalized_entropy_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/theil_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/coefficient_of_variation"
               }
             ],
             "operation": "check_properties",
@@ -686,6 +734,86 @@
               {
                 "source": "factsheet",
                 "field_path": "system/download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "System usage information.",
+            "weight": 0.1
+          },
+          "privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/privacy_risk"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Privacy evaluation results.",
+            "weight": 0.1
+          },
+          "explainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Explainability evaluation results.",
+            "weight": 0.1
+          },
+          "sustainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/carbon_intensity_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_training_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/energy_consumed_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_local"
               }
             ],
             "operation": "check_properties",

From 67d45ef74769610dc10e7bbe14c6ccf71b8a83ad Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 17 Apr 2026 10:24:28 +0200
Subject: [PATCH 23/66] Aggregation algorithms updates, SDFL fixed

---
 nebula/core/aggregation/fedavg.py      | 5 +++++
 nebula/core/aggregation/krum.py        | 5 +++++
 nebula/core/aggregation/median.py      | 5 +++++
 nebula/core/aggregation/trimmedmean.py | 5 +++++
 4 files changed, 20 insertions(+)

diff --git a/nebula/core/aggregation/fedavg.py b/nebula/core/aggregation/fedavg.py
index 2ae036a9f..fff29ac00 100755
--- a/nebula/core/aggregation/fedavg.py
+++ b/nebula/core/aggregation/fedavg.py
@@ -1,6 +1,7 @@
 import gc
 
 import torch
+import logging
 
 from nebula.core.aggregation.aggregator import Aggregator
 
@@ -18,6 +19,10 @@ def __init__(self, config=None, **kwargs):
     def run_aggregation(self, models):
         super().run_aggregation(models)
 
+        if not models:
+            logging.warning("FedAvg received an empty update set.")
+            return None
+
         models = list(models.values())
 
         total_samples = float(sum(weight for _, weight in models))
diff --git a/nebula/core/aggregation/krum.py b/nebula/core/aggregation/krum.py
index 902b33fd5..1b6f0b8dd 100755
--- a/nebula/core/aggregation/krum.py
+++ b/nebula/core/aggregation/krum.py
@@ -1,5 +1,6 @@
 import numpy
 import torch
+import logging
 
 from nebula.core.aggregation.aggregator import Aggregator
 
@@ -18,6 +19,10 @@ def __init__(self, config=None, **kwargs):
     def run_aggregation(self, models):
         super().run_aggregation(models)
 
+        if not models:
+            logging.warning("Krum received an empty update set.")
+            return None
+
         models = list(models.values())
 
         accum = {layer: torch.zeros_like(param).float() for layer, param in models[-1][0].items()}
diff --git a/nebula/core/aggregation/median.py b/nebula/core/aggregation/median.py
index a455ff77d..86608da97 100755
--- a/nebula/core/aggregation/median.py
+++ b/nebula/core/aggregation/median.py
@@ -1,5 +1,6 @@
 import numpy as np
 import torch
+import logging
 
 from nebula.core.aggregation.aggregator import Aggregator
 
@@ -40,6 +41,10 @@ def get_median(self, weights):
     def run_aggregation(self, models):
         super().run_aggregation(models)
 
+        if not models:
+            logging.warning("Median received an empty update set.")
+            return None
+
         models = list(models.values())
         models_params = [m for m, _ in models]
 
diff --git a/nebula/core/aggregation/trimmedmean.py b/nebula/core/aggregation/trimmedmean.py
index f9af238db..bee62699f 100755
--- a/nebula/core/aggregation/trimmedmean.py
+++ b/nebula/core/aggregation/trimmedmean.py
@@ -1,5 +1,6 @@
 import numpy as np
 import torch
+import logging
 
 from nebula.core.aggregation.aggregator import Aggregator
 
@@ -44,6 +45,10 @@ def get_trimmedmean(self, weights):
     def run_aggregation(self, models):
         super().run_aggregation(models)
 
+        if not models:
+            logging.warning("TrimmedMean received an empty update set.")
+            return None
+
         models = list(models.values())
         models_params = [m for m, _ in models]
 

From a397a8a671f4eaf9b6bf0c4897afdc5a51c34e4f Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 17 Apr 2026 13:24:34 +0200
Subject: [PATCH 24/66] New notions added: Federation Management and
 Monitoring, frontend updated, new metric: Logs Available

---
 .../trustworthiness/configs/eval_metrics.json | 94 +++++++++----------
 .../configs/eval_metrics_dfl.json             | 66 +++++++++----
 .../configs/factsheet_template.json           |  5 +-
 .../configs/factsheet_template_dfl.json       |  1 +
 .../addons/trustworthiness/dfl_factsheet.py   |  1 +
 nebula/addons/trustworthiness/factsheet.py    |  1 +
 .../addons/trustworthiness/trustworthiness.py |  4 +
 nebula/controller/scenarios.py                |  6 ++
 .../frontend/static/js/deployment/scenario.js | 13 ++-
 .../static/js/deployment/trustworthiness.js   | 45 +++++++--
 nebula/frontend/templates/deployment.html     | 38 +++++++-
 11 files changed, 183 insertions(+), 91 deletions(-)

diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index 49cefc092..080755818 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -122,18 +122,6 @@
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
             "weight": 0.2
-          },
-          "reputation_enabled": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "configuration/reputation_enabled"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "The use of an active reputation-based defense mechanism.",
-            "weight": 0.2
           }
         }
       },
@@ -154,18 +142,6 @@
             "description": "The number of clients in the model.",
             "weight": 0.3
           },
-          "average_neighbor_reputation": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "participants/avg_neighbor_reputation"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "Average reputation score of the neighbors associated with the node or federation.",
-            "weight": 0.3
-          },
           "dropout_rate": {
             "inputs": [
               {
@@ -531,7 +507,7 @@
     },
     "accountability": {
       "factsheet_completeness": {
-        "weight": 1,
+        "weight": 0.8,
         "metrics": {
           "project_specs": {
             "inputs": [
@@ -570,10 +546,6 @@
               {
                 "source": "factsheet",
                 "field_path": "participants/avg_dataset_size"
-              },
-              {
-                "source": "factsheet",
-                "field_path": "participants/avg_neighbor_reputation"
               }
             ],
             "operation": "check_properties",
@@ -617,11 +589,11 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "configuration/reputation_enabled"
+                "field_path": "configuration/visualization"
               },
               {
                 "source": "factsheet",
-                "field_path": "configuration/visualization"
+                "field_path": "configuration/monitoring"
               },
               {
                 "source": "factsheet",
@@ -874,6 +846,23 @@
             "weight": 0.1
           }
         }
+      },
+      "monitoring": {
+        "weight": 0.2,
+        "metrics": {
+          "logs_available": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of logs to show all the nodes.",
+            "weight": 1
+          }
+        }
       }
     },
     "architectural_soundness": {
@@ -894,25 +883,7 @@
               "Full Participation": 0.5
             },
             "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
-            "weight": 0.5
-          },
-          "topology_type": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "data/preprocessing"
-              }
-            ],
-            "operation": "get_value",
-            "type": "score_mapping",
-            "score_map": {
-              "Fully": 1.0,
-              "Star": 0.8,
-              "Ring": 0.6,
-              "Random": 0.2
-            },
-            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
-            "weight": 0.5
+            "weight": 1
           }
         }
       },
@@ -938,6 +909,29 @@
             "weight": 1
           }
         }
+      },
+      "federation_management": {
+        "weight": 0.5,
+        "metrics": {
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 1
+          }
+        }
       }
     },
     "sustainability": {
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index 0f06f138e..141dda891 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -507,7 +507,7 @@
     },
     "accountability": {
       "factsheet_completeness": {
-        "weight": 1,
+        "weight": 0.8,
         "metrics": {
           "project_specs": {
             "inputs": [
@@ -599,6 +599,10 @@
                 "source": "factsheet",
                 "field_path": "configuration/visualization"
               },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              },
               {
                 "source": "factsheet",
                 "field_path": "configuration/differential_privacy"
@@ -822,6 +826,23 @@
             "weight": 0.1
           }
         }
+      },
+      "monitoring": {
+        "weight": 0.2,
+        "metrics": {
+          "logs_available": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of logs to show all the nodes.",
+            "weight": 1
+          }
+        }
       }
     },
     "architectural_soundness": {
@@ -842,25 +863,7 @@
               "Full Participation": 0.5
             },
             "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
-            "weight": 0.5
-          },
-          "topology_type": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "data/preprocessing"
-              }
-            ],
-            "operation": "get_value",
-            "type": "score_mapping",
-            "score_map": {
-              "Fully": 1.0,
-              "Star": 0.8,
-              "Ring": 0.6,
-              "Random": 0.2
-            },
-            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
-            "weight": 0.5
+            "weight": 1
           }
         }
       },
@@ -886,6 +889,29 @@
             "weight": 1
           }
         }
+      },
+      "federation_management": {
+        "weight": 0.5,
+        "metrics": {
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 1
+          }
+        }
       }
     },
     "sustainability": {
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template.json
index 1b4e781b8..6948e4982 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template.json
@@ -13,15 +13,14 @@
 		"client_num": "",
 		"sample_client_rate": "",
 		"client_selector": "",
-		"avg_dataset_size": "",
-		"avg_neighbor_reputation": ""
+		"avg_dataset_size": ""
 	},
 	"configuration": {
 		"aggregation_algorithm": "",
 		"training_model": "",
 		"personalization": "",
-		"reputation_enabled": "",
 		"visualization": "",
+		"monitoring": "",
 		"differential_privacy": "",
 		"dp_epsilon": "",
 		"trainable_param_num": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index a37432392..d2a4decd6 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -22,6 +22,7 @@
 		"personalization": "",
 		"reputation_enabled": "",
 		"visualization": "",
+		"monitoring": "",
 		"differential_privacy": "",
 		"dp_epsilon": "",
 		"trainable_param_num": "",
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index dac9645de..4499dcba0 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -105,6 +105,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["configuration"]["personalization"] = False
         factsheet["configuration"]["reputation_enabled"] = bool(data.get("reputation", {}).get("enabled", False))
         factsheet["configuration"]["visualization"] = True
+        factsheet["configuration"]["monitoring"] = True
         factsheet["configuration"]["total_round_num"] = n_rounds
 
         """
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index a302cf4bb..4acda271c 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -119,6 +119,7 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     factsheet["configuration"]["personalization"] = False
                     factsheet["configuration"]["reputation_enabled"] = bool(data.get("reputation", {}).get("enabled", False))
                     factsheet["configuration"]["visualization"] = True
+                    factsheet["configuration"]["monitoring"] = True
                     factsheet["configuration"]["total_round_num"] = n_rounds
 
                     """
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index eaa5a1fb3..025cb4358 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -308,9 +308,11 @@ def _load_local_trustscores_weights(self, experiment_name: str) -> dict:
                 "post_hoc_methods": float(data["post_hoc_methods"]),
                 "accountability": float(data["accountability_pillar"]),
                 "factsheet_completeness": float(data["factsheet_completeness"]),
+                "monitoring":  float(data["monitoring"]),
                 "architectural_soundness": float(data["architectural_soundness_pillar"]),
                 "client_management": float(data["client_management"]),
                 "optimization": float(data["optimization"]),
+                "federation_management": float(data["federation_management"]),
                 "sustainability": float(data["sustainability_pillar"]),
                 "energy_source": float(data["energy_source"]),
                 "federation_complexity": float(data["federation_complexity"]),
@@ -1118,9 +1120,11 @@ async def _generate_factsheet(self, trust_config, experiment_name):
                 "post_hoc_methods": float(data["post_hoc_methods"]),
                 "accountability": float(data["accountability_pillar"]),
                 "factsheet_completeness":  float(data["factsheet_completeness"]),
+                "monitoring":  float(data["monitoring"]),
                 "architectural_soundness": float(data["architectural_soundness_pillar"]),
                 "client_management": float(data["client_management"]),
                 "optimization": float(data["optimization"]),
+                "federation_management": float(data["federation_management"]),
                 "sustainability": float(data["sustainability_pillar"]),
                 "energy_source": float(data["energy_source"]),
                 "hardware_efficiency": float(data["hardware_efficiency"]),
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index 9e0d04dc5..831d65b16 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -97,9 +97,11 @@ def __init__(
         post_hoc_methods,
         accountability_pillar,
         factsheet_completeness,
+        monitoring,
         architectural_soundness_pillar,
         client_management,
         optimization,
+        federation_management,
         sustainability_pillar,
         energy_source,
         hardware_efficiency,
@@ -222,9 +224,11 @@ def __init__(
         self.post_hoc_methods = post_hoc_methods,
         self.accountability_pillar = accountability_pillar,
         self.factsheet_completeness = factsheet_completeness,
+        self.monitoring = monitoring,
         self.architectural_soundness_pillar = architectural_soundness_pillar,
         self.client_management = client_management,
         self.optimization = optimization,
+        self.federation_management = federation_management,
         self.sustainability_pillar = sustainability_pillar,
         self.energy_source = energy_source,
         self.hardware_efficiency = hardware_efficiency,
@@ -755,9 +759,11 @@ def __init__(self, scenario, user=None):
                     "post_hoc_methods": self.scenario.post_hoc_methods,
                     "accountability_pillar": self.scenario.accountability_pillar,
                     "factsheet_completeness": self.scenario.factsheet_completeness,
+                    "monitoring": self.scenario.monitoring,
                     "architectural_soundness_pillar": self.scenario.architectural_soundness_pillar,
                     "client_management": self.scenario.client_management,
                     "optimization": self.scenario.optimization,
+                    "federation_management": self.scenario.federation_management,
                     "sustainability_pillar": self.scenario.sustainability_pillar,
                     "energy_source": self.scenario.energy_source,
                     "hardware_efficiency": self.scenario.hardware_efficiency,
diff --git a/nebula/frontend/static/js/deployment/scenario.js b/nebula/frontend/static/js/deployment/scenario.js
index 5213e39e7..4f92d8e06 100644
--- a/nebula/frontend/static/js/deployment/scenario.js
+++ b/nebula/frontend/static/js/deployment/scenario.js
@@ -132,11 +132,13 @@ const ScenarioManager = (function () {
                             post_hoc_methods: document.getElementById("dfl-explainability-notion-2")?.value || "0",
 
                             accountability_pillar: document.getElementById("dfl-accountability-pillar")?.value || "0",
-                            factsheet_completeness: document.getElementById("dfl-accountability-notion-1")?.value || "100",
+                            factsheet_completeness: document.getElementById("dfl-accountability-notion-1")?.value || "0",
+                            monitoring: document.getElementById("dfl-accountability-notion-2")?.value || "0",
 
                             architectural_soundness_pillar: document.getElementById("dfl-architectural-soundness-pillar")?.value || "0",
                             client_management: document.getElementById("dfl-architectural-soundness-notion-1")?.value || "0",
                             optimization: document.getElementById("dfl-architectural-soundness-notion-2")?.value || "0",
+                            federation_management: document.getElementById("dfl-architectural-soundness-notion-3")?.value || "0",
 
                             sustainability_pillar: document.getElementById("dfl-sustainability-pillar")?.value || "0",
                             energy_source: document.getElementById("dfl-sustainability-notion-1")?.value || "0",
@@ -168,11 +170,14 @@ const ScenarioManager = (function () {
                         post_hoc_methods: document.getElementById("cfl-explainability-notion-2")?.value || "0",
 
                         accountability_pillar: document.getElementById("cfl-accountability-pillar")?.value || "0",
-                        factsheet_completeness: document.getElementById("cfl-accountability-notion-1")?.value || "100",
+                        factsheet_completeness: document.getElementById("cfl-accountability-notion-1")?.value || "0",
+                        monitoring: document.getElementById("cfl-accountability-notion-2")?.value || "0",
+
 
                         architectural_soundness_pillar: document.getElementById("cfl-architectural-soundness-pillar")?.value || "0",
                         client_management: document.getElementById("cfl-architectural-soundness-notion-1")?.value || "0",
                         optimization: document.getElementById("cfl-architectural-soundness-notion-2")?.value || "0",
+                        federation_management: document.getElementById("cfl-architectural-soundness-notion-3")?.value || "0",
 
                         sustainability_pillar: document.getElementById("cfl-sustainability-pillar")?.value || "0",
                         energy_source: document.getElementById("cfl-sustainability-notion-1")?.value || "0",
@@ -198,10 +203,12 @@ const ScenarioManager = (function () {
                     interpretability: "0",
                     post_hoc_methods: "0",
                     accountability_pillar: "0",
-                    factsheet_completeness: "100",
+                    factsheet_completeness: "0",
+                    monitoring: "0",
                     architectural_soundness_pillar: "0",
                     client_management: "0",
                     optimization: "0",
+                    federation_management: "0",
                     sustainability_pillar: "0",
                     energy_source: "0",
                     hardware_efficiency: "0",
diff --git a/nebula/frontend/static/js/deployment/trustworthiness.js b/nebula/frontend/static/js/deployment/trustworthiness.js
index 1bae3efc4..25fe3a20f 100644
--- a/nebula/frontend/static/js/deployment/trustworthiness.js
+++ b/nebula/frontend/static/js/deployment/trustworthiness.js
@@ -82,8 +82,10 @@ const TrustworthinessManager = (function() {
             "cfl-explainability-notion-1",
             "cfl-explainability-notion-2",
             "cfl-accountability-notion-1",
+            "cfl-accountability-notion-2",
             "cfl-architectural-soundness-notion-1",
             "cfl-architectural-soundness-notion-2",
+            "cfl-architectural-soundness-notion-3",
             "cfl-sustainability-notion-1",
             "cfl-sustainability-notion-2",
             "cfl-sustainability-notion-3"
@@ -110,8 +112,10 @@ const TrustworthinessManager = (function() {
             "dfl-explainability-notion-1",
             "dfl-explainability-notion-2",
             "dfl-accountability-notion-1",
+            "dfl-accountability-notion-2",
             "dfl-architectural-soundness-notion-1",
             "dfl-architectural-soundness-notion-2",
+            "dfl-architectural-soundness-notion-3",
             "dfl-sustainability-notion-1",
             "dfl-sustainability-notion-3"
         ];
@@ -172,11 +176,15 @@ const TrustworthinessManager = (function() {
 
         const architecturalSoundnessNotion1 = parseFloat(document.getElementById("cfl-architectural-soundness-notion-1").value) || 0;
         const architecturalSoundnessNotion2 = parseFloat(document.getElementById("cfl-architectural-soundness-notion-2").value) || 0;
+        const architecturalSoundnessNotion3 = parseFloat(document.getElementById("cfl-architectural-soundness-notion-3").value) || 0;
 
         const sustainabilityNotion1 = parseFloat(document.getElementById("cfl-sustainability-notion-1").value) || 0;
         const sustainabilityNotion2 = parseFloat(document.getElementById("cfl-sustainability-notion-2").value) || 0;
         const sustainabilityNotion3 = parseFloat(document.getElementById("cfl-sustainability-notion-3").value) || 0;
 
+        const accountabilityNotion1 = parseFloat(document.getElementById("cfl-accountability-notion-1").value) || 0;
+        const accountabilityNotion2 = parseFloat(document.getElementById("cfl-accountability-notion-2").value) || 0;
+
         const totalPillar =
             robustnessPercent + privacyPercent + fairnessPercent + explainabilityPercent +
             accountabilityPercent + architecturalSoundnessPercent + sustainabilityPercent;
@@ -185,8 +193,10 @@ const TrustworthinessManager = (function() {
         const totalPrivacyNotion = privacyNotion1 + privacyNotion2 + privacyNotion3;
         const totalFairnessNotion = fairnessNotion1 + fairnessNotion2 + fairnessNotion3 + fairnessNotion4;
         const totalExplainabilityNotion = explainabilityNotion1 + explainabilityNotion2;
-        const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
+        const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2 + architecturalSoundnessNotion3;
         const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion2 + sustainabilityNotion3;
+        const totalAccountabilityNotion = accountabilityNotion1 + accountabilityNotion2;
+
 
         return (
             getWeightValidationMessage("Pillars", totalPillar) ||
@@ -195,7 +205,8 @@ const TrustworthinessManager = (function() {
             getWeightValidationMessage("Fairness notions", totalFairnessNotion) ||
             getWeightValidationMessage("Explainability notions", totalExplainabilityNotion) ||
             getWeightValidationMessage("Architectural soundness notions", totalArchitecturalSoundnessNotion) ||
-            getWeightValidationMessage("Sustainability notions", totalSustainabilityNotion)
+            getWeightValidationMessage("Sustainability notions", totalSustainabilityNotion) ||
+            getWeightValidationMessage("Accountability notions", totalAccountabilityNotion)
         );
     }
 
@@ -224,10 +235,14 @@ const TrustworthinessManager = (function() {
 
         const architecturalSoundnessNotion1 = parseFloat(document.getElementById("dfl-architectural-soundness-notion-1").value) || 0;
         const architecturalSoundnessNotion2 = parseFloat(document.getElementById("dfl-architectural-soundness-notion-2").value) || 0;
+        const architecturalSoundnessNotion3 = parseFloat(document.getElementById("dfl-architectural-soundness-notion-3").value) || 0;
 
         const sustainabilityNotion1 = parseFloat(document.getElementById("dfl-sustainability-notion-1").value) || 0;
         const sustainabilityNotion3 = parseFloat(document.getElementById("dfl-sustainability-notion-3").value) || 0;
 
+        const accountabilityNotion1 = parseFloat(document.getElementById("dfl-accountability-notion-1").value) || 0; // DFL inputs, matching the other dfl-* reads here
+        const accountabilityNotion2 = parseFloat(document.getElementById("dfl-accountability-notion-2").value) || 0;
+
         const totalPillar =
             robustnessPercent + privacyPercent + fairnessPercent + explainabilityPercent +
             accountabilityPercent + architecturalSoundnessPercent + sustainabilityPercent;
@@ -236,8 +251,9 @@ const TrustworthinessManager = (function() {
         const totalPrivacyNotion = privacyNotion1 + privacyNotion2 + privacyNotion3;
         const totalFairnessNotion = fairnessNotion3 + fairnessNotion4;
         const totalExplainabilityNotion = explainabilityNotion1 + explainabilityNotion2;
-        const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2;
+        const totalArchitecturalSoundnessNotion = architecturalSoundnessNotion1 + architecturalSoundnessNotion2 + architecturalSoundnessNotion3;
         const totalSustainabilityNotion = sustainabilityNotion1 + sustainabilityNotion3;
+        const totalAccountabilityNotion = accountabilityNotion1 + accountabilityNotion2;
 
         return (
             getWeightValidationMessage("Pillars", totalPillar) ||
@@ -246,7 +262,8 @@ const TrustworthinessManager = (function() {
             getWeightValidationMessage("Fairness notions", totalFairnessNotion) ||
             getWeightValidationMessage("Explainability notions", totalExplainabilityNotion) ||
             getWeightValidationMessage("Architectural soundness notions", totalArchitecturalSoundnessNotion) ||
-            getWeightValidationMessage("Sustainability notions", totalSustainabilityNotion)
+            getWeightValidationMessage("Sustainability notions", totalSustainabilityNotion) ||
+            getWeightValidationMessage("Accountability notions", totalAccountabilityNotion)
         );
     }
 
@@ -291,11 +308,13 @@ const TrustworthinessManager = (function() {
                 parseFloat(document.getElementById("cfl-explainability-notion-2").value) || 0
             ],
             accountability: [
-                parseFloat(document.getElementById("cfl-accountability-notion-1")?.value) || 100
+                parseFloat(document.getElementById("cfl-accountability-notion-1")?.value) || 0,
+                parseFloat(document.getElementById("cfl-accountability-notion-2")?.value) || 0
             ],
             architecturalSoundness: [
                 parseFloat(document.getElementById("cfl-architectural-soundness-notion-1").value) || 0,
-                parseFloat(document.getElementById("cfl-architectural-soundness-notion-2").value) || 0
+                parseFloat(document.getElementById("cfl-architectural-soundness-notion-2").value) || 0,
+                parseFloat(document.getElementById("cfl-architectural-soundness-notion-3").value) || 0
             ],
             sustainability: [
                 parseFloat(document.getElementById("cfl-sustainability-notion-1").value) || 0,
@@ -338,11 +357,13 @@ const TrustworthinessManager = (function() {
                 parseFloat(document.getElementById("dfl-explainability-notion-2").value) || 0
             ],
             accountability: [
-                parseFloat(document.getElementById("dfl-accountability-notion-1")?.value) || 100
+                parseFloat(document.getElementById("dfl-accountability-notion-1")?.value) || 0,
+                parseFloat(document.getElementById("dfl-accountability-notion-2")?.value) || 0
             ],
             architecturalSoundness: [
                 parseFloat(document.getElementById("dfl-architectural-soundness-notion-1").value) || 0,
-                parseFloat(document.getElementById("dfl-architectural-soundness-notion-2").value) || 0
+                parseFloat(document.getElementById("dfl-architectural-soundness-notion-2").value) || 0,
+                parseFloat(document.getElementById("dfl-architectural-soundness-notion-3").value) || 0
             ],
             sustainability: [
                 parseFloat(document.getElementById("dfl-sustainability-notion-1").value) || 0,
@@ -394,9 +415,10 @@ const TrustworthinessManager = (function() {
             document.getElementById("cfl-explainability-notion-1").value = e[0];
             document.getElementById("cfl-explainability-notion-2").value = e[1];
 
-            const a = config.notions.architecturalSoundness || [0, 0];
+            const a = config.notions.architecturalSoundness || [0, 0, 0];
             document.getElementById("cfl-architectural-soundness-notion-1").value = a[0];
             document.getElementById("cfl-architectural-soundness-notion-2").value = a[1];
+            document.getElementById("cfl-architectural-soundness-notion-3").value = a[2];
 
             const s = config.notions.sustainability || [0, 0, 0];
             document.getElementById("cfl-sustainability-notion-1").value = s[0];
@@ -435,9 +457,10 @@ const TrustworthinessManager = (function() {
             document.getElementById("dfl-explainability-notion-1").value = e[0];
             document.getElementById("dfl-explainability-notion-2").value = e[1];
 
-            const a = config.notions.architecturalSoundness || [0, 0];
+            const a = config.notions.architecturalSoundness || [0, 0, 0];
             document.getElementById("dfl-architectural-soundness-notion-1").value = a[0];
             document.getElementById("dfl-architectural-soundness-notion-2").value = a[1];
+            document.getElementById("dfl-architectural-soundness-notion-3").value = a[2];
 
             const s = config.notions.sustainability || [0, 0];
             document.getElementById("dfl-sustainability-notion-1").value = s[0];
@@ -485,6 +508,7 @@ const TrustworthinessManager = (function() {
 
         document.getElementById("cfl-architectural-soundness-notion-1").value = "0";
         document.getElementById("cfl-architectural-soundness-notion-2").value = "0";
+        document.getElementById("cfl-architectural-soundness-notion-3").value = "0";
 
         document.getElementById("cfl-sustainability-notion-1").value = "0";
         document.getElementById("cfl-sustainability-notion-2").value = "0";
@@ -516,6 +540,7 @@ const TrustworthinessManager = (function() {
 
         document.getElementById("dfl-architectural-soundness-notion-1").value = "0";
         document.getElementById("dfl-architectural-soundness-notion-2").value = "0";
+        document.getElementById("dfl-architectural-soundness-notion-3").value = "0";
 
         document.getElementById("dfl-sustainability-notion-1").value = "0";
         document.getElementById("dfl-sustainability-notion-3").value = "0";
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 19c32050b..c28e3dc81 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -1048,7 +1048,14 @@ <h5 class="step-title">Accountability pillar</h5>
                                     <div class="notion">
                                         <h5 class="step-title">Factsheet completeness notion</h5>
                                         <input type="number" class="form-control" id="cfl-accountability-notion-1"
-                                            placeholder="Factsheet completeness notion" min="100" value="100"
+                                            placeholder="Factsheet completeness notion" min="20" value="80"
+                                            style="display: inline; width: 70%;" disabled="true">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Monitoring notion</h5>
+                                        <input type="number" class="form-control" id="cfl-accountability-notion-2"
+                                            placeholder="Monitoring notion" min="20" value="20"
                                             style="display: inline; width: 70%;" disabled="true">
                                         <small class="form-text text-muted">%</small>
                                     </div>
@@ -1066,7 +1073,7 @@ <h5 class="step-title">Architectural soundness pillar</h5>
                                     <div class="notion">
                                         <h5 class="step-title">Client management notion</h5>
                                         <input type="number" class="form-control" id="cfl-architectural-soundness-notion-1"
-                                            placeholder="Client management notion" min="20" value="50"
+                                            placeholder="Client management notion" min="20" value="25"
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
@@ -1077,6 +1084,13 @@ <h5 class="step-title">Optimization notion</h5>
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Federation management notion</h5>
+                                        <input type="number" class="form-control" id="cfl-architectural-soundness-notion-3"
+                                            placeholder="Federation management notion" min="20" value="25"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
                                 </div>
                             </div>
                             <div class="trust-options">
@@ -1239,8 +1253,15 @@ <h5 class="step-title">Accountability pillar</h5>
                                     <div class="notion">
                                         <h5 class="step-title">Factsheet completeness notion</h5>
                                         <input type="number" class="form-control" id="dfl-accountability-notion-1"
-                                            placeholder="Factsheet completeness notion" min="100" value="100"
-                                            style="display: inline; width: 70%;" disabled="true">
+                                            placeholder="Factsheet completeness notion" min="20" value="80"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Monitoring notion</h5>
+                                        <input type="number" class="form-control" id="dfl-accountability-notion-2"
+                                            placeholder="Monitoring notion" min="20" value="20"
+                                            style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
                                 </div>
@@ -1257,7 +1278,7 @@ <h5 class="step-title">Architectural soundness pillar</h5>
                                     <div class="notion">
                                         <h5 class="step-title">Client management notion</h5>
                                         <input type="number" class="form-control" id="dfl-architectural-soundness-notion-1"
-                                            placeholder="Client management notion" min="20" value="50"
+                                            placeholder="Client management notion" min="20" value="25"
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
@@ -1268,6 +1289,13 @@ <h5 class="step-title">Optimization notion</h5>
                                             style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
+                                    <div class="notion">
+                                        <h5 class="step-title">Federation management notion</h5>
+                                        <input type="number" class="form-control" id="dfl-architectural-soundness-notion-3"
+                                            placeholder="Federation management notion" min="20" value="25"
+                                            style="display: inline; width: 70%;">
+                                        <small class="form-text text-muted">%</small>
+                                    </div>
                                 </div>
                             </div>
                             <div class="trust-options">

From c0731744a28d920793f0d4ad2f4a64c3673c6685 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 20 Apr 2026 10:29:43 +0200
Subject: [PATCH 25/66] Notions: Dynamic weights fixed

---
 nebula/addons/trustworthiness/metric.py |  4 ++--
 nebula/addons/trustworthiness/pillar.py | 16 ++++++++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/nebula/addons/trustworthiness/metric.py b/nebula/addons/trustworthiness/metric.py
index 8083dd6f0..acfe4486b 100755
--- a/nebula/addons/trustworthiness/metric.py
+++ b/nebula/addons/trustworthiness/metric.py
@@ -61,7 +61,7 @@ def evaluate(self, experiment_name, weights, use_weights=False):
             final_score = 0
             result_print = []
             for key, value in metrics:
-                pillar = TrustPillar(key, value, input_docs, use_weights)
+                pillar = TrustPillar(key, value, input_docs, use_weights, user_weights=weights)
                 score, result = pillar.evaluate()
                 weight = weights.get(key) / 100
                 final_score += weight * score
@@ -111,7 +111,7 @@ def evaluate_participant(self, experiment_name, weights, participant_id, use_wei
             final_score = 0
             result_print = []
             for key, value in metrics:
-                pillar = TrustPillar(key, value, input_docs, use_weights)
+                pillar = TrustPillar(key, value, input_docs, use_weights, user_weights=weights)
                 score, result = pillar.evaluate()
                 weight = weights.get(key) / 100
                 final_score += weight * score
diff --git a/nebula/addons/trustworthiness/pillar.py b/nebula/addons/trustworthiness/pillar.py
index 1a780cc5b..a57ec1abb 100755
--- a/nebula/addons/trustworthiness/pillar.py
+++ b/nebula/addons/trustworthiness/pillar.py
@@ -18,12 +18,13 @@ class TrustPillar:
 
     """
 
-    def __init__(self, name, metrics, input_docs, use_weights=False):
+    def __init__(self, name, metrics, input_docs, use_weights=False, user_weights=None):
         self.name = name
         self.input_docs = input_docs
         self.metrics = metrics
         self.result = []
         self.use_weights = use_weights
+        self.user_weights = user_weights or {}
 
     def evaluate(self):
         """
@@ -35,11 +36,22 @@ def evaluate(self):
         score = 0
         avg_weight = 1 / len(self.metrics)
         for key, value in self.metrics.items():
-            weight = value.get("weight", avg_weight) if self.use_weights else avg_weight
+            weight = self._get_notion_weight(key, value, avg_weight) if self.use_weights else avg_weight
             score += weight * self.get_notion_score(key, value.get("metrics"))
         score = round(score, 2)
         return score, {self.name: {"score": score, "notions": self.result}}
 
+    def _get_notion_weight(self, notion_name, notion_config, avg_weight):
+        """
+        Resolve the weight for a notion.
+
+        Scenario-defined notion weights are stored as percentages in scenario.json.
+        If notion_name is in self.user_weights, that value / 100 wins; else notion_config["weight"], else avg_weight.
+        """
+        if notion_name in self.user_weights:
+            return float(self.user_weights[notion_name]) / 100
+        return notion_config.get("weight", avg_weight)
+
     def get_notion_score(self, name, metrics):
         """
         Evaluate the trust score for the notion.

From cb2dc949f5f7c9dfe256dba894a92d8f51931926 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 20 Apr 2026 15:51:15 +0200
Subject: [PATCH 26/66] Underfitting now uses validation, frontend fixed

---
 nebula/addons/trustworthiness/calculation.py  | 55 +++++++++++++------
 .../addons/trustworthiness/dfl_factsheet.py   |  6 +-
 nebula/addons/trustworthiness/factsheet.py    |  5 +-
 nebula/addons/trustworthiness/metric.py       |  2 +-
 .../addons/trustworthiness/trustworthiness.py | 45 ++++++++++++---
 nebula/addons/trustworthiness/utils.py        | 18 +++---
 nebula/core/engine.py                         |  1 +
 nebula/core/models/nebulamodel.py             | 11 +++-
 nebula/core/nebulaevents.py                   | 36 ++++++++----
 nebula/core/network/messages.py               |  3 +-
 nebula/core/pb/nebula.proto                   |  1 +
 nebula/core/pb/nebula_pb2.py                  | 12 ++--
 nebula/core/training/lightning.py             | 19 ++++++-
 nebula/frontend/templates/deployment.html     |  4 +-
 14 files changed, 156 insertions(+), 62 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 94aee8afc..006da5d2b 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -572,6 +572,24 @@ def get_avg_loss_accuracy(scenario_name):
 
     return avg_loss, avg_accuracy, std_accuracy
 
+def get_underfitting_score(scenario_name, id):
+    """
+    Averages the "val_accuracy" column of the scenario's data_results.csv; `id` is accepted but not used.
+    """
+    total_val_accuracy = 0
+    # Results file: $NEBULA_LOGS_DIR/<scenario_name>/trustworthiness/data_results.csv
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
+
+    data = read_csv(data_file)
+
+    number_files = len(data)
+
+    total_val_accuracy = data["val_accuracy"].sum()
+    # NOTE(review): the divisor is rows - 1, not rows, so a one-row file yields a zero divisor — confirm intent.
+    avg_val_accuracy = total_val_accuracy/ (number_files-1)
+
+    return avg_val_accuracy
+
 
 def get_participant_loss_accuracy(scenario_name, participant_id):
     """
@@ -816,22 +834,6 @@ def _collect_classification_statistics(model, dataloader):
     )
 
 
-def get_underfitting_score(test_accuracy):
-    """
-    Uses test accuracy as a proxy for underfitting.
-
-    Args:
-        test_accuracy (float): Test accuracy in [0, 1].
-
-    Returns:
-        float: Underfitting proxy value.
-    """
-    try:
-        return float(test_accuracy)
-    except Exception:
-        logger.warning("Could not compute underfitting score")
-        return 0.0
-
 
 def get_overfitting_score(model, train_dataloader, test_accuracy):
     """
@@ -853,6 +855,27 @@ def get_overfitting_score(model, train_dataloader, test_accuracy):
         logger.warning(exc)
         return 0.0
 
+def get_underfitting_score_local(scenario_name, id):
+    """
+    Gets the local validation accuracy for a specific DFL/SDFL participant.
+
+    Args:
+        scenario_name (str): Scenario name.
+        id (int | str): Participant identifier.
+
+    Returns:
+        float: Validation accuracy.
+    """
+    data_file = os.path.join(
+        os.environ.get('NEBULA_LOGS_DIR'),
+        scenario_name,
+        "trustworthiness",
+        f"data_results_{id}.csv",
+    )
+
+    data = read_csv(data_file)
+    return float(data["val_accuracy"].iloc[0])
+
 
 def get_well_calibration_error(model, test_dataloader, n_bins=10):
     """
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 4499dcba0..6b12a41e0 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -29,7 +29,7 @@
 from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
 from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
 from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_underfitting_score_local
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 
 dirname = os.path.dirname(__file__)
@@ -272,9 +272,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         )
 
         factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
-        factsheet["fairness"]["underfitting"] = get_underfitting_score(
-            factsheet["performance"]["test_acc"]
-        )
+        factsheet["fairness"]["underfitting"] = get_underfitting_score_local(experiment_name, participant_idx)
         overfitting_value = get_overfitting_score(
             model,
             train_dataloader,
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 4acda271c..49ae3578f 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -348,7 +348,10 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                     test_dataloader,
                 )
                 factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
-                factsheet["fairness"]["underfitting"] = factsheet["performance"]["test_acc_avg"]
+
+                underfitting_score = get_underfitting_score(scenario_name, participant_idx)
+
+                factsheet["fairness"]["underfitting"] = underfitting_score
                 overfitting_value = get_overfitting_score(
                     model,
                     train_dataloader,
diff --git a/nebula/addons/trustworthiness/metric.py b/nebula/addons/trustworthiness/metric.py
index acfe4486b..7c92ec552 100755
--- a/nebula/addons/trustworthiness/metric.py
+++ b/nebula/addons/trustworthiness/metric.py
@@ -18,7 +18,7 @@ class TrustMetricManager:
 
     def __init__(self, scenario_start_time, federation, participant=None):
         if federation == "DFL":
-            self.factsheet_file_nm = f"factsheet_participant_{participant}.json" # IDEA: Pasarle desde trustworthiness.py el id del participante, ponerlo a None para CFL
+            self.factsheet_file_nm = f"factsheet_participant_{participant}.json"
             self.eval_metrics_file_nm = "eval_metrics_dfl.json"
             self.nebula_trust_results_nm = f"nebula_trust_results_{participant}.json"
             self.scenario_start_time = scenario_start_time
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 025cb4358..e56ac9bfb 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -1,7 +1,7 @@
 import logging
 import asyncio
 from nebula.addons.functions import print_msg_box
-from nebula.core.nebulaevents import AggregationEvent, ExperimentFinishEvent, RoundEndEvent, RoundStartEvent, TestMetricsEvent
+from nebula.core.nebulaevents import AggregationEvent, ExperimentFinishEvent, RoundEndEvent, RoundStartEvent, TestMetricsEvent, ValidationMetricsEvent
 from nebula.core.eventmanager import EventManager
 from nebula.core.noderole import Role, ServerRoleBehavior
 from abc import ABC, abstractmethod
@@ -68,6 +68,8 @@ def __init__(self, engine, idx, trust_files_route):
         self._sample_size = None
         self._current_loss = None
         self._current_accuracy = None
+        self._current_val_loss = None
+        self._current_val_accuracy = None
         self._experiment_name = ""
         self._per_round = None
         self._start_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
@@ -95,6 +97,7 @@ async def init(self, experiment_name):
         await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
         await EventManager.get_instance().subscribe_node_event(RoundEndEvent, self._process_round_end_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
+        await EventManager.get_instance().subscribe_addonevent(ValidationMetricsEvent, self._process_validation_metrics_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
         await self._create_pk_files(experiment_name)
 
@@ -134,6 +137,10 @@ def get_sample_size(self):
     def get_metrics(self):
         return (self._current_loss, self._current_accuracy)
 
+    def get_validation_metrics(self):
+        logging.info("VALIDATION ACCURACY=%s", self._current_val_accuracy)
+        return (self._current_val_loss, self._current_val_accuracy)
+
     async def finish_experiment_role_pre_actions(self):
         with open(self._train_loader_file, 'rb') as file:
             train_loader = pickle.load(file)
@@ -154,7 +161,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             #logging.info("connections=%s", list(cm.connections.keys()))
             #logging.info("server in connections? %s", server_addr in cm.connections)
 
-            bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(experiment_name, self._idx)
+            bytes_sent, bytes_recv, accuracy, loss, val_accuracy = load_data_results_participant(experiment_name, self._idx)
 
             role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
 
@@ -185,12 +192,13 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 class_imbalance=class_imbalance,
                 model_size=model_size,
                 local_entropy=local_entropy,
+                val_accuracy=val_accuracy,
             )
 
             logging.info(
                 "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
                 "accuracy=%s loss=%s energy_grid=%s emissions=%s workload=%s"
-                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s",
+                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s",
                 server_addr,
                 str(self._idx),
                 bytes_sent,
@@ -210,6 +218,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 class_imbalance,
                 model_size,
                 local_entropy,
+                val_accuracy
             )
 
             await cm.send_message(
@@ -884,6 +893,11 @@ async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         if self._per_round is not None:
             await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
 
+    async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
+        cur_loss, cur_acc = await vme.get_event_data()
+        if cur_loss is not None and cur_acc is not None:
+            self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
+
     async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
 
@@ -900,6 +914,8 @@ def __init__(self, engine: Engine, idx, trust_files_route):
         self._sample_size = 0
         self._current_loss = None
         self._current_accuracy = None
+        self._current_val_loss = None
+        self._current_val_accuracy = None
         server_start_time: ServerRoleBehavior = engine.rb
         self._start_time = server_start_time._start_time
         self._engine: Engine = engine
@@ -923,6 +939,7 @@ async def init(self, experiment_name):
         self._experiment_name = experiment_name
         await EventManager.get_instance().subscribe_node_event(AggregationEvent, self._process_aggregation_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
+        await EventManager.get_instance().subscribe_addonevent(ValidationMetricsEvent, self._process_validation_metrics_event)
         await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
         await self._create_pk_files(experiment_name)
@@ -963,6 +980,9 @@ def get_sample_size(self):
     def get_metrics(self):
         return (self._current_loss, self._current_accuracy)
 
+    def get_validation_metrics(self):
+        return (self._current_val_loss, self._current_val_accuracy)
+
     async def finish_experiment_role_pre_actions(self):
         pass
 
@@ -973,7 +993,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
         if self._csv_completed == True:
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, trustworthiness reports OK, starting generate_factsheet")
-            bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(
+            bytes_sent, bytes_recv, accuracy, loss, val_accuracy= load_data_results_participant(
                 self._experiment_name,
                 self._idx,
             )
@@ -994,7 +1014,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy)
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             await self._generate_factsheet(trust_config, experiment_name)
         else:
@@ -1003,7 +1023,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             await asyncio.sleep(60)
             if self._trustworthiness_reports != None and self._csv_completed == False:
                 save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
-            bytes_sent, bytes_recv, accuracy, loss = load_data_results_participant(
+            bytes_sent, bytes_recv, accuracy, loss, val_accuracy = load_data_results_participant(
                 self._experiment_name,
                 self._idx,
             )
@@ -1024,7 +1044,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy)
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             await self._generate_factsheet(trust_config, experiment_name)
         #await self._generate_factsheet(trust_config, experiment_name)
@@ -1050,6 +1070,7 @@ async def register_trustworthiness_report(self, source, message):
             "class_imbalance": message.class_imbalance,
             "model_size": message.model_size,
             "local_entropy": message.local_entropy,
+            "val_accuracy": message.val_accuracy,
         }
 
         logging.info(
@@ -1207,6 +1228,11 @@ async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         if self._per_round is not None:
             await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
 
+    async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
+        cur_loss, cur_acc = await vme.get_event_data()
+        if cur_loss is not None and cur_acc is not None:
+            self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
+
     async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
 
@@ -1266,6 +1292,9 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         await self.tw.finish_experiment_role_pre_actions()
 
         last_loss, last_accuracy = self.tw.get_metrics()
+        _, last_val_accuracy = self.tw.get_validation_metrics()
+        if last_val_accuracy is None:
+            last_val_accuracy = 0.0
 
         # Get sent/received bytes from the reporter
         bytes_sent = self._engine.reporter.acc_bytes_sent
@@ -1276,7 +1305,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         sample_size = self.tw.get_sample_size()
 
         # Final operations
-        save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss)
+        save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss, last_val_accuracy)
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index 8aeb68562..fb4274d4e 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -306,8 +306,9 @@ def load_data_results_participant(experiment_name: str, participant_id: int | st
     bytes_recv = int(float(row["bytes_recv"]))
     accuracy = float(row["accuracy"])
     loss = float(row["loss"])
+    val_accuracy = float(row["val_accuracy"])
 
-    return bytes_sent, bytes_recv, accuracy, loss
+    return bytes_sent, bytes_recv, accuracy, loss, val_accuracy
 
 
 def load_emissions_participant(experiment_name: str, participant_id: int | str):
@@ -354,7 +355,7 @@ def save_trustworthiness_reports_csv(
     with open(data_results_path, "w", newline="") as csv_file:
         writer = csv.DictWriter(
             csv_file,
-            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy"],
+            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy"],
         )
         writer.writeheader()
 
@@ -368,6 +369,7 @@ def save_trustworthiness_reports_csv(
                 "class_imbalance": report["class_imbalance"],
                 "model_size": report["model_size"],
                 "local_entropy": report["local_entropy"],
+                "val_accuracy": report["val_accuracy"],
             })
 
     with open(emissions_path, "w", newline="") as csv_file:
@@ -398,7 +400,7 @@ def save_trustworthiness_reports_csv(
         emissions_path,
     )
 
-def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, class_imbalance: float, model_size: int, local_entropy: float):
+def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, class_imbalance: float, model_size: int, local_entropy: float, val_accuracy: float):
     try:
         data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
     except:
@@ -407,13 +409,13 @@ def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_rec
     if exists(data_results_file):
         df = pd.read_csv(data_results_file)
     else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy"])
+        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy"])
 
     try:
         # Add new entry to DataFrame
         new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss], 'class_imbalance': [class_imbalance], 'model_size': [model_size], 'local_entropy': [local_entropy]})
+                                    'loss': [loss], 'class_imbalance': [class_imbalance], 'model_size': [model_size], 'local_entropy': [local_entropy], 'val_accuracy': [val_accuracy]})
         df = pd.concat([df, new_data], ignore_index=True)
 
         df.to_csv(data_results_file, encoding='utf-8', index=False)
@@ -445,7 +447,7 @@ def save_emissions_csv_cfl(scenario_name: str, id: int, role: str, energy_grid:
         logger.warning(e)
 
 
-def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float):
+def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, val_accuracy: float):
 
     try:
         data_results_id_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"data_results_{id}.csv")
@@ -455,13 +457,13 @@ def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: i
     if exists(data_results_id_file):
         df = pd.read_csv(data_results_id_file)
     else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss"])
+        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "val_accuracy"])
 
     try:
         # Add new entry to DataFrame
         new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss]})
+                                    'loss': [loss], 'val_accuracy': [val_accuracy]})
         df = pd.concat([df, new_data], ignore_index=True)
 
         df.to_csv(data_results_id_file, encoding='utf-8', index=False)
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 85e767b61..85fbe5026 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -437,6 +437,7 @@ async def _trustworthiness_report_callback(self, source, message):
                 "class_imbalance": message.class_imbalance,
                 "model_size": message.model_size,
                 "local_entropy": message.local_entropy,
+                "val_accuracy": message.val_accuracy,
             }
 
             logging.info(f"handle_trustworthiness_message | Trigger | {report}")
diff --git a/nebula/core/models/nebulamodel.py b/nebula/core/models/nebulamodel.py
index 66aac2db5..6c5e672d2 100755
--- a/nebula/core/models/nebulamodel.py
+++ b/nebula/core/models/nebulamodel.py
@@ -83,6 +83,11 @@ def log_metrics_end(self, phase):
             f"{phase}/{key.replace('Multiclass', '').split('/')[-1]}": value.detach() for key, value in output.items()
         }
 
+        if phase == "Validation":
+            self._latest_validation_metrics = {
+                key: float(value.detach().cpu().item()) for key, value in output.items()
+            }
+
         self.logger.log_data(output, step=self.global_number[phase])
 
         metrics_str = ""
@@ -199,6 +204,7 @@ def __init__(
 
         self._current_loss = -1
         self._optimizer = None
+        self._latest_validation_metrics = {}
 
     def set_communication_manager(self, communication_manager):
         self.communication_manager = communication_manager
@@ -231,6 +237,9 @@ def step(self, batch, batch_idx, phase):
     def get_loss(self):
         return self._current_loss
 
+    def get_latest_validation_metrics(self):
+        return self._latest_validation_metrics
+
     def modify_learning_rate(self, new_lr):
         logging.info(f"Modifiying | learning rate, new value: {new_lr}")
         self.learning_rate = new_lr
@@ -306,7 +315,7 @@ def test_step(self, batch, batch_idx, dataloader_idx=None):
         loss = self.criterion(y_pred, y)
         y_pred_classes = torch.argmax(y_pred, dim=1)
         accuracy = torch.mean((y_pred_classes == y).float())
-        
+
         if dataloader_idx == 0:
             self.log(f"val_loss", loss, on_epoch=True, prog_bar=False)
             self.log(f"val_accuracy", accuracy, on_epoch=True, prog_bar=False)
diff --git a/nebula/core/nebulaevents.py b/nebula/core/nebulaevents.py
index f2ec08835..0e3e77977 100644
--- a/nebula/core/nebulaevents.py
+++ b/nebula/core/nebulaevents.py
@@ -5,7 +5,7 @@ class AddonEvent(ABC):
     """
     Abstract base class for all addon-related events in the system.
     """
-    
+
     @abstractmethod
     async def get_event_data(self):
         """
@@ -21,7 +21,7 @@ class NodeEvent(ABC):
     """
     Abstract base class for all node-related events in the system.
     """
-    
+
     @abstractmethod
     async def get_event_data(self):
         """
@@ -52,7 +52,7 @@ class MessageEvent:
         source (str): Address or identifier of the message sender.
         message (Any): The actual message payload.
     """
-    
+
     def __init__(self, message_type, source, message):
         """
         Initializes a MessageEvent instance.
@@ -264,7 +264,7 @@ async def get_event_data(self) -> tuple[str, bool]:
 
     async def is_concurrent(self) -> bool:
         return True
-    
+
 class ModelPropagationEvent(NodeEvent):
     def __init__(self, eligible_neighbors, strategy):
         """Event triggered when model propagation is ready.
@@ -275,7 +275,7 @@ def __init__(self, eligible_neighbors, strategy):
         """
         self.eligible_neighbors = eligible_neighbors
         self._strategy = strategy
-        
+
     def __str__(self):
         return f"Model propagation event, strategy: {self._strategy}"
 
@@ -291,8 +291,8 @@ async def get_event_data(self) -> tuple[set, str]:
         return (self.eligible_neighbors, self._strategy)
 
     async def is_concurrent(self) -> bool:
-        return False    
-            
+        return False
+
 
 
 class UpdateReceivedEvent(NodeEvent):
@@ -362,7 +362,7 @@ async def get_event_data(self) -> tuple[str, tuple[float, float]]:
 
     async def is_concurrent(self) -> bool:
         return True
-    
+
 class DuplicatedMessageEvent(NodeEvent):
     """
     Event triggered when a message is received that has already been processed.
@@ -370,7 +370,7 @@ class DuplicatedMessageEvent(NodeEvent):
     Attributes:
         source (str): The address of the node that sent the duplicated message.
     """
-    
+
     def __init__(self, source: str, message_type: str):
         self.source = source
 
@@ -396,7 +396,7 @@ class GPSEvent(AddonEvent):
     Attributes:
         distances (dict): A dictionary mapping node addresses to their respective distances.
     """
-    
+
     def __init__(self, distances: dict):
         """
         Initializes a GPSEvent.
@@ -427,7 +427,7 @@ class ChangeLocationEvent(AddonEvent):
         latitude (float): New latitude of the node.
         longitude (float): New longitude of the node.
     """
-    
+
     def __init__(self, latitude, longitude):
         """
         Initializes a ChangeLocationEvent.
@@ -450,7 +450,7 @@ async def get_event_data(self):
             tuple: A tuple containing latitude and longitude.
         """
         return (self.latitude, self.longitude)
-    
+
 class TestMetricsEvent(AddonEvent):
     def __init__(self, loss, accuracy):
         self._loss = loss
@@ -461,3 +461,15 @@ def __str__(self):
 
     async def get_event_data(self):
         return (self._loss, self._accuracy)
+
+
+class ValidationMetricsEvent(AddonEvent):
+    def __init__(self, loss, accuracy):
+        self._loss = loss
+        self._accuracy = accuracy
+
+    def __str__(self):
+        return "ValidationMetricsEvent"
+
+    async def get_event_data(self):
+        return (self._loss, self._accuracy)
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index 1b687d7bb..3f237fbc2 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -114,7 +114,8 @@ def _define_message_templates(self):
                     "sample_size",
                     "class_imbalance",
                     "model_size",
-                    "local_entropy"
+                    "local_entropy",
+                    "val_accuracy"
                 ],
                 "defaults": {},
             },
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index df6d55c6b..8bb82c4cd 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -157,6 +157,7 @@ message TrustworthinessMessage {
   float class_imbalance = 17;
   int64 model_size = 18;
   float local_entropy = 19;
+  float val_accuracy = 20;
 }
 
 message TrustscoresMessage {
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index e1227477e..1c6ac41f3 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xac\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 
\x01(\t\"\xc2\x03\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xac\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 
\x01(\t\"\xd8\x03\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -59,11 +59,11 @@
   _RESPONSEMESSAGE._serialized_start=2127
   _RESPONSEMESSAGE._serialized_end=2162
   _TRUSTWORTHINESSMESSAGE._serialized_start=2165
-  _TRUSTWORTHINESSMESSAGE._serialized_end=2615
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2595
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2615
-  _TRUSTSCORESMESSAGE._serialized_start=2618
-  _TRUSTSCORESMESSAGE._serialized_end=2754
+  _TRUSTWORTHINESSMESSAGE._serialized_end=2637
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2617
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2637
+  _TRUSTSCORESMESSAGE._serialized_start=2640
+  _TRUSTSCORESMESSAGE._serialized_end=2776
   _TRUSTSCORESMESSAGE_ACTION._serialized_start=2106
   _TRUSTSCORESMESSAGE_ACTION._serialized_end=2125
 # @@protoc_insertion_point(module_scope)
diff --git a/nebula/core/training/lightning.py b/nebula/core/training/lightning.py
index d83975147..5b9d96e50 100755
--- a/nebula/core/training/lightning.py
+++ b/nebula/core/training/lightning.py
@@ -19,7 +19,7 @@
 from nebula.config.config import TRAINING_LOGGER
 from nebula.core.utils.deterministic import enable_deterministic
 from nebula.core.utils.nebulalogger_tensorboard import NebulaTensorBoardLogger
-from nebula.core.nebulaevents import TestMetricsEvent
+from nebula.core.nebulaevents import TestMetricsEvent, ValidationMetricsEvent
 from nebula.core.eventmanager import EventManager
 
 logging_training = logging.getLogger(TRAINING_LOGGER)
@@ -295,8 +295,10 @@ async def train(self):
         try:
             self.create_trainer()
             logging.info(f"{'=' * 10} [Training] Started (check training logs for progress) {'=' * 10}")
-            await asyncio.to_thread(self._train_sync)
+            val_loss, val_accuracy = await asyncio.to_thread(self._train_sync)
             logging.info(f"{'=' * 10} [Training] Finished (check training logs for progress) {'=' * 10}")
+            vme = ValidationMetricsEvent(val_loss, val_accuracy)
+            await EventManager.get_instance().publish_addonevent(vme)
         except Exception as e:
             logging_training.error(f"Error training model: {e}")
             logging_training.error(traceback.format_exc())
@@ -304,11 +306,24 @@ async def train(self):
     def _train_sync(self):
         try:
             self._trainer.fit(self.model, self.datamodule)
+            validation_metrics = {}
+            if hasattr(self.model, "get_latest_validation_metrics"):
+                validation_metrics = self.model.get_latest_validation_metrics() or {}
+
+            loss = None
+            model_loss = getattr(self.model, "get_loss", None)
+            if callable(model_loss):
+                raw_loss = model_loss()
+                loss = raw_loss.item() if hasattr(raw_loss, "item") else raw_loss
+
+            accuracy = validation_metrics.get("Validation/Accuracy")
+            return loss, accuracy
         except Exception as e:
             logging_training.error(f"Error in _train_sync: {e}")
             tb = traceback.format_exc()
             logging_training.error(f"Traceback: {tb}")
             # If "raise", the exception will be managed by the main thread
+            return None, None
 
     async def test(self):
         try:
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index c28e3dc81..955b36760 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -1049,14 +1049,14 @@ <h5 class="step-title">Accountability pillar</h5>
                                         <h5 class="step-title">Factsheet completeness notion</h5>
                                         <input type="number" class="form-control" id="cfl-accountability-notion-1"
                                             placeholder="Factsheet completeness notion" min="20" value="80"
-                                            style="display: inline; width: 70%;" disabled="true">
+                                            style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
                                     <div class="notion">
                                         <h5 class="step-title">Monitoring notion</h5>
                                         <input type="number" class="form-control" id="cfl-accountability-notion-2"
                                             placeholder="Monitoring notion" min="20" value="20"
-                                            style="display: inline; width: 70%;" disabled="true">
+                                            style="display: inline; width: 70%;">
                                         <small class="form-text text-muted">%</small>
                                     </div>
                                 </div>

From b5da1f5666a2b4678c2acf6ae18a4d50f9217bc0 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 24 Apr 2026 10:41:00 +0200
Subject: [PATCH 27/66] Differential Privacy V1 implemented

---
 .../addons/trustworthiness/trustworthiness.py |  21 ++--
 nebula/core/training/dp.py                    |  69 +++++++++++
 nebula/core/training/lightning.py             | 112 +++++++++++++++++-
 .../frontend/config/participant.json.example  |  12 +-
 pyproject.toml                                |   1 +
 5 files changed, 204 insertions(+), 11 deletions(-)
 create mode 100644 nebula/core/training/dp.py

diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index e56ac9bfb..fe2dbb97c 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -138,9 +138,18 @@ def get_metrics(self):
         return (self._current_loss, self._current_accuracy)
 
     def get_validation_metrics(self):
-        logging.info("VALIDATION ACCURACY=%s", self._current_val_accuracy)
         return (self._current_val_loss, self._current_val_accuracy)
 
+    def _dump_model_for_trust(self, path):
+        model = self._engine.trainer.model
+        optimizer = model._optimizer
+        model._optimizer = None
+        try:
+            with open(path, 'wb') as f:
+                pickle.dump(model, f)
+        finally:
+            model._optimizer = optimizer
+
     async def finish_experiment_role_pre_actions(self):
         with open(self._train_loader_file, 'rb') as file:
             train_loader = pickle.load(file)
@@ -863,9 +872,7 @@ async def _register_sdfl_trustscores_report(self, source, message):
     async def _process_round_end_event(self, ree: RoundEndEvent):
         scenario_name = self._engine.config.participant["scenario_args"]["name"]
         train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"
-        # Save the training model in the trustworthiness directory
-        with open(train_model, 'wb') as f:
-            pickle.dump(self._engine.trainer.model, f)
+        self._dump_model_for_trust(train_model)
 
     async def _process_round_start_event(self, rse: RoundStartEvent):
         _, _, expected_nodes = await rse.get_event_data()
@@ -900,11 +907,7 @@ async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
 
     async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
-
-
-        # Save model in trustworthy dir
-        with open(model_file, 'wb') as f:
-            pickle.dump(self._engine.trainer.model, f)
+        self._dump_model_for_trust(model_file)
 
 
 class TrustWorkloadServer(TrustWorkload):
diff --git a/nebula/core/training/dp.py b/nebula/core/training/dp.py
new file mode 100644
index 000000000..570fc25d6
--- /dev/null
+++ b/nebula/core/training/dp.py
@@ -0,0 +1,69 @@
+class SimpleDPState:
+    def __init__(self):
+        self.extras = {}
+
+
+class DifferentialPrivacyPlugin:
+    name = "differential_privacy"
+
+    def __init__(
+        self,
+        *,
+        noise_multiplier=1.0,
+        max_grad_norm=1.0,
+        target_delta=1e-5,
+        accountant="prv",
+        secure_mode=False,
+        poisson_sampling=True,
+        clipping="flat",
+    ):
+        self.noise_multiplier = float(noise_multiplier)
+        self.max_grad_norm = float(max_grad_norm)
+        self.target_delta = target_delta
+        self.accountant = accountant
+        self.secure_mode = bool(secure_mode)
+        self.poisson_sampling = bool(poisson_sampling)
+        self.clipping = clipping
+
+    def on_train_start(self, model, optimizer, state):
+        from opacus import PrivacyEngine
+
+        privacy_engine = PrivacyEngine(accountant=self.accountant, secure_mode=self.secure_mode)
+        dataloader = state.extras["dataloader"]
+        model.train()
+        private_model, optimizer, private_dataloader = privacy_engine.make_private(
+            module=model,
+            optimizer=optimizer,
+            data_loader=dataloader,
+            noise_multiplier=self.noise_multiplier,
+            max_grad_norm=self.max_grad_norm,
+            poisson_sampling=self.poisson_sampling,
+            clipping=self.clipping,
+            wrap_model=False,
+        )
+
+        state.extras["privacy_engine"] = privacy_engine
+        state.extras["model"] = private_model
+        state.extras["optimizer"] = optimizer
+        state.extras["dataloader"] = private_dataloader
+
+    def on_train_end(self, state):
+        privacy_engine = state.extras.get("privacy_engine")
+        private_model = state.extras.get("model")
+
+        if privacy_engine is not None and self.target_delta is not None:
+            try:
+                epsilon = privacy_engine.get_epsilon(delta=self.target_delta)
+                state.extras["dp_epsilon"] = float(epsilon)
+                state.extras["dp_delta"] = float(self.target_delta)
+            except Exception:
+                pass
+
+        if private_model is not None:
+            private_model.zero_grad(set_to_none=True)
+            try:
+                private_model.forbid_grad_accumulation()
+                private_model.disable_hooks()
+                private_model.remove_hooks()
+            except Exception:
+                pass
diff --git a/nebula/core/training/lightning.py b/nebula/core/training/lightning.py
index 5b9d96e50..6a70ccd14 100755
--- a/nebula/core/training/lightning.py
+++ b/nebula/core/training/lightning.py
@@ -17,6 +17,7 @@
 from torch.nn import functional as F
 
 from nebula.config.config import TRAINING_LOGGER
+from nebula.core.training.dp import DifferentialPrivacyPlugin, SimpleDPState
 from nebula.core.utils.deterministic import enable_deterministic
 from nebula.core.utils.nebulalogger_tensorboard import NebulaTensorBoardLogger
 from nebula.core.nebulaevents import TestMetricsEvent, ValidationMetricsEvent
@@ -135,6 +136,7 @@ def __init__(self, model, datamodule, config=None):
         self.idx = self.config.participant["device_args"]["idx"]
         self.log_dir = os.path.join(self.config.participant["tracking_args"]["log_dir"], self.experiment_name)
         self._logger = None
+        self._dp_plugin = self.create_dp_plugin()
         self.create_logger()
         enable_deterministic(seed=self.config.participant["scenario_args"]["random_seed"])
 
@@ -172,6 +174,21 @@ def create_logger(self):
 
         self._logger = nebulalogger
 
+    def create_dp_plugin(self):
+        dp_config = self.config.participant["training_args"].get("dp")
+        if dp_config is None or not dp_config["enabled"]:
+            return None
+
+        return DifferentialPrivacyPlugin(
+            noise_multiplier=dp_config["noise_multiplier"],
+            max_grad_norm=dp_config["max_grad_norm"],
+            target_delta=dp_config["target_delta"],
+            accountant=dp_config["accountant"],
+            secure_mode=dp_config["secure_mode"],
+            poisson_sampling=dp_config["poisson_sampling"],
+            clipping=dp_config["clipping"],
+        )
+
     def create_trainer(self):
         # Create a new trainer and logger for each round
         self.create_logger()
@@ -305,7 +322,11 @@ async def train(self):
 
     def _train_sync(self):
         try:
-            self._trainer.fit(self.model, self.datamodule)
+            if self._dp_plugin is None:
+                self._trainer.fit(self.model, self.datamodule)
+            else:
+                self._fit_with_dp()
+
             validation_metrics = {}
             if hasattr(self.model, "get_latest_validation_metrics"):
                 validation_metrics = self.model.get_latest_validation_metrics() or {}
@@ -325,6 +346,95 @@ def _train_sync(self):
             # If "raise", the exception will be managed by the main thread
             return None, None
 
+    def _get_training_device(self):
+        if (
+            self.config.participant["device_args"]["accelerator"] == "gpu"
+            and torch.cuda.is_available()
+            and self.config.participant["device_args"]["gpu_id"]
+        ):
+            return torch.device(f"cuda:{self.config.participant['device_args']['gpu_id'][0]}")
+        return torch.device("cpu")
+
+    def _log_manual_metrics(self, phase, metrics):
+        output = metrics.compute()
+        output = {
+            f"{phase}/{key.replace('Multiclass', '').split('/')[-1]}": value.detach() for key, value in output.items()
+        }
+        if phase == "Validation":
+            self.model._latest_validation_metrics = {
+                key: float(value.detach().cpu().item()) for key, value in output.items()
+            }
+        self._logger.log_data(output, step=self.model.global_number[phase])
+
+    def _fit_with_dp(self):
+        train_dataloader = self.datamodule.train_dataloader()
+        val_dataloader = self.datamodule.val_dataloader()
+        state = SimpleDPState()
+        state.extras["dataloader"] = train_dataloader
+        self.model.train()
+        optimizer = self.model.configure_optimizers()
+        device = self._get_training_device()
+
+        try:
+            self._dp_plugin.on_train_start(self.model, optimizer, state)
+            private_model = state.extras["model"]
+            private_optimizer = state.extras["optimizer"]
+            private_dataloader = state.extras["dataloader"]
+            self.model._optimizer = private_optimizer
+            private_model.to(device)
+
+            for epoch in range(self.epochs):
+                logging_training.info(f"Starting Epoch {epoch} DP")
+                private_model.train()
+                for batch_idx, batch in enumerate(private_dataloader):
+                    inputs, labels = batch
+                    inputs = inputs.to(device)
+                    labels = labels.to(device)
+
+                    private_optimizer.zero_grad()
+                    outputs = private_model(inputs)
+                    loss = self.model.criterion(outputs, labels)
+                    self.model._current_loss = loss.detach()
+                    self._logger.log_data({"Train/Loss": loss.detach()})
+                    self.model.train_metrics.update(torch.argmax(outputs.detach(), dim=1), labels.detach())
+                    loss.backward()
+                    private_optimizer.step()
+
+                self._log_manual_metrics("Train", self.model.train_metrics)
+                self.model.train_metrics.reset()
+                self.model.global_number["Train"] += 1
+                logging_training.info(f"Epoch {epoch} finished DP")
+
+                logging_training.info(f"Starting validation for Epoch {epoch} DP")
+                private_model.eval()
+                with torch.no_grad():
+                    for batch_idx, batch in enumerate(val_dataloader):
+                        inputs, labels = batch
+                        inputs = inputs.to(device)
+                        labels = labels.to(device)
+
+                        outputs = private_model(inputs)
+                        loss = self.model.criterion(outputs, labels)
+                        self.model._current_loss = loss.detach()
+                        self.model.val_metrics.update(torch.argmax(outputs.detach(), dim=1), labels.detach())
+
+                self._log_manual_metrics("Validation", self.model.val_metrics)
+                self.model.val_metrics.reset()
+                self.model.global_number["Validation"] += 1
+                logging_training.info(f"Validation for Epoch {epoch} finished DP")
+
+            if hasattr(private_model, "_module"):
+                self.model.load_state_dict(private_model._module.state_dict())
+            self.model.train()
+        finally:
+            self._dp_plugin.on_train_end(state)
+
+        dp_epsilon = state.extras.get("dp_epsilon")
+        if dp_epsilon is not None:
+            dp_delta = state.extras["dp_delta"]
+            self._logger.log_data({"DP/Epsilon": dp_epsilon, "DP/Delta": dp_delta})
+            logging_training.info(f"DP privacy budget | epsilon={dp_epsilon:.4f} | delta={dp_delta}")
+
     async def test(self):
         try:
             self.create_trainer()
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index ef2976063..0f01250fd 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -84,7 +84,17 @@
   },
   "training_args": {
     "trainer": "lightning",
-    "epochs": 3
+    "epochs": 3,
+    "dp": {
+      "enabled": true,
+      "noise_multiplier": 0.4,
+      "max_grad_norm": 1.0,
+      "target_delta": 1e-5,
+      "accountant": "prv",
+      "secure_mode": false,
+      "poisson_sampling": true,
+      "clipping": "flat"
+    }
   },
   "aggregator_args": {
     "algorithm": "FedAvg",
diff --git a/pyproject.toml b/pyproject.toml
index d54ff3dcd..9644e41a5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -92,6 +92,7 @@ core = [
     "matplotlib==3.9.2",
     "networkx==3.3",
     "numpy==2.1.1",
+    "opacus==1.5.4",
     "plotly==5.24.0",
     "protobuf==4.25.3",
     "psutil==6.0.0",

From e813458acaf52b3de7c35e4123ded6e185a35f95 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 24 Apr 2026 12:57:47 +0200
Subject: [PATCH 28/66] Differential Privacy V2, LightningDP implemented,
 frontend implemented

---
 nebula/controller/scenarios.py                |   6 +
 nebula/core/node.py                           |   7 +-
 nebula/core/training/lightning.py             | 112 +---------
 nebula/core/training/lightning_dp.py          | 196 ++++++++++++++++++
 .../frontend/config/participant.json.example  |   4 +-
 nebula/frontend/static/js/deployment/dp.js    |  59 ++++++
 nebula/frontend/static/js/deployment/main.js  |   3 +
 .../frontend/static/js/deployment/scenario.js |   7 +
 nebula/frontend/templates/deployment.html     |  16 ++
 9 files changed, 296 insertions(+), 114 deletions(-)
 create mode 100644 nebula/core/training/lightning_dp.py
 create mode 100644 nebula/frontend/static/js/deployment/dp.js

diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index 831d65b16..e44b9ed2c 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -115,6 +115,7 @@ def __init__(
         sar_neighbor_policy,
         sar_training,
         sar_training_policy,
+        dp=None,
         physical_ips=None,
     ):
         """
@@ -192,6 +193,7 @@ def __init__(
         self.network_subnet = network_subnet
         self.network_gateway = network_gateway
         self.epochs = epochs
+        self.dp = dp
         self.attack_params = attack_params
         self.reputation = reputation
         self.random_geo = random_geo
@@ -700,6 +702,10 @@ def __init__(self, scenario, user=None):
             participant_config["data_args"]["partition_parameter"] = self.scenario.partition_parameter
             participant_config["model_args"]["model"] = self.scenario.model
             participant_config["training_args"]["epochs"] = int(self.scenario.epochs)
+            if isinstance(self.scenario.dp, dict) and "enabled" in self.scenario.dp:
+                participant_config.setdefault("training_args", {})
+                participant_config["training_args"].setdefault("dp", {})
+                participant_config["training_args"]["dp"]["enabled"] = bool(self.scenario.dp["enabled"])
             participant_config["device_args"]["accelerator"] = self.scenario.accelerator
             participant_config["device_args"]["gpu_id"] = self.scenario.gpu_id
             participant_config["device_args"]["logging"] = self.scenario.logginglevel
diff --git a/nebula/core/node.py b/nebula/core/node.py
index c9541098c..a78fa78b2 100755
--- a/nebula/core/node.py
+++ b/nebula/core/node.py
@@ -50,6 +50,7 @@
 from nebula.core.models.mnist.mlp import MNISTModelMLP
 from nebula.core.engine import Engine
 from nebula.core.training.lightning import Lightning
+from nebula.core.training.lightning_dp import LightningDP
 from nebula.core.training.siamese import Siamese
 
 # os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
@@ -203,8 +204,12 @@ async def main(config: Config):
 
     trainer = None
     trainer_str = config.participant["training_args"]["trainer"]
+    dp_enabled = config.participant["training_args"]["dp"]["enabled"]
     if trainer_str == "lightning":
-        trainer = Lightning
+        if dp_enabled:
+            trainer = LightningDP
+        else:
+            trainer = Lightning
     elif trainer_str == "scikit":
         raise NotImplementedError
     elif trainer_str == "siamese":
diff --git a/nebula/core/training/lightning.py b/nebula/core/training/lightning.py
index 6a70ccd14..5b9d96e50 100755
--- a/nebula/core/training/lightning.py
+++ b/nebula/core/training/lightning.py
@@ -17,7 +17,6 @@
 from torch.nn import functional as F
 
 from nebula.config.config import TRAINING_LOGGER
-from nebula.core.training.dp import DifferentialPrivacyPlugin, SimpleDPState
 from nebula.core.utils.deterministic import enable_deterministic
 from nebula.core.utils.nebulalogger_tensorboard import NebulaTensorBoardLogger
 from nebula.core.nebulaevents import TestMetricsEvent, ValidationMetricsEvent
@@ -136,7 +135,6 @@ def __init__(self, model, datamodule, config=None):
         self.idx = self.config.participant["device_args"]["idx"]
         self.log_dir = os.path.join(self.config.participant["tracking_args"]["log_dir"], self.experiment_name)
         self._logger = None
-        self._dp_plugin = self.create_dp_plugin()
         self.create_logger()
         enable_deterministic(seed=self.config.participant["scenario_args"]["random_seed"])
 
@@ -174,21 +172,6 @@ def create_logger(self):
 
         self._logger = nebulalogger
 
-    def create_dp_plugin(self):
-        dp_config = self.config.participant["training_args"].get("dp")
-        if dp_config is None or not dp_config["enabled"]:
-            return None
-
-        return DifferentialPrivacyPlugin(
-            noise_multiplier=dp_config["noise_multiplier"],
-            max_grad_norm=dp_config["max_grad_norm"],
-            target_delta=dp_config["target_delta"],
-            accountant=dp_config["accountant"],
-            secure_mode=dp_config["secure_mode"],
-            poisson_sampling=dp_config["poisson_sampling"],
-            clipping=dp_config["clipping"],
-        )
-
     def create_trainer(self):
         # Create a new trainer and logger for each round
         self.create_logger()
@@ -322,11 +305,7 @@ async def train(self):
 
     def _train_sync(self):
         try:
-            if self._dp_plugin is None:
-                self._trainer.fit(self.model, self.datamodule)
-            else:
-                self._fit_with_dp()
-
+            self._trainer.fit(self.model, self.datamodule)
             validation_metrics = {}
             if hasattr(self.model, "get_latest_validation_metrics"):
                 validation_metrics = self.model.get_latest_validation_metrics() or {}
@@ -346,95 +325,6 @@ def _train_sync(self):
             # If "raise", the exception will be managed by the main thread
             return None, None
 
-    def _get_training_device(self):
-        if (
-            self.config.participant["device_args"]["accelerator"] == "gpu"
-            and torch.cuda.is_available()
-            and self.config.participant["device_args"]["gpu_id"]
-        ):
-            return torch.device(f"cuda:{self.config.participant['device_args']['gpu_id'][0]}")
-        return torch.device("cpu")
-
-    def _log_manual_metrics(self, phase, metrics):
-        output = metrics.compute()
-        output = {
-            f"{phase}/{key.replace('Multiclass', '').split('/')[-1]}": value.detach() for key, value in output.items()
-        }
-        if phase == "Validation":
-            self.model._latest_validation_metrics = {
-                key: float(value.detach().cpu().item()) for key, value in output.items()
-            }
-        self._logger.log_data(output, step=self.model.global_number[phase])
-
-    def _fit_with_dp(self):
-        train_dataloader = self.datamodule.train_dataloader()
-        val_dataloader = self.datamodule.val_dataloader()
-        state = SimpleDPState()
-        state.extras["dataloader"] = train_dataloader
-        self.model.train()
-        optimizer = self.model.configure_optimizers()
-        device = self._get_training_device()
-
-        try:
-            self._dp_plugin.on_train_start(self.model, optimizer, state)
-            private_model = state.extras["model"]
-            private_optimizer = state.extras["optimizer"]
-            private_dataloader = state.extras["dataloader"]
-            self.model._optimizer = private_optimizer
-            private_model.to(device)
-
-            for epoch in range(self.epochs):
-                logging_training.info(f"Starting Epoch {epoch} DP")
-                private_model.train()
-                for batch_idx, batch in enumerate(private_dataloader):
-                    inputs, labels = batch
-                    inputs = inputs.to(device)
-                    labels = labels.to(device)
-
-                    private_optimizer.zero_grad()
-                    outputs = private_model(inputs)
-                    loss = self.model.criterion(outputs, labels)
-                    self.model._current_loss = loss.detach()
-                    self._logger.log_data({"Train/Loss": loss.detach()})
-                    self.model.train_metrics.update(torch.argmax(outputs.detach(), dim=1), labels.detach())
-                    loss.backward()
-                    private_optimizer.step()
-
-                self._log_manual_metrics("Train", self.model.train_metrics)
-                self.model.train_metrics.reset()
-                self.model.global_number["Train"] += 1
-                logging_training.info(f"Epoch {epoch} finished DP")
-
-                logging_training.info(f"Starting validation for Epoch {epoch} DP")
-                private_model.eval()
-                with torch.no_grad():
-                    for batch_idx, batch in enumerate(val_dataloader):
-                        inputs, labels = batch
-                        inputs = inputs.to(device)
-                        labels = labels.to(device)
-
-                        outputs = private_model(inputs)
-                        loss = self.model.criterion(outputs, labels)
-                        self.model._current_loss = loss.detach()
-                        self.model.val_metrics.update(torch.argmax(outputs.detach(), dim=1), labels.detach())
-
-                self._log_manual_metrics("Validation", self.model.val_metrics)
-                self.model.val_metrics.reset()
-                self.model.global_number["Validation"] += 1
-                logging_training.info(f"Validation for Epoch {epoch} finished DP")
-
-            if hasattr(private_model, "_module"):
-                self.model.load_state_dict(private_model._module.state_dict())
-            self.model.train()
-        finally:
-            self._dp_plugin.on_train_end(state)
-
-        dp_epsilon = state.extras.get("dp_epsilon")
-        if dp_epsilon is not None:
-            dp_delta = state.extras["dp_delta"]
-            self._logger.log_data({"DP/Epsilon": dp_epsilon, "DP/Delta": dp_delta})
-            logging_training.info(f"DP privacy budget | epsilon={dp_epsilon:.4f} | delta={dp_delta}")
-
     async def test(self):
         try:
             self.create_trainer()
diff --git a/nebula/core/training/lightning_dp.py b/nebula/core/training/lightning_dp.py
new file mode 100644
index 000000000..d185d96c4
--- /dev/null
+++ b/nebula/core/training/lightning_dp.py
@@ -0,0 +1,196 @@
+import logging
+import traceback
+
+import torch
+
+from nebula.config.config import TRAINING_LOGGER
+from nebula.core.training.lightning import Lightning
+from nebula.core.training.dp import DifferentialPrivacyPlugin, SimpleDPState
+
+logging_training = logging.getLogger(TRAINING_LOGGER)
+
+
+class LightningDP(Lightning):
+    """
+    Lightning-based trainer with Differential Privacy support.
+
+    This class inherits the standard Nebula Lightning trainer but overrides
+    the synchronous training logic because Opacus needs to privatize the
+    model, optimizer and dataloader before the training loop starts.
+    """
+
+    def __init__(self, model, datamodule, config=None):
+        super().__init__(model, datamodule, config)
+        self._dp_plugin = self.create_dp_plugin()
+
+    def create_dp_plugin(self):
+        dp_config = self.config.participant["training_args"].get("dp")
+
+        if dp_config is None or not dp_config.get("enabled", False):
+            raise ValueError("LightningDP was selected, but Differential Privacy is not enabled in the configuration.")
+
+        return DifferentialPrivacyPlugin(
+            noise_multiplier=dp_config["noise_multiplier"],
+            max_grad_norm=dp_config["max_grad_norm"],
+            target_delta=dp_config["target_delta"],
+            accountant=dp_config["accountant"],
+            secure_mode=dp_config["secure_mode"],
+            poisson_sampling=dp_config["poisson_sampling"],
+            clipping=dp_config["clipping"],
+        )
+
+    def _train_sync(self):
+        try:
+            self._fit_with_dp()
+
+            validation_metrics = {}
+            if hasattr(self.model, "get_latest_validation_metrics"):
+                validation_metrics = self.model.get_latest_validation_metrics() or {}
+
+            loss = None
+            model_loss = getattr(self.model, "get_loss", None)
+            if callable(model_loss):
+                raw_loss = model_loss()
+                loss = raw_loss.item() if hasattr(raw_loss, "item") else raw_loss
+
+            accuracy = validation_metrics.get("Validation/Accuracy")
+            return loss, accuracy
+
+        except Exception as e:
+            logging_training.error(f"Error in _train_sync with Differential Privacy: {e}")
+            tb = traceback.format_exc()
+            logging_training.error(f"Traceback: {tb}")
+            return None, None
+
+    def _get_training_device(self):
+        if (
+            self.config.participant["device_args"]["accelerator"] == "gpu"
+            and torch.cuda.is_available()
+            and self.config.participant["device_args"]["gpu_id"]
+        ):
+            return torch.device(f"cuda:{self.config.participant['device_args']['gpu_id'][0]}")
+
+        return torch.device("cpu")
+
+    def _log_manual_metrics(self, phase, metrics):
+        output = metrics.compute()
+        output = {
+            f"{phase}/{key.replace('Multiclass', '').split('/')[-1]}": value.detach()
+            for key, value in output.items()
+        }
+
+        if phase == "Validation":
+            self.model._latest_validation_metrics = {
+                key: float(value.detach().cpu().item())
+                for key, value in output.items()
+            }
+
+        self._logger.log_data(output, step=self.model.global_number[phase])
+
+    def _fit_with_dp(self):
+        """Train and validate for ``self.epochs`` epochs with the DP plugin attached.
+
+        After ``on_train_start`` the private model, optimizer and dataloader are read
+        back from ``state.extras`` and drive the manual loop below; the trained weights
+        are then loaded into ``self.model``. ``on_train_end`` runs in a ``finally`` block.
+        """
+        train_dataloader = self.datamodule.train_dataloader()
+        val_dataloader = self.datamodule.val_dataloader()
+        state = SimpleDPState()
+        state.extras["dataloader"] = train_dataloader
+
+        self.model.train()
+        optimizer = self.model.configure_optimizers()
+        device = self._get_training_device()
+
+        try:
+            self._dp_plugin.on_train_start(self.model, optimizer, state)
+
+            private_model = state.extras["model"]
+            private_optimizer = state.extras["optimizer"]
+            private_dataloader = state.extras["dataloader"]
+
+            self.model._optimizer = private_optimizer
+            private_model.to(device)
+
+            for epoch in range(self.epochs):
+                logging_training.info(f"Starting Epoch {epoch} DP")
+                private_model.train()
+
+                for batch_idx, batch in enumerate(private_dataloader):
+                    inputs, labels = batch
+                    inputs = inputs.to(device)
+                    labels = labels.to(device)
+
+                    private_optimizer.zero_grad()
+                    outputs = private_model(inputs)
+                    loss = self.model.criterion(outputs, labels)
+                    # Keep the latest batch loss on the model, detached from autograd.
+                    self.model._current_loss = loss.detach()
+
+                    if self._logger is not None:
+                        self._logger.log_data({"Train/Loss": loss.detach()})
+
+                    self.model.train_metrics.update(
+                        torch.argmax(outputs.detach(), dim=1),
+                        labels.detach(),
+                    )
+
+                    loss.backward()
+                    private_optimizer.step()
+
+                self._log_manual_metrics("Train", self.model.train_metrics)
+                self.model.train_metrics.reset()
+                self.model.global_number["Train"] += 1
+                logging_training.info(f"Epoch {epoch} finished DP")
+
+                logging_training.info(f"Starting validation for Epoch {epoch} DP")
+                private_model.eval()
+
+                with torch.no_grad():
+                    for batch_idx, batch in enumerate(val_dataloader):
+                        inputs, labels = batch
+                        inputs = inputs.to(device)
+                        labels = labels.to(device)
+
+                        outputs = private_model(inputs)
+                        loss = self.model.criterion(outputs, labels)
+                        # NOTE(review): replaces the training loss stored above with the validation loss - confirm this is intended.
+                        self.model._current_loss = loss.detach()
+
+                        self.model.val_metrics.update(
+                            torch.argmax(outputs.detach(), dim=1),
+                            labels.detach(),
+                        )
+
+                self._log_manual_metrics("Validation", self.model.val_metrics)
+                self.model.val_metrics.reset()
+                self.model.global_number["Validation"] += 1
+                logging_training.info(f"Validation for Epoch {epoch} finished DP")
+
+            # Presumably the privacy wrapper keeps the wrapped network under ``_module`` - verify against the plugin.
+            if hasattr(private_model, "_module"):
+                self.model.load_state_dict(private_model._module.state_dict())
+            else:
+                self.model.load_state_dict(private_model.state_dict())
+
+            self.model.train()
+
+        finally:
+            # Runs whether the block above completed or raised.
+            self._dp_plugin.on_train_end(state)
+
+        dp_epsilon = state.extras.get("dp_epsilon")
+        if dp_epsilon is not None:
+            dp_delta = state.extras["dp_delta"]
+            if self._logger is not None:
+                self._logger.log_data(
+                    {
+                        "DP/Epsilon": dp_epsilon,
+                        "DP/Delta": dp_delta,
+                    }
+                )
+
+            logging_training.info(
+                f"DP privacy budget | epsilon={dp_epsilon:.4f} | delta={dp_delta}"
+            )
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index 0f01250fd..725251cf5 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -86,8 +86,8 @@
     "trainer": "lightning",
     "epochs": 3,
     "dp": {
-      "enabled": true,
-      "noise_multiplier": 0.4,
+      "enabled": false,
+      "noise_multiplier": 1.0,
       "max_grad_norm": 1.0,
       "target_delta": 1e-5,
       "accountant": "prv",
diff --git a/nebula/frontend/static/js/deployment/dp.js b/nebula/frontend/static/js/deployment/dp.js
new file mode 100644
index 000000000..52884c2f9
--- /dev/null
+++ b/nebula/frontend/static/js/deployment/dp.js
@@ -0,0 +1,59 @@
+// Differential Privacy Module
+const DpManager = (function() {
+    const DEFAULT_DP_CONFIG = {
+        enabled: false
+    };
+
+    function initializeDifferentialPrivacy() {
+        setupDpSwitch();
+        setDpConfig(DEFAULT_DP_CONFIG);
+    }
+
+    function setupDpSwitch() {
+        const dpSwitch = document.getElementById("dpSwitch");
+        if (!dpSwitch) return;
+
+        dpSwitch.addEventListener("change", function() {
+            toggleDpSettings(this.checked);
+        });
+    }
+
+    function toggleDpSettings(enabled) {
+        const dpSettings = document.getElementById("dp-settings");
+        if (!dpSettings) return;
+
+        dpSettings.style.display = enabled ? "block" : "none";
+    }
+
+    function getDpConfig() {
+        return {
+            enabled: Boolean(document.getElementById("dpSwitch")?.checked)
+        };
+    }
+
+    function setDpConfig(config = DEFAULT_DP_CONFIG) {
+        const dpConfig = {
+            ...DEFAULT_DP_CONFIG,
+            ...(config || {})
+        };
+
+        const dpSwitch = document.getElementById("dpSwitch");
+        if (!dpSwitch) return;
+
+        dpSwitch.checked = Boolean(dpConfig.enabled);
+        toggleDpSettings(dpSwitch.checked);
+    }
+
+    function resetDpConfig() {
+        setDpConfig(DEFAULT_DP_CONFIG);
+    }
+
+    return {
+        initializeDifferentialPrivacy,
+        getDpConfig,
+        setDpConfig,
+        resetDpConfig
+    };
+})();
+
+export default DpManager;
diff --git a/nebula/frontend/static/js/deployment/main.js b/nebula/frontend/static/js/deployment/main.js
index 42fb11922..44dd652f8 100644
--- a/nebula/frontend/static/js/deployment/main.js
+++ b/nebula/frontend/static/js/deployment/main.js
@@ -8,6 +8,7 @@ import SaManager from './situational-awareness.js';
 import GraphSettings from './graph-settings.js';
 import Utils from './utils.js';
 import TrustworthinessManager from './trustworthiness.js';
+import DpManager from './dp.js';
 
 const DeploymentManager = (function() {
     function initialize() {
@@ -31,6 +32,7 @@ const DeploymentManager = (function() {
         ReputationManager.initializeReputationSystem();
         SaManager.initializeSa();
         TrustworthinessManager.initializeTrustworthinessSystem();
+        DpManager.initializeDifferentialPrivacy();
         GraphSettings.initializeDistanceControls();
 
         // Make modules globally available
@@ -41,6 +43,7 @@ const DeploymentManager = (function() {
         window.ReputationManager = ReputationManager;
         window.SaManager = SaManager;
         window.TrustworthinessManager = TrustworthinessManager;
+        window.DpManager = DpManager;
         window.GraphSettings = GraphSettings;
         window.DeploymentManager = DeploymentManager;
         window.Utils = Utils;
diff --git a/nebula/frontend/static/js/deployment/scenario.js b/nebula/frontend/static/js/deployment/scenario.js
index 4f92d8e06..6af1e7d79 100644
--- a/nebula/frontend/static/js/deployment/scenario.js
+++ b/nebula/frontend/static/js/deployment/scenario.js
@@ -74,6 +74,7 @@ const ScenarioManager = (function () {
             logginglevel: document.getElementById("loggingLevel").value === "true",
             report_status_data_queue: document.getElementById("reportingSwitch").checked,
             epochs: parseInt(document.getElementById("epochs").value),
+            dp: window.DpManager.getDpConfig(),
             attack_params: attackConfig,
             reputation: {
                 enabled: window.ReputationManager.getReputationConfig().enabled || false,
@@ -263,6 +264,9 @@ const ScenarioManager = (function () {
         document.getElementById("loggingLevel").value = scenario.logginglevel ? "true" : "false";
         document.getElementById("reportingSwitch").checked = scenario.report_status_data_queue;
         document.getElementById("epochs").value = scenario.epochs;
+        if (window.DpManager) {
+            window.DpManager.setDpConfig(scenario.dp);
+        }
 
         // Load module configurations
         if (scenario.attacks && scenario.attacks.length > 0) {
@@ -436,6 +440,9 @@ const ScenarioManager = (function () {
         if (window.SaManager) {
             window.SaManager.resetSaConfig();
         }
+        if (window.DpManager) {
+            window.DpManager.resetDpConfig();
+        }
 
         // Trigger necessary events
         document.getElementById("federationArchitecture").dispatchEvent(new Event('change'));
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 955b36760..e0a4d80a7 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -519,6 +519,22 @@ <h5 class="step-title">Number of Epochs</h5>
                             style="display: inline; width: 80%">
                     </div>
                 </div>
+                <!-- Differential Privacy -->
+                <div class="form-group row container-shadow tiny grey">
+                    <h5 class="step-number">Differential Privacy <i class="fa fa-user-secret"></i>
+                    </h5>
+                    <h5 class="step-title">Enable/Disable DP</h5>
+                    <div class="form-check form-switch" style="margin-left: 23px;">
+                        <input class="form-check-input" type="checkbox" id="dpSwitch"
+                            style="display: inline; width: 80px; height: 30px;">
+                    </div>
+                    <div id="dp-settings" style="margin-top: 10px; display: none;">
+                        <small class="form-text text-muted">
+                            Additional DP parameters are still taken from
+                            <code>nebula/frontend/config/participant.json.example</code>.
+                        </small>
+                    </div>
+                </div>
                 <!-- Advanced Robustness -->
                 <div class="form-group row container-shadow tiny grey">
                     <h5 class="step-number">Robustness <i class="fa fa-shield"></i>

From 8ac17cd5fe449964694289e16996f0aac165d242 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 24 Apr 2026 16:57:35 +0200
Subject: [PATCH 29/66] DP and epsilon added to trustworthiness

---
 nebula/addons/trustworthiness/calculation.py  | 21 +++++++++++++
 .../addons/trustworthiness/dfl_factsheet.py   | 12 +++++---
 .../addons/trustworthiness/trustworthiness.py | 30 +++++++++++++------
 nebula/addons/trustworthiness/utils.py        | 20 ++++++++-----
 nebula/core/engine.py                         |  2 ++
 nebula/core/network/messages.py               |  4 ++-
 nebula/core/pb/nebula.proto                   |  2 ++
 nebula/core/pb/nebula_pb2.py                  | 12 ++++----
 nebula/core/training/lightning.py             |  7 +++++
 nebula/core/training/lightning_dp.py          | 16 ++++++++--
 10 files changed, 95 insertions(+), 31 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 006da5d2b..7c1235b1c 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -876,6 +876,27 @@ def get_underfitting_score_local(scenario_name, id):
     data = read_csv(data_file)
     return float(data["val_accuracy"].iloc[0])
 
+def get_dp_local(scenario_name, id):
+    """
+    Gets the dp metrics for a specific DFL/SDFL participant.
+
+    Args:
+        scenario_name (str): Scenario name.
+        participant_id (int | str): Participant identifier.
+
+    Returns:
+        float: DP Enabled, Epsilon.
+    """
+    data_file = os.path.join(
+        os.environ.get('NEBULA_LOGS_DIR'),
+        scenario_name,
+        "trustworthiness",
+        f"data_results_{id}.csv",
+    )
+
+    data = read_csv(data_file)
+    return data["dp_enabled"].iloc[0], float(data["dp_epsilon"].iloc[0])
+
 
 def get_well_calibration_error(model, test_dataloader, n_bins=10):
     """
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 6b12a41e0..b4d3e8408 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -29,7 +29,7 @@
 from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
 from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
 from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_underfitting_score_local
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_underfitting_score_local, get_dp_local
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 
 dirname = os.path.dirname(__file__)
@@ -116,9 +116,13 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
             factsheet["configuration"]["differential_privacy"] = False
             factsheet["configuration"]["dp_epsilon"] = ""
         """
-
-        factsheet["configuration"]["differential_privacy"] = False
-        factsheet["configuration"]["dp_epsilon"] = ""
+        dp_enabled, dp_epsilon = get_dp_local(experiment_name, participant_idx)
+        if dp_enabled:
+            factsheet["configuration"]["differential_privacy"] = True
+            factsheet["configuration"]["dp_epsilon"] = dp_epsilon
+        else:
+            factsheet["configuration"]["differential_privacy"] = False
+            factsheet["configuration"]["dp_epsilon"] = ""
 
         if dataset == "MNIST" and algorithm == "MLP":
             model = MNISTModelMLP()
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index fe2dbb97c..76933327c 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -42,7 +42,7 @@ def get_workload(self) -> str:
     def get_sample_size(self) -> float:
         raise NotImplementedError
 
-    abstractmethod
+    @abstractmethod
     def get_metrics(self) -> tuple[float, float]:
         raise NotImplementedError
 
@@ -170,7 +170,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             #logging.info("connections=%s", list(cm.connections.keys()))
             #logging.info("server in connections? %s", server_addr in cm.connections)
 
-            bytes_sent, bytes_recv, accuracy, loss, val_accuracy = load_data_results_participant(experiment_name, self._idx)
+            bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(experiment_name, self._idx)
 
             role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
 
@@ -202,12 +202,14 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 model_size=model_size,
                 local_entropy=local_entropy,
                 val_accuracy=val_accuracy,
+                dp_enabled=dp_enabled,
+                dp_epsilon=dp_epsilon
             )
 
             logging.info(
                 "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
                 "accuracy=%s loss=%s energy_grid=%s emissions=%s workload=%s"
-                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s",
+                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s dp_enabled=%s dp_epsilon=%s",
                 server_addr,
                 str(self._idx),
                 bytes_sent,
@@ -227,7 +229,9 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 class_imbalance,
                 model_size,
                 local_entropy,
-                val_accuracy
+                val_accuracy,
+                dp_enabled,
+                dp_epsilon
             )
 
             await cm.send_message(
@@ -996,7 +1000,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
         if self._csv_completed == True:
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, trustworthiness reports OK, starting generate_factsheet")
-            bytes_sent, bytes_recv, accuracy, loss, val_accuracy= load_data_results_participant(
+            bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon= load_data_results_participant(
                 self._experiment_name,
                 self._idx,
             )
@@ -1017,7 +1021,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy)
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             await self._generate_factsheet(trust_config, experiment_name)
         else:
@@ -1026,7 +1030,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
             await asyncio.sleep(60)
             if self._trustworthiness_reports != None and self._csv_completed == False:
                 save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
-            bytes_sent, bytes_recv, accuracy, loss, val_accuracy = load_data_results_participant(
+            bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(
                 self._experiment_name,
                 self._idx,
             )
@@ -1047,7 +1051,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy)
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             await self._generate_factsheet(trust_config, experiment_name)
         #await self._generate_factsheet(trust_config, experiment_name)
@@ -1074,6 +1078,8 @@ async def register_trustworthiness_report(self, source, message):
             "model_size": message.model_size,
             "local_entropy": message.local_entropy,
             "val_accuracy": message.val_accuracy,
+            "dp_enabled": message.dp_enabled,
+            "dp_epsilon": message.dp_epsilon
         }
 
         logging.info(
@@ -1303,12 +1309,18 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         bytes_sent = self._engine.reporter.acc_bytes_sent
         bytes_recv = self._engine.reporter.acc_bytes_recv
 
+        privacy_metrics = self._engine.trainer.get_privacy_metrics()
+        dp_enabled=bool(privacy_metrics.get("dp_enabled", False))
+        dp_epsilon=privacy_metrics.get("dp_epsilon")
+        if dp_epsilon == None:
+            dp_epsilon=0
+
         # Get TrustWorkload information
         workload = self.tw.get_workload()
         sample_size = self.tw.get_sample_size()
 
         # Final operations
-        save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss, last_val_accuracy)
+        save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss, last_val_accuracy, dp_enabled, dp_epsilon)
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index fb4274d4e..0acdbe8b1 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -307,8 +307,10 @@ def load_data_results_participant(experiment_name: str, participant_id: int | st
     accuracy = float(row["accuracy"])
     loss = float(row["loss"])
     val_accuracy = float(row["val_accuracy"])
+    dp_enabled = bool(row["dp_enabled"])
+    dp_epsilon = float(row["dp_epsilon"])
 
-    return bytes_sent, bytes_recv, accuracy, loss, val_accuracy
+    return bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon
 
 
 def load_emissions_participant(experiment_name: str, participant_id: int | str):
@@ -355,7 +357,7 @@ def save_trustworthiness_reports_csv(
     with open(data_results_path, "w", newline="") as csv_file:
         writer = csv.DictWriter(
             csv_file,
-            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy"],
+            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy", "dp_enabled", "dp_epsilon"],
         )
         writer.writeheader()
 
@@ -370,6 +372,8 @@ def save_trustworthiness_reports_csv(
                 "model_size": report["model_size"],
                 "local_entropy": report["local_entropy"],
                 "val_accuracy": report["val_accuracy"],
+                "dp_enabled": report["dp_enabled"],
+                "dp_epsilon": report["dp_epsilon"],
             })
 
     with open(emissions_path, "w", newline="") as csv_file:
@@ -400,7 +404,7 @@ def save_trustworthiness_reports_csv(
         emissions_path,
     )
 
-def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, class_imbalance: float, model_size: int, local_entropy: float, val_accuracy: float):
+def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, class_imbalance: float, model_size: int, local_entropy: float, val_accuracy: float, dp_enabled: bool, dp_epsilon: float):
     try:
         data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
     except:
@@ -409,13 +413,13 @@ def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_rec
     if exists(data_results_file):
         df = pd.read_csv(data_results_file)
     else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy"])
+        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy", "dp_enabled", "dp_epsilon"])
 
     try:
         # Add new entry to DataFrame
         new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss], 'class_imbalance': [class_imbalance], 'model_size': [model_size], 'local_entropy': [local_entropy], 'val_accuracy': [val_accuracy]})
+                                    'loss': [loss], 'class_imbalance': [class_imbalance], 'model_size': [model_size], 'local_entropy': [local_entropy], 'val_accuracy': [val_accuracy], 'dp_enabled': [dp_enabled], 'dp_epsilon': [dp_epsilon]})
         df = pd.concat([df, new_data], ignore_index=True)
 
         df.to_csv(data_results_file, encoding='utf-8', index=False)
@@ -447,7 +451,7 @@ def save_emissions_csv_cfl(scenario_name: str, id: int, role: str, energy_grid:
         logger.warning(e)
 
 
-def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, val_accuracy: float):
+def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, val_accuracy: float, dp_enabled: bool, dp_epsilon: float):
 
     try:
         data_results_id_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"data_results_{id}.csv")
@@ -457,13 +461,13 @@ def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: i
     if exists(data_results_id_file):
         df = pd.read_csv(data_results_id_file)
     else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "val_accuracy"])
+        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "val_accuracy", "dp_enabled", "dp_epsilon"])
 
     try:
         # Add new entry to DataFrame
         new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
                                     'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss], 'val_accuracy': [val_accuracy]})
+                                    'loss': [loss], 'val_accuracy': [val_accuracy], 'dp_enabled': [dp_enabled], 'dp_epsilon': [dp_epsilon]})
         df = pd.concat([df, new_data], ignore_index=True)
 
         df.to_csv(data_results_id_file, encoding='utf-8', index=False)
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 85fbe5026..527763448 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -438,6 +438,8 @@ async def _trustworthiness_report_callback(self, source, message):
                 "model_size": message.model_size,
                 "local_entropy": message.local_entropy,
                 "val_accuracy": message.val_accuracy,
+                "dp_enabled": message.dp_enabled,
+                "dp_epsilon": message.dp_epsilon
             }
 
             logging.info(f"handle_trustworthiness_message | Trigger | {report}")
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index 3f237fbc2..e66edad4f 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -115,7 +115,9 @@ def _define_message_templates(self):
                     "class_imbalance",
                     "model_size",
                     "local_entropy",
-                    "val_accuracy"
+                    "val_accuracy",
+                    "dp_enabled",
+                    "dp_epsilon"
                 ],
                 "defaults": {},
             },
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index 8bb82c4cd..46c127c05 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -158,6 +158,8 @@ message TrustworthinessMessage {
   int64 model_size = 18;
   float local_entropy = 19;
   float val_accuracy = 20;
+  bool dp_enabled = 21;
+  float dp_epsilon = 22;
 }
 
 message TrustscoresMessage {
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index 1c6ac41f3..0cdc85585 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xac\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 
\x01(\t\"\xd8\x03\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xac\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 
\x01(\t\"\x80\x04\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\x12\x12\n\ndp_enabled\x18\x15 \x01(\x08\x12\x12\n\ndp_epsilon\x18\x16 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -59,11 +59,11 @@
   _RESPONSEMESSAGE._serialized_start=2127
   _RESPONSEMESSAGE._serialized_end=2162
   _TRUSTWORTHINESSMESSAGE._serialized_start=2165
-  _TRUSTWORTHINESSMESSAGE._serialized_end=2637
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2617
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2637
-  _TRUSTSCORESMESSAGE._serialized_start=2640
-  _TRUSTSCORESMESSAGE._serialized_end=2776
+  _TRUSTWORTHINESSMESSAGE._serialized_end=2677
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2657
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2677
+  _TRUSTSCORESMESSAGE._serialized_start=2680
+  _TRUSTSCORESMESSAGE._serialized_end=2816
   _TRUSTSCORESMESSAGE_ACTION._serialized_start=2106
   _TRUSTSCORESMESSAGE_ACTION._serialized_end=2125
 # @@protoc_insertion_point(module_scope)
diff --git a/nebula/core/training/lightning.py b/nebula/core/training/lightning.py
index 5b9d96e50..0c869d1de 100755
--- a/nebula/core/training/lightning.py
+++ b/nebula/core/training/lightning.py
@@ -388,3 +388,10 @@ def update_model_learning_rate(self, new_lr):
 
     def show_current_learning_rate(self):
         self.model.show_current_learning_rate()
+
+    def get_privacy_metrics(self):
+        """Return the privacy summary for a trainer that applies no differential privacy."""
+        # Zeroed budget values keep the same keys that LightningDP reports.
+        no_dp_budget = 0
+        summary = {"dp_enabled": False, "dp_epsilon": no_dp_budget, "dp_delta": no_dp_budget}
+        return summary
diff --git a/nebula/core/training/lightning_dp.py b/nebula/core/training/lightning_dp.py
index d185d96c4..09e18b42b 100644
--- a/nebula/core/training/lightning_dp.py
+++ b/nebula/core/training/lightning_dp.py
@@ -14,14 +14,14 @@ class LightningDP(Lightning):
     """
     Lightning-based trainer with Differential Privacy support.
 
-    This class inherits the standard Nebula Lightning trainer but overrides
-    the synchronous training logic because Opacus needs to privatize the
-    model, optimizer and dataloader before the training loop starts.
+    This class inherits the standard Nebula Lightning trainer.
     """
 
     def __init__(self, model, datamodule, config=None):
         super().__init__(model, datamodule, config)
         self._dp_plugin = self.create_dp_plugin()
+        self.dp_epsilon = None
+        self.dp_delta = None
 
     def create_dp_plugin(self):
         dp_config = self.config.participant["training_args"].get("dp")
@@ -183,6 +183,9 @@ def _fit_with_dp(self):
         if dp_epsilon is not None:
             dp_delta = state.extras["dp_delta"]
 
+            self.dp_epsilon = float(dp_epsilon)
+            self.dp_delta = float(dp_delta)
+
             if self._logger is not None:
                 self._logger.log_data(
                     {
@@ -194,3 +197,10 @@ def _fit_with_dp(self):
             logging_training.info(
                 f"DP privacy budget | epsilon={dp_epsilon:.4f} | delta={dp_delta}"
             )
+
+    def get_privacy_metrics(self):
+        """Return the DP flag with the stored epsilon/delta (None until `_fit_with_dp` records them)."""
+        summary = {"dp_enabled": True}
+        summary["dp_epsilon"] = self.dp_epsilon
+        summary["dp_delta"] = self.dp_delta
+        return summary

From b7b23c28f621aa8ea2d1827179f9bd327d2763f4 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 27 Apr 2026 11:43:01 +0200
Subject: [PATCH 30/66] DP Trustworthiness CFL finished, global privacy risk
 fixed, frontend updated: Noise Multiplier

---
 nebula/addons/trustworthiness/calculation.py  | 47 +++++++++++++++++++
 .../configs/eval_metrics_dfl.json             |  4 +-
 .../configs/factsheet_template_dfl.json       |  1 +
 .../addons/trustworthiness/dfl_factsheet.py   |  3 ++
 nebula/addons/trustworthiness/factsheet.py    | 10 +++-
 .../addons/trustworthiness/trustworthiness.py |  2 +
 nebula/controller/scenarios.py                |  9 +++-
 nebula/frontend/static/js/deployment/dp.js    | 15 +++++-
 nebula/frontend/templates/deployment.html     |  8 +++-
 9 files changed, 91 insertions(+), 8 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 7c1235b1c..e05c1cf90 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -289,6 +289,24 @@ def get_global_privacy_risk(dp, epsilon, n):
     else:
         return 1
 
+def get_global_privacy_risk_dfl(dp, epsilon, n):
+    """
+    Estimates the global privacy risk of a DFL node from epsilon and its neighbour count.
+
+    Args:
+        dp (bool): Must be exactly True for differential privacy to be considered.
+        epsilon (float): The epsilon value; non-numeric values are treated as unknown.
+        n (int): The number of neighbours; the formula weights its term by n + 1.
+
+    Returns:
+        float: 1 / (1 + (n + 1) * e^-epsilon), or 1 when dp is not True or epsilon is not a number.
+    """
+    # Guard clause: without DP or a numeric epsilon the maximal risk of 1 is reported.
+    if dp is not True or not isinstance(epsilon, numbers.Number):
+        return 1
+    group_size = n + 1
+    return 1 / (1 + group_size * math.pow(e, -epsilon))
+
 
 def _collect_per_sample_losses(model, dataloader, max_samples=5000):
     """
@@ -898,6 +916,35 @@ def get_dp_local(scenario_name, id):
     return data["dp_enabled"].iloc[0], float(data["dp_epsilon"].iloc[0])
 
 
+def get_dp_global(scenario_name):
+    """
+    Gets the aggregated DP metrics for a CFL scenario, excluding the server node.
+
+    The epsilon sum over data_results.csv is divided by (rows - 1): one row is
+    assumed to belong to the server and to contribute no epsilon.
+
+    Args:
+        scenario_name (str): Scenario name.
+
+    Returns:
+        tuple[bool, float]: Whether DP is enabled (taken from the first row),
+        and the average epsilon across client nodes (0.0 when DP is disabled).
+    """
+    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
+
+    data = read_csv(data_file)
+
+    # An empty results file carries no DP information; report DP as disabled.
+    if len(data) == 0 or not bool(data["dp_enabled"].iloc[0]):
+        return False, 0.0
+
+    # Clamp the divisor so a single-row file cannot cause a division by zero.
+    client_count = max(len(data) - 1, 1)
+    avg_epsilon = float(data["dp_epsilon"].sum()) / client_count
+
+    return True, avg_epsilon
+
+
 def get_well_calibration_error(model, test_dataloader, n_bins=10):
     """
     Calculates a well-calibration error style metric using prediction confidence.
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index 141dda891..c396abddc 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -245,10 +245,10 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "participants/client_num"
+                "field_path": "participants/neighbor_num"
               }
             ],
-            "operation": "get_global_privacy_risk",
+            "operation": "get_global_privacy_risk_dfl",
             "type": "true_score",
             "direction": "desc",
             "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index d2a4decd6..5c19255d0 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -14,6 +14,7 @@
 		"sample_client_rate": "",
 		"client_selector": "",
 		"local_dataset_size": "",
+		"neighbor_num": "",
 		"avg_neighbor_reputation": ""
 	},
 	"configuration": {
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index b4d3e8408..844d8a8b5 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -245,10 +245,13 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["sustainability"]["emissions_training_local"] = emissions_training_local
         factsheet["sustainability"]["energy_consumed_local"] = energy_consumed_local
         factsheet["participants"]["local_dataset_size"] = sample_size
+
         if reputation_summary is not None:
             factsheet["participants"]["avg_neighbor_reputation"] = reputation_summary.get("avg_neighbor_reputation", "")
+            factsheet["participants"]["neighbor_num"] = reputation_summary.get("neighbor_num", 0)
         else:
             factsheet["participants"]["avg_neighbor_reputation"] = 0
+            factsheet["participants"]["neighbor_num"] = 0
 
         factsheet["sustainability"]["emissions_communication_local"] = (bytes_sent * 2.24e-10 * carbon_intensity_local)+(bytes_recv * 2.24e-10 * carbon_intensity_local)
 
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 49ae3578f..5c7f73c47 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -27,7 +27,7 @@
 from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
 from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
 from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_dp_global
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
@@ -243,6 +243,14 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["fairness"]["test_acc_cv"] = 1 if test_acc_cv > 1 else test_acc_cv
                 _, participant_test_acc = get_participant_loss_accuracy(scenario_name, participant_idx)
 
+                dp_enabled, dp_epsilon = get_dp_global(scenario_name)
+                if dp_enabled:
+                    factsheet["configuration"]["differential_privacy"] = True
+                    factsheet["configuration"]["dp_epsilon"] = dp_epsilon
+                else:
+                    factsheet["configuration"]["differential_privacy"] = False
+                    factsheet["configuration"]["dp_epsilon"] = ""
+
                 factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
                 factsheet["system"]["avg_model_size"] = avg_model_size
 
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 76933327c..e2897596c 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -361,6 +361,7 @@ def _get_reputation_trust_summary(self) -> dict:
         if not self._is_reputation_enabled():
             return {
                 "reputation_enabled": False,
+                "neighbor_num": len(self._expected_trustscores_sources),
                 "avg_neighbor_reputation": 0.0,
             }
 
@@ -386,6 +387,7 @@ def _get_reputation_trust_summary(self) -> dict:
 
         return {
             "reputation_enabled": True,
+            "neighbor_num": len(self._expected_trustscores_sources),
             "avg_neighbor_reputation": avg_neighbor_reputation,
         }
 
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index e44b9ed2c..262f5b102 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -702,10 +702,15 @@ def __init__(self, scenario, user=None):
             participant_config["data_args"]["partition_parameter"] = self.scenario.partition_parameter
             participant_config["model_args"]["model"] = self.scenario.model
             participant_config["training_args"]["epochs"] = int(self.scenario.epochs)
-            if isinstance(self.scenario.dp, dict) and "enabled" in self.scenario.dp:
+            if isinstance(self.scenario.dp, dict):
                 participant_config.setdefault("training_args", {})
                 participant_config["training_args"].setdefault("dp", {})
-                participant_config["training_args"]["dp"]["enabled"] = bool(self.scenario.dp["enabled"])
+                if "enabled" in self.scenario.dp:
+                    participant_config["training_args"]["dp"]["enabled"] = bool(self.scenario.dp["enabled"])
+                if "noise_multiplier" in self.scenario.dp:
+                    participant_config["training_args"]["dp"]["noise_multiplier"] = float(
+                        self.scenario.dp["noise_multiplier"]
+                    )
             participant_config["device_args"]["accelerator"] = self.scenario.accelerator
             participant_config["device_args"]["gpu_id"] = self.scenario.gpu_id
             participant_config["device_args"]["logging"] = self.scenario.logginglevel
diff --git a/nebula/frontend/static/js/deployment/dp.js b/nebula/frontend/static/js/deployment/dp.js
index 52884c2f9..7512dfaa3 100644
--- a/nebula/frontend/static/js/deployment/dp.js
+++ b/nebula/frontend/static/js/deployment/dp.js
@@ -1,7 +1,8 @@
 // Differential Privacy Module
 const DpManager = (function() {
     const DEFAULT_DP_CONFIG = {
-        enabled: false
+        enabled: false,
+        noise_multiplier: 1.0
     };
 
     function initializeDifferentialPrivacy() {
@@ -26,8 +27,14 @@ const DpManager = (function() {
     }
 
     function getDpConfig() {
+        const noiseMultiplierInput = document.getElementById("dpNoiseMultiplier");
+        const noiseMultiplier = parseFloat(noiseMultiplierInput?.value);
+
         return {
-            enabled: Boolean(document.getElementById("dpSwitch")?.checked)
+            enabled: Boolean(document.getElementById("dpSwitch")?.checked),
+            noise_multiplier: Number.isFinite(noiseMultiplier)
+                ? noiseMultiplier
+                : DEFAULT_DP_CONFIG.noise_multiplier
         };
     }
 
@@ -41,6 +48,10 @@ const DpManager = (function() {
         if (!dpSwitch) return;
 
         dpSwitch.checked = Boolean(dpConfig.enabled);
+        const noiseMultiplierInput = document.getElementById("dpNoiseMultiplier");
+        if (noiseMultiplierInput) {
+            noiseMultiplierInput.value = dpConfig.noise_multiplier;
+        }
         toggleDpSettings(dpSwitch.checked);
     }
 
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index e0a4d80a7..4ad79c579 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -529,8 +529,14 @@ <h5 class="step-title">Enable/Disable DP</h5>
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <div id="dp-settings" style="margin-top: 10px; display: none;">
+                        <h5 class="step-title">Noise multiplier</h5>
+                        <div class="form-check form-check-inline">
+                            <input type="number" class="form-control" id="dpNoiseMultiplier"
+                                placeholder="Noise multiplier" min="0" step="0.01" value="1.0"
+                                style="display: inline; width: 80%">
+                        </div>
                         <small class="form-text text-muted">
-                            Additional DP parameters are still taken from
+                            Other DP parameters are still taken from
                             <code>nebula/frontend/config/participant.json.example</code>.
                         </small>
                     </div>

From 134e6e22a1b56510e06d142081c958ef637062b8 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 27 Apr 2026 16:52:04 +0200
Subject: [PATCH 31/66] CFL acc and loss fixed, DP changed

---
 nebula/addons/trustworthiness/calculation.py  |  4 +-
 .../addons/trustworthiness/trustworthiness.py |  4 +-
 nebula/core/training/dp.py                    |  1 -
 nebula/core/training/lightning_dp.py          | 81 ++++---------------
 4 files changed, 21 insertions(+), 69 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index e05c1cf90..fd49c97d3 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -584,8 +584,8 @@ def get_avg_loss_accuracy(scenario_name):
     total_loss = data["loss"].sum()
     total_accuracy = data["accuracy"].sum()
 
-    avg_loss = total_loss / number_files
-    avg_accuracy = total_accuracy / number_files
+    avg_loss = total_loss / (number_files-1)
+    avg_accuracy = total_accuracy / (number_files-1)
     std_accuracy = statistics.stdev(data["accuracy"])
 
     return avg_loss, avg_accuracy, std_accuracy
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index e2897596c..d5b390c3f 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -1023,7 +1023,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             await self._generate_factsheet(trust_config, experiment_name)
         else:
@@ -1053,7 +1053,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, accuracy, loss, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
+            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             await self._generate_factsheet(trust_config, experiment_name)
         #await self._generate_factsheet(trust_config, experiment_name)
diff --git a/nebula/core/training/dp.py b/nebula/core/training/dp.py
index 570fc25d6..0554f5268 100644
--- a/nebula/core/training/dp.py
+++ b/nebula/core/training/dp.py
@@ -39,7 +39,6 @@ def on_train_start(self, model, optimizer, state):
             max_grad_norm=self.max_grad_norm,
             poisson_sampling=self.poisson_sampling,
             clipping=self.clipping,
-            wrap_model=False,
         )
 
         state.extras["privacy_engine"] = privacy_engine
diff --git a/nebula/core/training/lightning_dp.py b/nebula/core/training/lightning_dp.py
index 09e18b42b..ea2fd0337 100644
--- a/nebula/core/training/lightning_dp.py
+++ b/nebula/core/training/lightning_dp.py
@@ -60,7 +60,7 @@ def _train_sync(self):
             logging_training.error(f"Error in _train_sync with Differential Privacy: {e}")
             tb = traceback.format_exc()
             logging_training.error(f"Traceback: {tb}")
-            return None, None
+            raise
 
     def _get_training_device(self):
         if (
@@ -88,15 +88,16 @@ def _log_manual_metrics(self, phase, metrics):
         self._logger.log_data(output, step=self.model.global_number[phase])
 
     def _fit_with_dp(self):
-        train_dataloader = self.datamodule.train_dataloader()
-        val_dataloader = self.datamodule.val_dataloader()
-
         state = SimpleDPState()
-        state.extras["dataloader"] = train_dataloader
+        original_configure_optimizers = self.model.configure_optimizers
 
         self.model.train()
+        self.datamodule.setup("fit")
+        train_dataloader = self.datamodule.train_dataloader()
+        val_dataloader = self.datamodule.val_dataloader()
+
         optimizer = self.model.configure_optimizers()
-        device = self._get_training_device()
+        state.extras["dataloader"] = train_dataloader
 
         try:
             self._dp_plugin.on_train_start(self.model, optimizer, state)
@@ -106,67 +107,17 @@ def _fit_with_dp(self):
             private_dataloader = state.extras["dataloader"]
 
             self.model._optimizer = private_optimizer
-            private_model.to(device)
-
-            for epoch in range(self.epochs):
-                logging_training.info(f"Starting Epoch {epoch} DP")
-
-                private_model.train()
-
-                for batch_idx, batch in enumerate(private_dataloader):
-                    inputs, labels = batch
-                    inputs = inputs.to(device)
-                    labels = labels.to(device)
-
-                    private_optimizer.zero_grad()
-
-                    outputs = private_model(inputs)
-                    loss = self.model.criterion(outputs, labels)
 
-                    self.model._current_loss = loss.detach()
+            def configure_private_optimizers():
+                return private_optimizer
 
-                    if self._logger is not None:
-                        self._logger.log_data({"Train/Loss": loss.detach()})
+            self.model.configure_optimizers = configure_private_optimizers
 
-                    self.model.train_metrics.update(
-                        torch.argmax(outputs.detach(), dim=1),
-                        labels.detach(),
-                    )
-
-                    loss.backward()
-                    private_optimizer.step()
-
-                self._log_manual_metrics("Train", self.model.train_metrics)
-                self.model.train_metrics.reset()
-                self.model.global_number["Train"] += 1
-
-                logging_training.info(f"Epoch {epoch} finished DP")
-
-                logging_training.info(f"Starting validation for Epoch {epoch} DP")
-
-                private_model.eval()
-
-                with torch.no_grad():
-                    for batch_idx, batch in enumerate(val_dataloader):
-                        inputs, labels = batch
-                        inputs = inputs.to(device)
-                        labels = labels.to(device)
-
-                        outputs = private_model(inputs)
-                        loss = self.model.criterion(outputs, labels)
-
-                        self.model._current_loss = loss.detach()
-
-                        self.model.val_metrics.update(
-                            torch.argmax(outputs.detach(), dim=1),
-                            labels.detach(),
-                        )
-
-                self._log_manual_metrics("Validation", self.model.val_metrics)
-                self.model.val_metrics.reset()
-                self.model.global_number["Validation"] += 1
-
-                logging_training.info(f"Validation for Epoch {epoch} finished DP")
+            self._trainer.fit(
+                self.model,
+                train_dataloaders=private_dataloader,
+                val_dataloaders=val_dataloader,
+            )
 
             if hasattr(private_model, "_module"):
                 self.model.load_state_dict(private_model._module.state_dict())
@@ -176,7 +127,9 @@ def _fit_with_dp(self):
             self.model.train()
 
         finally:
+            self.model.configure_optimizers = original_configure_optimizers
             self._dp_plugin.on_train_end(state)
+            self.datamodule.teardown("fit")
 
         dp_epsilon = state.extras.get("dp_epsilon")
 

From 13189aee624d934047aaac7044a1f49cdeb58c34 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Tue, 28 Apr 2026 11:05:00 +0200
Subject: [PATCH 32/66] DP finished, Lightning fit

---
 nebula/core/models/adultcensus/mlp.py         |  4 ++
 nebula/core/models/breast_cancer/mlp.py       |  4 ++
 nebula/core/models/cifar10/cnn.py             |  4 ++
 nebula/core/models/cifar10/cnnV2.py           |  4 ++
 nebula/core/models/cifar10/cnnV3.py           |  4 ++
 nebula/core/models/cifar10/fastermobilenet.py |  4 ++
 nebula/core/models/cifar10/resnet.py          |  4 ++
 nebula/core/models/cifar10/simplemobilenet.py |  4 ++
 nebula/core/models/cifar100/cnn.py            |  4 ++
 nebula/core/models/covtype/mlp.py             |  4 ++
 nebula/core/models/emnist/cnn.py              |  4 ++
 nebula/core/models/emnist/mlp.py              |  4 ++
 nebula/core/models/fashionmnist/cnn.py        |  4 ++
 nebula/core/models/fashionmnist/mlp.py        |  4 ++
 nebula/core/models/kddcup99/mlp.py            |  4 ++
 nebula/core/models/mnist/cnn.py               |  4 ++
 nebula/core/models/mnist/mlp.py               |  8 +++-
 nebula/core/models/nebulamodel.py             | 15 +++++++
 nebula/core/models/sentiment140/cnn.py        |  4 ++
 nebula/core/models/sentiment140/rnn.py        |  4 ++
 nebula/core/training/dp.py                    | 28 ++++++++++--
 nebula/core/training/lightning_dp.py          | 43 ++++++++++---------
 22 files changed, 139 insertions(+), 27 deletions(-)

diff --git a/nebula/core/models/adultcensus/mlp.py b/nebula/core/models/adultcensus/mlp.py
index b2f33eacb..c1106b647 100644
--- a/nebula/core/models/adultcensus/mlp.py
+++ b/nebula/core/models/adultcensus/mlp.py
@@ -57,6 +57,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
 
diff --git a/nebula/core/models/breast_cancer/mlp.py b/nebula/core/models/breast_cancer/mlp.py
index e84d099f2..8b5bd0ed8 100644
--- a/nebula/core/models/breast_cancer/mlp.py
+++ b/nebula/core/models/breast_cancer/mlp.py
@@ -45,6 +45,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
 
diff --git a/nebula/core/models/cifar10/cnn.py b/nebula/core/models/cifar10/cnn.py
index 16b9cc70a..e1d9eb238 100755
--- a/nebula/core/models/cifar10/cnn.py
+++ b/nebula/core/models/cifar10/cnn.py
@@ -37,6 +37,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/cifar10/cnnV2.py b/nebula/core/models/cifar10/cnnV2.py
index c8f658c92..a22ae51ec 100755
--- a/nebula/core/models/cifar10/cnnV2.py
+++ b/nebula/core/models/cifar10/cnnV2.py
@@ -42,6 +42,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/cifar10/cnnV3.py b/nebula/core/models/cifar10/cnnV3.py
index 47d69a553..d5dd5da33 100755
--- a/nebula/core/models/cifar10/cnnV3.py
+++ b/nebula/core/models/cifar10/cnnV3.py
@@ -69,6 +69,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/cifar10/fastermobilenet.py b/nebula/core/models/cifar10/fastermobilenet.py
index 91f9b89d3..374988b18 100755
--- a/nebula/core/models/cifar10/fastermobilenet.py
+++ b/nebula/core/models/cifar10/fastermobilenet.py
@@ -58,6 +58,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/cifar10/resnet.py b/nebula/core/models/cifar10/resnet.py
index 09af6e467..0aaf40a32 100755
--- a/nebula/core/models/cifar10/resnet.py
+++ b/nebula/core/models/cifar10/resnet.py
@@ -141,6 +141,10 @@ def forward(self, x):
         raise NotImplementedError()
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         if self.implementation == "scratch" and self.classifier == "resnet9":
             params = []
             for key, module in self.model.items():
diff --git a/nebula/core/models/cifar10/simplemobilenet.py b/nebula/core/models/cifar10/simplemobilenet.py
index 7a40bc4d7..17a6f2551 100755
--- a/nebula/core/models/cifar10/simplemobilenet.py
+++ b/nebula/core/models/cifar10/simplemobilenet.py
@@ -65,6 +65,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
 
diff --git a/nebula/core/models/cifar100/cnn.py b/nebula/core/models/cifar100/cnn.py
index 685fc0531..377ce58c6 100755
--- a/nebula/core/models/cifar100/cnn.py
+++ b/nebula/core/models/cifar100/cnn.py
@@ -94,6 +94,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         return torch.optim.Adam(
             self.parameters(),
             lr=self.config["lr"],
diff --git a/nebula/core/models/covtype/mlp.py b/nebula/core/models/covtype/mlp.py
index 0399caa43..8d15f5d47 100644
--- a/nebula/core/models/covtype/mlp.py
+++ b/nebula/core/models/covtype/mlp.py
@@ -45,6 +45,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
 
diff --git a/nebula/core/models/emnist/cnn.py b/nebula/core/models/emnist/cnn.py
index f07e6b61f..e286df5c2 100755
--- a/nebula/core/models/emnist/cnn.py
+++ b/nebula/core/models/emnist/cnn.py
@@ -49,6 +49,10 @@ def forward(self, x):
         return logits
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/emnist/mlp.py b/nebula/core/models/emnist/mlp.py
index 20e8017d6..8308d19c5 100755
--- a/nebula/core/models/emnist/mlp.py
+++ b/nebula/core/models/emnist/mlp.py
@@ -39,6 +39,10 @@ def get_learning_rate(self):
         return self.learning_rate
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
 
diff --git a/nebula/core/models/fashionmnist/cnn.py b/nebula/core/models/fashionmnist/cnn.py
index 3c2427bc8..58ec13d42 100755
--- a/nebula/core/models/fashionmnist/cnn.py
+++ b/nebula/core/models/fashionmnist/cnn.py
@@ -49,6 +49,10 @@ def forward(self, x):
         return logits
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/fashionmnist/mlp.py b/nebula/core/models/fashionmnist/mlp.py
index 3ebfa9cfa..81d0e95da 100755
--- a/nebula/core/models/fashionmnist/mlp.py
+++ b/nebula/core/models/fashionmnist/mlp.py
@@ -42,5 +42,9 @@ def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
diff --git a/nebula/core/models/kddcup99/mlp.py b/nebula/core/models/kddcup99/mlp.py
index 4bb59fec9..eb3c79e69 100644
--- a/nebula/core/models/kddcup99/mlp.py
+++ b/nebula/core/models/kddcup99/mlp.py
@@ -38,6 +38,10 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         self._optimizer = optimizer
         return optimizer
diff --git a/nebula/core/models/mnist/cnn.py b/nebula/core/models/mnist/cnn.py
index dd9c4131a..abbc33113 100755
--- a/nebula/core/models/mnist/cnn.py
+++ b/nebula/core/models/mnist/cnn.py
@@ -46,6 +46,10 @@ def forward(self, x):
         return logits
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/mnist/mlp.py b/nebula/core/models/mnist/mlp.py
index 64a0b1da9..426075273 100755
--- a/nebula/core/models/mnist/mlp.py
+++ b/nebula/core/models/mnist/mlp.py
@@ -33,12 +33,16 @@ def forward(self, x):
         return x
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         self._optimizer = optimizer
         return optimizer
-    
+
     def get_learning_rate(self):
         return self.learning_rate
 
     def count_parameters(self):
-        return sum(p.numel() for p in self.parameters() if p.requires_grad)
\ No newline at end of file
+        return sum(p.numel() for p in self.parameters() if p.requires_grad)
diff --git a/nebula/core/models/nebulamodel.py b/nebula/core/models/nebulamodel.py
index 6c5e672d2..b8ae90160 100755
--- a/nebula/core/models/nebulamodel.py
+++ b/nebula/core/models/nebulamodel.py
@@ -204,8 +204,23 @@ def __init__(
 
         self._current_loss = -1
         self._optimizer = None
+        self._optimizer_override = None
         self._latest_validation_metrics = {}
 
+        self.dp_enabled = False
+        self.dp_epsilon = None
+        self.dp_delta = None
+
+    def set_optimizer_override(self, optimizer):
+        self._optimizer_override = optimizer
+        self._optimizer = optimizer
+
+    def clear_optimizer_override(self):
+        self._optimizer_override = None
+
+    def get_optimizer_override(self):
+        return self._optimizer_override
+
     def set_communication_manager(self, communication_manager):
         self.communication_manager = communication_manager
 
diff --git a/nebula/core/models/sentiment140/cnn.py b/nebula/core/models/sentiment140/cnn.py
index 87541aa05..bf5cdb0c6 100755
--- a/nebula/core/models/sentiment140/cnn.py
+++ b/nebula/core/models/sentiment140/cnn.py
@@ -47,6 +47,10 @@ def forward(self, x):
         return out
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(
             self.parameters(),
             lr=self.learning_rate,
diff --git a/nebula/core/models/sentiment140/rnn.py b/nebula/core/models/sentiment140/rnn.py
index cfbea66cf..763848724 100755
--- a/nebula/core/models/sentiment140/rnn.py
+++ b/nebula/core/models/sentiment140/rnn.py
@@ -53,5 +53,9 @@ def forward(self, x):
         return out
 
     def configure_optimizers(self):
+        optimizer_override = self.get_optimizer_override()
+        if optimizer_override is not None:
+            return optimizer_override
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
diff --git a/nebula/core/training/dp.py b/nebula/core/training/dp.py
index 0554f5268..56a2508f8 100644
--- a/nebula/core/training/dp.py
+++ b/nebula/core/training/dp.py
@@ -24,14 +24,22 @@ def __init__(
         self.secure_mode = bool(secure_mode)
         self.poisson_sampling = bool(poisson_sampling)
         self.clipping = clipping
+        self._privacy_engine = None
 
     def on_train_start(self, model, optimizer, state):
         from opacus import PrivacyEngine
 
-        privacy_engine = PrivacyEngine(accountant=self.accountant, secure_mode=self.secure_mode)
         dataloader = state.extras["dataloader"]
         model.train()
-        private_model, optimizer, private_dataloader = privacy_engine.make_private(
+
+        if self._privacy_engine is None:
+            self._privacy_engine = PrivacyEngine(
+                accountant=self.accountant,
+                secure_mode=self.secure_mode,
+            )
+        privacy_engine = self._privacy_engine
+
+        private_model, private_optimizer, private_dataloader = privacy_engine.make_private(
             module=model,
             optimizer=optimizer,
             data_loader=dataloader,
@@ -43,7 +51,7 @@ def on_train_start(self, model, optimizer, state):
 
         state.extras["privacy_engine"] = privacy_engine
         state.extras["model"] = private_model
-        state.extras["optimizer"] = optimizer
+        state.extras["optimizer"] = private_optimizer
         state.extras["dataloader"] = private_dataloader
 
     def on_train_end(self, state):
@@ -59,10 +67,22 @@ def on_train_end(self, state):
                 pass
 
         if private_model is not None:
-            private_model.zero_grad(set_to_none=True)
+            try:
+                private_model.zero_grad(set_to_none=True)
+            except Exception:
+                pass
+
             try:
                 private_model.forbid_grad_accumulation()
+            except Exception:
+                pass
+
+            try:
                 private_model.disable_hooks()
+            except Exception:
+                pass
+
+            try:
                 private_model.remove_hooks()
             except Exception:
                 pass
diff --git a/nebula/core/training/lightning_dp.py b/nebula/core/training/lightning_dp.py
index ea2fd0337..c15a164b9 100644
--- a/nebula/core/training/lightning_dp.py
+++ b/nebula/core/training/lightning_dp.py
@@ -89,45 +89,43 @@ def _log_manual_metrics(self, phase, metrics):
 
     def _fit_with_dp(self):
         state = SimpleDPState()
-        original_configure_optimizers = self.model.configure_optimizers
 
-        self.model.train()
-        self.datamodule.setup("fit")
-        train_dataloader = self.datamodule.train_dataloader()
-        val_dataloader = self.datamodule.val_dataloader()
-
-        optimizer = self.model.configure_optimizers()
-        state.extras["dataloader"] = train_dataloader
+        if hasattr(self.model, "clear_optimizer_override"):
+            self.model.clear_optimizer_override()
 
         try:
+            self.model.train()
+            self.datamodule.setup("fit")
+            train_dataloader = self.datamodule.train_dataloader()
+            val_dataloader = self.datamodule.val_dataloader()
+
+            optimizer = self.model.configure_optimizers()
+            state.extras["dataloader"] = train_dataloader
+
             self._dp_plugin.on_train_start(self.model, optimizer, state)
 
-            private_model = state.extras["model"]
             private_optimizer = state.extras["optimizer"]
             private_dataloader = state.extras["dataloader"]
 
-            self.model._optimizer = private_optimizer
-
-            def configure_private_optimizers():
-                return private_optimizer
-
-            self.model.configure_optimizers = configure_private_optimizers
+            if not hasattr(self.model, "set_optimizer_override"):
+                raise ValueError("DP training requires the model to support optimizer overrides.")
 
+            # Opacus keeps the grad-sample hooks on self.model, while Lightning gets
+            # the original LightningModule and a DPOptimizer through configure_optimizers.
+            self.model.dp_enabled = True
+            self.model.set_optimizer_override(private_optimizer)
             self._trainer.fit(
                 self.model,
                 train_dataloaders=private_dataloader,
                 val_dataloaders=val_dataloader,
             )
 
-            if hasattr(private_model, "_module"):
-                self.model.load_state_dict(private_model._module.state_dict())
-            else:
-                self.model.load_state_dict(private_model.state_dict())
-
             self.model.train()
 
         finally:
-            self.model.configure_optimizers = original_configure_optimizers
+            self.model.dp_enabled = False
+            if hasattr(self.model, "clear_optimizer_override"):
+                self.model.clear_optimizer_override()
             self._dp_plugin.on_train_end(state)
             self.datamodule.teardown("fit")
 
@@ -139,6 +137,9 @@ def configure_private_optimizers():
             self.dp_epsilon = float(dp_epsilon)
             self.dp_delta = float(dp_delta)
 
+            self.model.dp_epsilon = self.dp_epsilon
+            self.model.dp_delta = self.dp_delta
+
             if self._logger is not None:
                 self._logger.log_data(
                     {

From 62f5fda62725cfabc8929f4b3914b58ae6b614ed Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Tue, 28 Apr 2026 13:31:27 +0200
Subject: [PATCH 33/66] Aggregation error fixed, trustworthiness
 refactorization (factsheet/CFL), trustworthiness agnostic to model and
 dataset

---
 nebula/addons/trustworthiness/calculation.py  |  18 +-
 .../addons/trustworthiness/dfl_factsheet.py   |   2 +-
 nebula/addons/trustworthiness/factsheet.py    | 177 ++----------------
 .../addons/trustworthiness/trustworthiness.py |  46 +++--
 nebula/core/aggregation/fedavg.py             |   3 +-
 nebula/core/models/adultcensus/mlp.py         |   4 +
 nebula/core/models/breast_cancer/mlp.py       |   4 +
 nebula/core/models/cifar10/cnn.py             |   4 +
 nebula/core/models/cifar10/cnnV2.py           |   4 +
 nebula/core/models/cifar10/cnnV3.py           |   4 +
 nebula/core/models/cifar10/fastermobilenet.py |   4 +
 nebula/core/models/cifar10/resnet.py          |   4 +
 nebula/core/models/cifar10/simplemobilenet.py |   4 +
 nebula/core/models/cifar100/cnn.py            |   4 +
 nebula/core/models/covtype/mlp.py             |   4 +
 nebula/core/models/emnist/cnn.py              |   4 +
 nebula/core/models/emnist/mlp.py              |   4 +
 nebula/core/models/fashionmnist/cnn.py        |   4 +
 nebula/core/models/fashionmnist/mlp.py        |   4 +
 nebula/core/models/kddcup99/mlp.py            |   4 +
 nebula/core/models/mnist/cnn.py               |   4 +
 nebula/core/models/mnist/mlp.py               |   4 +
 nebula/core/models/sentiment140/cnn.py        |   4 +
 nebula/core/models/sentiment140/rnn.py        |   4 +
 24 files changed, 128 insertions(+), 194 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index fd49c97d3..b0fcbbd97 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -24,6 +24,8 @@
 from torch import nn, optim
 import torch.nn.functional as F
 import time
+import io
+
 
 from nebula.addons.trustworthiness.utils import read_csv
 
@@ -513,21 +515,19 @@ def get_bytes_model(model_file):
 
     return model_size
 
-def get_bytes_final_model_id(id, scenario_name):
+def get_bytes_final_model_id(model):
     """
-    Calculates the bytes of the final model of a node by id.
+    Calculates the serialized size in bytes of a PyTorch model state_dict.
 
     Args:
-        id: Participant ID.
+        model (nn.Module): PyTorch model.
 
     Returns:
-        float: The bytes of the model.
+        int: Model size in bytes.
     """
-
-
-    model_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"participant_{id}_final_model.pk")
-
-    model_size = os.path.getsize(model_file)
+    buffer: io.BytesIO = io.BytesIO()
+    torch.save(model.state_dict(), buffer)
+    model_size: int = buffer.tell()
 
     return model_size
 
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 844d8a8b5..9e57437f3 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -178,7 +178,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
         factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
-        factsheet["configuration"]["local_update_steps"] = 1
+        factsheet["configuration"]["local_update_steps"] = data["epochs"]
 
         files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
 
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 5c7f73c47..bd166a542 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -9,24 +9,6 @@
 import pandas as pd
 import time
 
-# from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
-from nebula.core.models.mnist.mlp import MNISTModelMLP
-from nebula.core.models.mnist.cnn import MNISTModelCNN
-from nebula.core.models.covtype.mlp import CovtypeModelMLP
-from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
-from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
-from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
-from nebula.core.models.fashionmnist.mlp import FashionMNISTModelMLP
-from nebula.core.models.fashionmnist.cnn import FashionMNISTModelCNN
-from nebula.core.models.emnist.mlp import EMNISTModelMLP
-from nebula.core.models.emnist.cnn import EMNISTModelCNN
-from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
-from nebula.core.models.cifar10.cnnV2 import CIFAR10ModelCNN_V2
-from nebula.core.models.cifar10.cnnV3 import CIFAR10ModelCNN_V3
-from nebula.core.models.cifar10.fastermobilenet import FasterMobileNet
-from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
-from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
-from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
 from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_dp_global
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
@@ -42,7 +24,7 @@ def __init__(self):
         self.factsheet_file_nm = "factsheet.json"
         self.factsheet_template_file_nm = "factsheet_template.json"
 
-    def populate_factsheet_pre_train(self, data, scenario_name):
+    def populate_factsheet_pre_train(self, data, scenario_name, model):
         """
         Populates the factsheet with values before the training.
 
@@ -134,61 +116,9 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                     factsheet["configuration"]["differential_privacy"] = False
                     factsheet["configuration"]["dp_epsilon"] = ""
 
-                    if dataset == "MNIST" and algorithm == "MLP":
-                        model = MNISTModelMLP()
-                        num_classes_temp = 10
-                    elif dataset == "MNIST" and algorithm == "CNN":
-                        model = MNISTModelCNN()
-                        num_classes_temp = 10
-                    elif dataset == "FashionMNIST" and algorithm == "MLP":
-                        model = FashionMNISTModelMLP()
-                        num_classes_temp = 10
-                    elif dataset == "FashionMNIST" and algorithm == "CNN":
-                        model = FashionMNISTModelCNN()
-                        num_classes_temp = 10
-                    elif dataset == "Covtype" and algorithm == "MLP":
-                        model = CovtypeModelMLP()
-                        num_classes_temp = 7
-                    elif dataset == "KDDCUP99" and algorithm == "MLP":
-                        model = KDDCUP99ModelMLP()
-                        num_classes_temp = 23
-                    elif dataset == "AdultCensus" and algorithm == "MLP":
-                        model = AdultCensusModelMLP()
-                        num_classes_temp = 2
-                    elif dataset == "BreastCancer" and algorithm == "MLP":
-                        model = BreastCancerModelMLP()
-                        num_classes_temp = 2
-                    elif dataset == "EMNIST" and algorithm == "MLP":
-                        model = EMNISTModelMLP()
-                        num_classes_temp = 47
-                    elif dataset == "EMNIST" and algorithm == "CNN":
-                        model = EMNISTModelCNN()
-                        num_classes_temp = 47
-                    elif dataset == "CIFAR10" and algorithm == "ResNet9":
-                        model = CIFAR10ModelResNet(classifier="resnet9")
-                        num_classes_temp = 10
-                    elif dataset == "CIFAR10" and algorithm == "fastermobilenet":
-                        model = FasterMobileNet()
-                        num_classes_temp = 10
-                    elif dataset == "CIFAR10" and algorithm == "simplemobilenet":
-                        model = SimpleMobileNetV1()
-                        num_classes_temp = 10
-                    elif dataset == "CIFAR10" and algorithm == "CNN":
-                        model = CIFAR10ModelCNN()
-                        num_classes_temp = 10
-                    elif dataset == "CIFAR10" and algorithm == "CNNv2":
-                        model = CIFAR10ModelCNN_V2()
-                        num_classes_temp = 10
-                    elif dataset == "CIFAR10" and algorithm == "CNNv3":
-                        model = CIFAR10ModelCNN_V3()
-                        num_classes_temp = 10
-                    elif dataset == "CIFAR100" and algorithm == "CNN":
-                        model = CIFAR100ModelCNN()
-                        num_classes_temp = 100
-
                     factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
                     factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
-                    factsheet["configuration"]["local_update_steps"] = 1
+                    factsheet["configuration"]["local_update_steps"] = data["epochs"]
 
                     f.seek(0)
                     f.truncate()
@@ -198,7 +128,7 @@ def populate_factsheet_pre_train(self, data, scenario_name):
                 logging.warning(f"{factsheet_file} is invalid")
                 logging.error(e)
 
-    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx, reputation_summary=None, participation_summary=None, reliability_summary=None):
+    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx, model, train_loader, test_loader, reputation_summary=None, participation_summary=None, reliability_summary=None):
         """
         Populates the factsheet with values after the training.
 
@@ -214,14 +144,8 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
             try:
                 factsheet = json.load(f)
 
-                dataset = factsheet["data"]["provenance"]
-                model = factsheet["configuration"]["training_model"]
-
                 files_dir = f"{os.environ.get('NEBULA_LOGS_DIR')}/{scenario_name}/trustworthiness"
 
-                train_dataloader_file = f"{files_dir}/participant_{participant_idx}_train_loader.pk"
-                test_dataloader_file = f"{files_dir}/participant_{participant_idx}_test_loader.pk"
-                final_model_file = f"{files_dir}/participant_{participant_idx}_final_model.pk"
                 emissions_file = os.path.join(files_dir, "emissions.csv")
 
                 avg_class_imbalance, avg_model_size = get_avg_class_imbalance_model_size(scenario_name)
@@ -278,82 +202,19 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 else:
                     factsheet["participants"]["avg_neighbor_reputation"] = 0
 
-                with open(final_model_file, "rb") as file:
-                    lightning_model = pickle.load(file)
-
-                if dataset == "MNIST" and model == "MLP":
-                    model = MNISTModelMLP()
-                    num_classes_temp = 10
-                elif dataset == "MNIST" and model == "CNN":
-                    model = MNISTModelCNN()
-                    num_classes_temp = 10
-                elif dataset == "FashionMNIST" and model == "MLP":
-                    model = FashionMNISTModelMLP()
-                    num_classes_temp = 10
-                elif dataset == "FashionMNIST" and model == "CNN":
-                    model = FashionMNISTModelCNN()
-                    num_classes_temp = 10
-                elif dataset == "Covtype" and model == "MLP":
-                    model = CovtypeModelMLP()
-                    num_classes_temp = 7
-                elif dataset == "KDDCUP99" and model == "MLP":
-                    model = KDDCUP99ModelMLP()
-                    num_classes_temp = 23
-                elif dataset == "AdultCensus" and model == "MLP":
-                    model = AdultCensusModelMLP()
-                    num_classes_temp = 2
-                elif dataset == "BreastCancer" and model == "MLP":
-                    model = BreastCancerModelMLP()
-                    num_classes_temp = 2
-                elif dataset == "EMNIST" and model == "MLP":
-                    model = EMNISTModelMLP()
-                    num_classes_temp = 47
-                elif dataset == "EMNIST" and model == "CNN":
-                    model = EMNISTModelCNN()
-                    num_classes_temp = 47
-                elif dataset == "CIFAR10" and model == "ResNet9":
-                    model = CIFAR10ModelResNet(classifier="resnet9")
-                    num_classes_temp = 10
-                elif dataset == "CIFAR10" and model == "fastermobilenet":
-                    model = FasterMobileNet()
-                    num_classes_temp = 10
-                elif dataset == "CIFAR10" and model == "simplemobilenet":
-                    model = SimpleMobileNetV1()
-                    num_classes_temp = 10
-                elif dataset == "CIFAR10" and model == "CNN":
-                    model = CIFAR10ModelCNN()
-                    num_classes_temp = 10
-                elif dataset == "CIFAR10" and model == "CNNv2":
-                    model = CIFAR10ModelCNN_V2()
-                    num_classes_temp = 10
-                elif dataset == "CIFAR10" and model == "CNNv3":
-                    model = CIFAR10ModelCNN_V3()
-                    num_classes_temp = 10
-                elif dataset == "CIFAR100" and model == "CNN":
-                    model = CIFAR100ModelCNN()
-                    num_classes_temp = 100
-
-                model.load_state_dict(lightning_model.state_dict())
-
-                with open(train_dataloader_file, "rb") as file:
-                    train_dataloader = pickle.load(file)
-
-                with open(test_dataloader_file, "rb") as file:
-                    test_dataloader = pickle.load(file)
-
-                test_sample = next(iter(test_dataloader))
-                explainability_metrics = get_explainability_metrics_summary(model, test_dataloader)
-                factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_dataloader)
+                test_sample = next(iter(test_loader))
+                explainability_metrics = get_explainability_metrics_summary(model, test_loader)
+                factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
                 factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
                     model,
-                    train_dataloader,
-                    test_dataloader,
+                    train_loader,
+                    test_loader,
                 )
                 factsheet["privacy"]["epsilon_star_score"] = 1/(1 + factsheet["privacy"]["epsilon_star"])
                 factsheet["privacy"]["mia_auc"] = get_mia_auc(
                     model,
-                    train_dataloader,
-                    test_dataloader,
+                    train_loader,
+                    test_loader,
                 )
                 factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
 
@@ -362,29 +223,29 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["fairness"]["underfitting"] = underfitting_score
                 overfitting_value = get_overfitting_score(
                     model,
-                    train_dataloader,
+                    train_loader,
                     participant_test_acc,
                 )
                 factsheet["fairness"]["overfitting"] = 1/(1 + overfitting_value)
                 well_calibration_error_value = get_well_calibration_error(
                     model,
-                    test_dataloader,
+                    test_loader,
                 )
 
                 factsheet["fairness"]["well_calibration_error"] = 1/(1 + well_calibration_error_value)
                 generalized_entropy_index_value = get_generalized_entropy_index(
                     model,
-                    test_dataloader,
+                    test_loader,
                 )
                 factsheet["fairness"]["generalized_entropy_index"] = 1/(1 + generalized_entropy_index_value)
                 theil_index_value = get_theil_index(
                     model,
-                    test_dataloader,
+                    test_loader,
                 )
                 factsheet["fairness"]["theil_index"] = 1/(1 + theil_index_value)
                 coefficient_of_variation_value = get_coefficient_of_variation(
                     model,
-                    test_dataloader,
+                    test_loader,
                 )
                 factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
                 factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
@@ -393,16 +254,16 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
 
                 lr = factsheet["configuration"]["learning_rate"]
 
-                value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
+                value_clever = get_clever_score(model, test_sample, model.get_num_classes(), lr)
                 factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
 
-                value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
+                value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, model.get_num_classes(), lr)
                 factsheet["performance"]["test_loss_sensitivity"] = 1 / (1 + value_loss_sensitivity)
 
-                value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
+                value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, model.get_num_classes(), lr)
                 factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
 
-                value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes_temp, lr)
+                value_empirical_robustness = get_empirical_robustness_score(model, test_sample, model.get_num_classes(), lr)
                 factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
 
                 value_confidence_score = get_confidence_score(model, test_sample)
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index d5b390c3f..3d9b3c0b5 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -140,6 +140,7 @@ def get_metrics(self):
     def get_validation_metrics(self):
         return (self._current_val_loss, self._current_val_accuracy)
 
+    """
     def _dump_model_for_trust(self, path):
         model = self._engine.trainer.model
         optimizer = model._optimizer
@@ -149,6 +150,7 @@ def _dump_model_for_trust(self, path):
                 pickle.dump(model, f)
         finally:
             model._optimizer = optimizer
+    """
 
     async def finish_experiment_role_pre_actions(self):
         with open(self._train_loader_file, 'rb') as file:
@@ -176,7 +178,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
 
-            model_size = get_bytes_final_model_id(self._idx, experiment_name)
+            model_size = get_bytes_final_model_id(self._engine.trainer.model)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
@@ -878,7 +880,10 @@ async def _register_sdfl_trustscores_report(self, source, message):
     async def _process_round_end_event(self, ree: RoundEndEvent):
         scenario_name = self._engine.config.participant["scenario_args"]["name"]
         train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"
-        self._dump_model_for_trust(train_model)
+        #self._dump_model_for_trust(train_model)
+        # Disabled: the per-round model is no longer pickled into the trustworthiness directory.
+        #with open(train_model, 'wb') as f:
+        #    pickle.dump(self._engine.trainer.model, f)
 
     async def _process_round_start_event(self, rse: RoundStartEvent):
         _, _, expected_nodes = await rse.get_event_data()
@@ -913,7 +918,10 @@ async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
 
     async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
-        self._dump_model_for_trust(model_file)
+        #self._dump_model_for_trust(model_file)
+        # Disabled: the final model is no longer pickled into the trustworthiness directory.
+        #with open(model_file, 'wb') as f:
+        #    pickle.dump(self._engine.trainer.model, f)
 
 
 class TrustWorkloadServer(TrustWorkload):
@@ -951,7 +959,6 @@ async def init(self, experiment_name):
         await EventManager.get_instance().subscribe_addonevent(ValidationMetricsEvent, self._process_validation_metrics_event)
         await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
-        await self._create_pk_files(experiment_name)
 
         self._per_round = PerRoundTrustMetrics(
             experiment_name=experiment_name,
@@ -963,23 +970,6 @@ async def init(self, experiment_name):
         )
         await self._per_round.setup(self._engine)
 
-    async def _create_pk_files(self, experiment_name):
-        # Save data to local files to compute trustworthiness
-        train_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_train_loader.pk"
-        test_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_test_loader.pk"
-        self._engine.trainer.datamodule.setup(stage="fit")
-        train_loader = self._engine.trainer.datamodule.train_dataloader()
-        self._engine.trainer.datamodule.setup(stage="test")
-        test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
-
-        with open(train_loader_filename, 'wb') as f:
-            pickle.dump(train_loader, f)
-            f.close()
-        with open(test_loader_filename, 'wb') as f:
-            pickle.dump(test_loader, f)
-            f.close()
-
-
     def get_workload(self):
         return self._workload
 
@@ -1019,7 +1009,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
 
-            model_size = get_bytes_final_model_id(self._idx, experiment_name)
+            model_size = get_bytes_final_model_id(self._engine.trainer.model)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
@@ -1049,14 +1039,13 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
 
-            model_size = get_bytes_final_model_id(self._idx, experiment_name)
+            model_size = get_bytes_final_model_id(self._engine.trainer.model)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
             save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
             save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
             await self._generate_factsheet(trust_config, experiment_name)
-        #await self._generate_factsheet(trust_config, experiment_name)
 
     async def register_trustworthiness_report(self, source, message):
         self._trustworthiness_reports[message.node_id] = {
@@ -1118,12 +1107,19 @@ async def _process_aggregation_event(self, age: AggregationEvent):
 
     async def _generate_factsheet(self, trust_config, experiment_name):
         factsheet = Factsheet()
-        factsheet.populate_factsheet_pre_train(trust_config, experiment_name)
+        self._engine.trainer.datamodule.setup(stage="fit")
+        train_loader = self._engine.trainer.datamodule.train_dataloader()
+        self._engine.trainer.datamodule.setup(stage="test")
+        test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
+        factsheet.populate_factsheet_pre_train(trust_config, experiment_name, self._engine.trainer.model)
         factsheet.populate_factsheet_post_train(
             experiment_name,
             self._start_time,
             self._end_time,
             self._idx,
+            self._engine.trainer.model,
+            train_loader,
+            test_loader,
             reputation_summary=self._get_reputation_trust_summary(),
             participation_summary=self._get_participation_trust_summary(),
             reliability_summary=self._get_system_reliability_summary(),
diff --git a/nebula/core/aggregation/fedavg.py b/nebula/core/aggregation/fedavg.py
index fff29ac00..42e82f14a 100755
--- a/nebula/core/aggregation/fedavg.py
+++ b/nebula/core/aggregation/fedavg.py
@@ -28,7 +28,8 @@ def run_aggregation(self, models):
         total_samples = float(sum(weight for _, weight in models))
 
         if total_samples == 0:
-            raise ValueError("Total number of samples must be greater than zero.")
+            logging.warning("Total number of samples must be greater than zero.")
+            return None
 
         last_model_params = models[-1][0]
         accum = {layer: torch.zeros_like(param, dtype=torch.float32) for layer, param in last_model_params.items()}
diff --git a/nebula/core/models/adultcensus/mlp.py b/nebula/core/models/adultcensus/mlp.py
index c1106b647..2e8ce5107 100644
--- a/nebula/core/models/adultcensus/mlp.py
+++ b/nebula/core/models/adultcensus/mlp.py
@@ -22,6 +22,7 @@ def __init__(
         hidden1: int = 256,
         hidden2: int = 128,
         dropout: float = 0.0,
+        data_type="Tabular",
     ):
         # NebulaModel expects something like input_channels first; for tabular we pass input_dim there.
         super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
@@ -69,3 +70,6 @@ def get_learning_rate(self) -> float:
 
     def count_parameters(self) -> int:
         return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/breast_cancer/mlp.py b/nebula/core/models/breast_cancer/mlp.py
index 8b5bd0ed8..2b70e0713 100644
--- a/nebula/core/models/breast_cancer/mlp.py
+++ b/nebula/core/models/breast_cancer/mlp.py
@@ -14,6 +14,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Tabular",
     ):
         # OJO: NebulaModel está pensado para imágenes (input_channels),
         # pero en la práctica se usa ese primer argumento como "input shape info".
@@ -57,3 +58,6 @@ def get_learning_rate(self) -> float:
 
     def count_parameters(self) -> int:
         return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/cifar10/cnn.py b/nebula/core/models/cifar10/cnn.py
index e1d9eb238..1486f0aa6 100755
--- a/nebula/core/models/cifar10/cnn.py
+++ b/nebula/core/models/cifar10/cnn.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -55,3 +56,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/cifar10/cnnV2.py b/nebula/core/models/cifar10/cnnV2.py
index a22ae51ec..ca5ef81b0 100755
--- a/nebula/core/models/cifar10/cnnV2.py
+++ b/nebula/core/models/cifar10/cnnV2.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -59,3 +60,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/cifar10/cnnV3.py b/nebula/core/models/cifar10/cnnV3.py
index d5dd5da33..b1783f76a 100755
--- a/nebula/core/models/cifar10/cnnV3.py
+++ b/nebula/core/models/cifar10/cnnV3.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -86,3 +87,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/cifar10/fastermobilenet.py b/nebula/core/models/cifar10/fastermobilenet.py
index 374988b18..4856b22dd 100755
--- a/nebula/core/models/cifar10/fastermobilenet.py
+++ b/nebula/core/models/cifar10/fastermobilenet.py
@@ -13,6 +13,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -75,3 +76,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/cifar10/resnet.py b/nebula/core/models/cifar10/resnet.py
index 0aaf40a32..ce5d5ec7e 100755
--- a/nebula/core/models/cifar10/resnet.py
+++ b/nebula/core/models/cifar10/resnet.py
@@ -39,6 +39,7 @@ def __init__(
         seed=None,
         implementation="scratch",
         classifier="resnet9",
+        data_type="Images",
     ):
         super().__init__()
         if metrics is None:
@@ -159,3 +160,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/cifar10/simplemobilenet.py b/nebula/core/models/cifar10/simplemobilenet.py
index 17a6f2551..478398439 100755
--- a/nebula/core/models/cifar10/simplemobilenet.py
+++ b/nebula/core/models/cifar10/simplemobilenet.py
@@ -18,6 +18,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -77,3 +78,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/cifar100/cnn.py b/nebula/core/models/cifar100/cnn.py
index 377ce58c6..8ec7118c0 100755
--- a/nebula/core/models/cifar100/cnn.py
+++ b/nebula/core/models/cifar100/cnn.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -110,3 +111,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/covtype/mlp.py b/nebula/core/models/covtype/mlp.py
index 8d15f5d47..eace7b899 100644
--- a/nebula/core/models/covtype/mlp.py
+++ b/nebula/core/models/covtype/mlp.py
@@ -14,6 +14,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Tabular",
     ):
         # OJO: NebulaModel está pensado para imágenes (input_channels),
         # pero en la práctica se usa ese primer argumento como "input shape info".
@@ -57,3 +58,6 @@ def get_learning_rate(self) -> float:
 
     def count_parameters(self) -> int:
         return int(sum(p.numel() for p in self.parameters() if p.requires_grad))
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/emnist/cnn.py b/nebula/core/models/emnist/cnn.py
index e286df5c2..79ba0a944 100755
--- a/nebula/core/models/emnist/cnn.py
+++ b/nebula/core/models/emnist/cnn.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -66,3 +67,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/emnist/mlp.py b/nebula/core/models/emnist/mlp.py
index 8308d19c5..c3af2d01f 100755
--- a/nebula/core/models/emnist/mlp.py
+++ b/nebula/core/models/emnist/mlp.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -48,3 +49,6 @@ def configure_optimizers(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/fashionmnist/cnn.py b/nebula/core/models/fashionmnist/cnn.py
index 58ec13d42..a62efea31 100755
--- a/nebula/core/models/fashionmnist/cnn.py
+++ b/nebula/core/models/fashionmnist/cnn.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -66,3 +67,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/fashionmnist/mlp.py b/nebula/core/models/fashionmnist/mlp.py
index 81d0e95da..4009cd06c 100755
--- a/nebula/core/models/fashionmnist/mlp.py
+++ b/nebula/core/models/fashionmnist/mlp.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -48,3 +49,6 @@ def configure_optimizers(self):
 
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/kddcup99/mlp.py b/nebula/core/models/kddcup99/mlp.py
index eb3c79e69..60306beae 100644
--- a/nebula/core/models/kddcup99/mlp.py
+++ b/nebula/core/models/kddcup99/mlp.py
@@ -13,6 +13,7 @@ def __init__(
         confusion_matrix=None,
         seed=None,
         input_size=118,
+        data_type="Tabular",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -51,3 +52,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/mnist/cnn.py b/nebula/core/models/mnist/cnn.py
index abbc33113..19f59f4f1 100755
--- a/nebula/core/models/mnist/cnn.py
+++ b/nebula/core/models/mnist/cnn.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -64,3 +65,6 @@ def count_parameters(self):
 
     def get_learning_rate(self):
         return self.learning_rate
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/mnist/mlp.py b/nebula/core/models/mnist/mlp.py
index 426075273..bbc46a308 100755
--- a/nebula/core/models/mnist/mlp.py
+++ b/nebula/core/models/mnist/mlp.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -46,3 +47,6 @@ def get_learning_rate(self):
 
     def count_parameters(self):
         return sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/sentiment140/cnn.py b/nebula/core/models/sentiment140/cnn.py
index bf5cdb0c6..dec754f3b 100755
--- a/nebula/core/models/sentiment140/cnn.py
+++ b/nebula/core/models/sentiment140/cnn.py
@@ -14,6 +14,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Tabular",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -58,3 +59,6 @@ def configure_optimizers(self):
             amsgrad=self.config["amsgrad"],
         )
         return optimizer
+
+    def get_num_classes(self):
+        return self.num_classes
diff --git a/nebula/core/models/sentiment140/rnn.py b/nebula/core/models/sentiment140/rnn.py
index 763848724..e15cec49a 100755
--- a/nebula/core/models/sentiment140/rnn.py
+++ b/nebula/core/models/sentiment140/rnn.py
@@ -12,6 +12,7 @@ def __init__(
         metrics=None,
         confusion_matrix=None,
         seed=None,
+        data_type="Tabular",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
 
@@ -59,3 +60,6 @@ def configure_optimizers(self):
 
         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
         return optimizer
+
+    def get_num_classes(self):
+        return self.num_classes

From 28ccb01a3cb76e4b6ca2e2b206703d7e8b5b2690 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 29 Apr 2026 09:32:09 +0200
Subject: [PATCH 34/66] Trustworthiness refactoring (factsheet/DFL),
 agnostic to model and dataset

---
 nebula/addons/trustworthiness/calculation.py  |  40 +-----
 .../addons/trustworthiness/dfl_factsheet.py   | 132 +++---------------
 nebula/addons/trustworthiness/factsheet.py    |   2 +-
 .../addons/trustworthiness/trustworthiness.py |  33 +++--
 4 files changed, 46 insertions(+), 161 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index b0fcbbd97..c0a6468eb 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -477,45 +477,7 @@ def get_elapsed_time(start_time, end_time):
 
     return elapsed_time
 
-
-def get_bytes_models(models_files):
-    """
-    Calculates the mean bytes of the final models of the nodes.
-
-    Args:
-        models_files (list): List of final models.
-
-    Returns:
-        float: The mean bytes of the models.
-    """
-
-    total_models_size = 0
-    number_models = len(models_files)
-
-    for file in models_files:
-        model_size = os.path.getsize(file)
-        total_models_size += model_size
-
-    avg_model_size = total_models_size / number_models
-
-    return avg_model_size
-
-def get_bytes_model(model_file):
-    """
-    Calculates the bytes of the final model of a node.
-
-    Args:
-        model_file: Final model.
-
-    Returns:
-        float: The bytes of the model.
-    """
-
-    model_size = os.path.getsize(model_file)
-
-    return model_size
-
-def get_bytes_final_model_id(model):
+def get_bytes_model(model):
     """
     Calculates the serialized size in bytes of a PyTorch model state_dict.
 
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 9e57437f3..9cf1f0ce1 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -11,31 +11,13 @@
 import pandas as pd
 import time
 
-# from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
-from nebula.core.models.mnist.mlp import MNISTModelMLP
-from nebula.core.models.mnist.cnn import MNISTModelCNN
-from nebula.core.models.covtype.mlp import CovtypeModelMLP
-from nebula.core.models.kddcup99.mlp import KDDCUP99ModelMLP
-from nebula.core.models.adultcensus.mlp import AdultCensusModelMLP
-from nebula.core.models.breast_cancer.mlp import BreastCancerModelMLP
-from nebula.core.models.fashionmnist.mlp import FashionMNISTModelMLP
-from nebula.core.models.fashionmnist.cnn import FashionMNISTModelCNN
-from nebula.core.models.emnist.mlp import EMNISTModelMLP
-from nebula.core.models.emnist.cnn import EMNISTModelCNN
-from nebula.core.models.cifar10.cnn import CIFAR10ModelCNN
-from nebula.core.models.cifar10.cnnV2 import CIFAR10ModelCNN_V2
-from nebula.core.models.cifar10.cnnV3 import CIFAR10ModelCNN_V3
-from nebula.core.models.cifar10.fastermobilenet import FasterMobileNet
-from nebula.core.models.cifar10.resnet import CIFAR10ModelResNet
-from nebula.core.models.cifar10.simplemobilenet import SimpleMobileNetV1
-from nebula.core.models.cifar100.cnn import CIFAR100ModelCNN
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_underfitting_score_local, get_dp_local
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_underfitting_score_local, get_dp_local
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 
 dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
-def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time, reputation_summary=None, participation_summary=None, reliability_summary=None):
+def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time, model, train_loader, test_loader, reputation_summary=None, participation_summary=None, reliability_summary=None):
     trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
     os.makedirs(trust_dir, exist_ok=True)
 
@@ -108,14 +90,6 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["configuration"]["monitoring"] = True
         factsheet["configuration"]["total_round_num"] = n_rounds
 
-        """
-        if poisoned_noise_percent != 0:
-            factsheet["configuration"]["differential_privacy"] = True
-            factsheet["configuration"]["dp_epsilon"] = poisoned_noise_percent
-        else:
-            factsheet["configuration"]["differential_privacy"] = False
-            factsheet["configuration"]["dp_epsilon"] = ""
-        """
         dp_enabled, dp_epsilon = get_dp_local(experiment_name, participant_idx)
         if dp_enabled:
             factsheet["configuration"]["differential_privacy"] = True
@@ -124,72 +98,14 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
             factsheet["configuration"]["differential_privacy"] = False
             factsheet["configuration"]["dp_epsilon"] = ""
 
-        if dataset == "MNIST" and algorithm == "MLP":
-            model = MNISTModelMLP()
-            num_classes_temp = 10
-        elif dataset == "MNIST" and algorithm == "CNN":
-            model = MNISTModelCNN()
-            num_classes_temp = 10
-        elif dataset == "FashionMNIST" and algorithm == "MLP":
-            model = FashionMNISTModelMLP()
-            num_classes_temp = 10
-        elif dataset == "FashionMNIST" and algorithm == "CNN":
-            model = FashionMNISTModelCNN()
-            num_classes_temp = 10
-        elif dataset == "Covtype" and algorithm == "MLP":
-            model = CovtypeModelMLP()
-            num_classes_temp = 7
-        elif dataset == "KDDCUP99" and algorithm == "MLP":
-            model = KDDCUP99ModelMLP()
-            num_classes_temp = 23
-        elif dataset == "AdultCensus" and algorithm == "MLP":
-            model = AdultCensusModelMLP()
-            num_classes_temp = 2
-        elif dataset == "BreastCancer" and algorithm == "MLP":
-            model = BreastCancerModelMLP()
-            num_classes_temp = 2
-        elif dataset == "EMNIST" and algorithm == "MLP":
-            model = EMNISTModelMLP()
-            num_classes_temp = 47
-        elif dataset == "EMNIST" and algorithm == "CNN":
-            model = EMNISTModelCNN()
-            num_classes_temp = 47
-        elif dataset == "CIFAR10" and algorithm == "ResNet9":
-            model = CIFAR10ModelResNet(classifier="resnet9")
-            num_classes_temp = 10
-        elif dataset == "CIFAR10" and algorithm == "fastermobilenet":
-            model = FasterMobileNet()
-            num_classes_temp = 10
-        elif dataset == "CIFAR10" and algorithm == "simplemobilenet":
-            model = SimpleMobileNetV1()
-            num_classes_temp = 10
-        elif dataset == "CIFAR10" and algorithm == "CNN":
-            model = CIFAR10ModelCNN()
-            num_classes_temp = 10
-        elif dataset == "CIFAR10" and algorithm == "CNNv2":
-            model = CIFAR10ModelCNN_V2()
-            num_classes_temp = 10
-        elif dataset == "CIFAR10" and algorithm == "CNNv3":
-            model = CIFAR10ModelCNN_V3()
-            num_classes_temp = 10
-        elif dataset == "CIFAR100" and algorithm == "CNN":
-            model = CIFAR100ModelCNN()
-            num_classes_temp = 100
-
         factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
         factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
         factsheet["configuration"]["local_update_steps"] = data["epochs"]
 
         files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
 
-        train_model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
-        train_dataloader_file = os.path.join(files_dir, f"participant_{participant_idx}_train_loader.pk")
-        test_dataloader_file = os.path.join(files_dir, f"participant_{participant_idx}_test_loader.pk")
         emissions_file = os.path.join(files_dir, f"emissions_{participant_idx}.csv")
 
-        with open(train_model_file, "rb") as t_file:
-            lightning_model = pickle.load(t_file)
-
         get_all_data_entropy(experiment_name)
 
         data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_idx)}_class_count.json")
@@ -210,11 +126,11 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         bytes_sent, bytes_recv = get_bytes(experiment_name, participant_idx)
 
-        model_file = os.path.join(files_dir, f"participant_{participant_idx}_final_model.pk")
-        factsheet["system"]["model_size"] = get_bytes_model(model_file)
+        factsheet["system"]["model_size"] = get_bytes_model(model)
 
         factsheet["system"]["upload_bytes"] = int(bytes_sent)
         factsheet["system"]["download_bytes"] = int(bytes_recv)
+
         if reliability_summary is not None:
             factsheet["system"]["dropout_rate"] = reliability_summary.get("dropout_rate", 0.0)
             factsheet["system"]["timeout_rate"] = reliability_summary.get("timeout_rate", 0.0)
@@ -255,34 +171,26 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         factsheet["sustainability"]["emissions_communication_local"] = (bytes_sent * 2.24e-10 * carbon_intensity_local)+(bytes_recv * 2.24e-10 * carbon_intensity_local)
 
-        model.load_state_dict(lightning_model.state_dict())
-
-        with open(train_dataloader_file, "rb") as d_file:
-            train_dataloader = pickle.load(d_file)
-
-        with open(test_dataloader_file, "rb") as d_file:
-            test_dataloader = pickle.load(d_file)
-
-        test_sample = next(iter(test_dataloader))
-        explainability_metrics = get_explainability_metrics_summary(model, test_dataloader)
-        factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_dataloader)
+        test_sample = next(iter(test_loader))
+        explainability_metrics = get_explainability_metrics_summary(model, test_loader)
+        factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
         factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
             model,
-            train_dataloader,
-            test_dataloader,
+            train_loader,
+            test_loader,
         )
         factsheet["privacy"]["epsilon_star_score"] = 1/(1 + factsheet["privacy"]["epsilon_star"])
         factsheet["privacy"]["mia_auc"] = get_mia_auc(
             model,
-            train_dataloader,
-            test_dataloader,
+            train_loader,
+            test_loader,
         )
 
         factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
         factsheet["fairness"]["underfitting"] = get_underfitting_score_local(experiment_name, participant_idx)
         overfitting_value = get_overfitting_score(
             model,
-            train_dataloader,
+            train_loader,
             factsheet["performance"]["test_acc"],
         )
 
@@ -290,23 +198,23 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         well_calibration_error_value = get_well_calibration_error(
             model,
-            test_dataloader,
+            test_loader,
         )
 
         factsheet["fairness"]["well_calibration_error"] = 1/(1 + well_calibration_error_value)
         generalized_entropy_index_value = get_generalized_entropy_index(
             model,
-            test_dataloader,
+            test_loader,
         )
         factsheet["fairness"]["generalized_entropy_index"] = 1/(1 + generalized_entropy_index_value)
         theil_index_value = get_theil_index(
             model,
-            test_dataloader,
+            test_loader,
         )
         factsheet["fairness"]["theil_index"] = 1/(1 + theil_index_value)
         coefficient_of_variation_value = get_coefficient_of_variation(
             model,
-            test_dataloader,
+            test_loader,
         )
         factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
         factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
@@ -315,16 +223,16 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         lr = factsheet["configuration"]["learning_rate"]
 
-        value_clever = get_clever_score(model, test_sample, num_classes_temp, lr)
+        value_clever = get_clever_score(model, test_sample, model.get_num_classes(), lr)
         factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
 
-        value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes_temp, lr)
+        value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, model.get_num_classes(), lr)
         factsheet["performance"]["test_loss_sensitivity"] = 1 / (1 + value_loss_sensitivity)
 
-        value_adv_accuracy = compute_adversarial_accuracy_art(model, test_dataloader, num_classes_temp, lr)
+        value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, model.get_num_classes(), lr)
         factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
 
-        value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes_temp, lr)
+        value_empirical_robustness = get_empirical_robustness_score(model, test_sample, model.get_num_classes(), lr)
         factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
 
         value_confidence_score = get_confidence_score(model, test_sample)
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index bd166a542..b5372a7f3 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -9,7 +9,7 @@
 import pandas as pd
 import time
 
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_models, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_dp_global
+from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_dp_global
 from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 3d9b3c0b5..381482773 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -8,7 +8,7 @@
 from nebula.config.config import Config
 from nebula.core.engine import Engine
 import pickle
-from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_final_model_id, get_class_imbalance_local, get_participation_variation_score
+from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_model, get_class_imbalance_local, get_participation_variation_score
 from nebula.addons.trustworthiness.utils import save_results_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy, load_trust_report_json_dumped, create_local_trust_report_copy, accumulate_weighted_trustscores, build_weighted_trustscores_report, save_trust_report_json
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
@@ -113,6 +113,7 @@ async def init(self, experiment_name):
 
 
     async def _create_pk_files(self, experiment_name):
+        """
         # Save data to local files to compute trustworthiness
         train_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_train_loader.pk"
         test_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_test_loader.pk"
@@ -127,6 +128,8 @@ async def _create_pk_files(self, experiment_name):
         with open(test_loader_filename, 'wb') as f:
             pickle.dump(test_loader, f)
             f.close()
+        """
+        pass
 
     def get_workload(self):
         return self._workload
@@ -153,8 +156,8 @@ def _dump_model_for_trust(self, path):
     """
 
     async def finish_experiment_role_pre_actions(self):
-        with open(self._train_loader_file, 'rb') as file:
-            train_loader = pickle.load(file)
+        self._engine.trainer.datamodule.setup(stage="fit")
+        train_loader = self._engine.trainer.datamodule.train_dataloader()
         self._sample_size = len(train_loader)
 
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
@@ -178,7 +181,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
 
-            model_size = get_bytes_final_model_id(self._engine.trainer.model)
+            model_size = get_bytes_model(self._engine.trainer.model)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
@@ -294,12 +297,19 @@ async def _finish_sdfl_trustscores_exchange(self, trust_config, experiment_name)
             self._finalize_sdfl_global_trustscores_aggregation()
 
     def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
+        self._engine.trainer.datamodule.setup(stage="fit")
+        train_loader = self._engine.trainer.datamodule.train_dataloader()
+        self._engine.trainer.datamodule.setup(stage="test")
+        test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
         populate_factsheet(
             experiment_name,
             self._idx,
             trust_config,
             self._start_time,
             self._end_time,
+            self._engine.trainer.model,
+            train_loader,
+            test_loader,
             reputation_summary=self._get_reputation_trust_summary(),
             participation_summary=self._get_participation_trust_summary(),
             reliability_summary=self._get_system_reliability_summary(),
@@ -878,12 +888,13 @@ async def _register_sdfl_trustscores_report(self, source, message):
             self._trustscores_wait_event.set()
 
     async def _process_round_end_event(self, ree: RoundEndEvent):
-        scenario_name = self._engine.config.participant["scenario_args"]["name"]
-        train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"
+        #scenario_name = self._engine.config.participant["scenario_args"]["name"]
+        #train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"
         #self._dump_model_for_trust(train_model)
         # Save the model in the trustworthiness directory
         #with open(train_model, 'wb') as f:
         #    pickle.dump(self._engine.trainer.model, f)
+        pass
 
     async def _process_round_start_event(self, rse: RoundStartEvent):
         _, _, expected_nodes = await rse.get_event_data()
@@ -917,11 +928,12 @@ async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
             self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
 
     async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
-        model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
+        #model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
         #self._dump_model_for_trust(model_file)
         # Save the model in the trustworthiness directory
         #with open(model_file, 'wb') as f:
         #    pickle.dump(self._engine.trainer.model, f)
+        pass
 
 
 class TrustWorkloadServer(TrustWorkload):
@@ -1009,7 +1021,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
 
-            model_size = get_bytes_final_model_id(self._engine.trainer.model)
+            model_size = get_bytes_model(self._engine.trainer.model)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
@@ -1039,7 +1051,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
 
-            model_size = get_bytes_final_model_id(self._engine.trainer.model)
+            model_size = get_bytes_model(self._engine.trainer.model)
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
@@ -1241,11 +1253,14 @@ async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
             self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
 
     async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
+        """
         model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
 
         # Save the model in the trustworthiness directory
         with open(model_file, 'wb') as f:
             pickle.dump(self._engine.trainer.model, f)
+        """
+        pass
 
 """                                                     ##############################
                                                         #       TRUSTWORTHINESS      #

From e204eaa2afd653abd6b975d1fda34088286b4ee5 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 29 Apr 2026 12:01:19 +0200
Subject: [PATCH 35/66] Trustworthiness refactor

---
 .../addons/trustworthiness/dfl_factsheet.py   |   2 +-
 nebula/addons/trustworthiness/metric.py       |   2 +-
 .../addons/trustworthiness/trustworthiness.py | 934 +++++-------------
 nebula/addons/trustworthiness/weights.py      |  75 ++
 4 files changed, 330 insertions(+), 683 deletions(-)
 create mode 100644 nebula/addons/trustworthiness/weights.py

diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 9cf1f0ce1..026c64d87 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -32,7 +32,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet = {}
         factsheet = json.load(f)
 
-        logging.info("DFL FactSheet: Populating factsheet with pre training metrics")
+        logging.info("DFL FactSheet: Populating factsheet")
 
         federation = data["federation"]
         n_nodes = int(data["n_nodes"])
diff --git a/nebula/addons/trustworthiness/metric.py b/nebula/addons/trustworthiness/metric.py
index 7c92ec552..b5548c1c5 100755
--- a/nebula/addons/trustworthiness/metric.py
+++ b/nebula/addons/trustworthiness/metric.py
@@ -17,7 +17,7 @@ class TrustMetricManager:
     """
 
     def __init__(self, scenario_start_time, federation, participant=None):
-        if federation == "DFL":
+        if federation == "DFL" or federation == "SDFL":
             self.factsheet_file_nm = f"factsheet_participant_{participant}.json"
             self.eval_metrics_file_nm = "eval_metrics_dfl.json"
             self.nebula_trust_results_nm = f"nebula_trust_results_{participant}.json"
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 381482773..7977ff929 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -1,13 +1,12 @@
 import logging
 import asyncio
 from nebula.addons.functions import print_msg_box
-from nebula.core.nebulaevents import AggregationEvent, ExperimentFinishEvent, RoundEndEvent, RoundStartEvent, TestMetricsEvent, ValidationMetricsEvent
+from nebula.core.nebulaevents import AggregationEvent, ExperimentFinishEvent, RoundStartEvent, TestMetricsEvent, ValidationMetricsEvent
 from nebula.core.eventmanager import EventManager
 from nebula.core.noderole import Role, ServerRoleBehavior
 from abc import ABC, abstractmethod
 from nebula.config.config import Config
 from nebula.core.engine import Engine
-import pickle
 from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_model, get_class_imbalance_local, get_participation_variation_score
 from nebula.addons.trustworthiness.utils import save_results_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy, load_trust_report_json_dumped, create_local_trust_report_copy, accumulate_weighted_trustscores, build_weighted_trustscores_report, save_trust_report_json
 from codecarbon import EmissionsTracker
@@ -17,6 +16,7 @@
 from nebula.addons.trustworthiness.metric import TrustMetricManager
 from nebula.addons.trustworthiness.dfl_factsheet import populate_factsheet
 from nebula.addons.trustworthiness.graphics import Graphics
+from nebula.addons.trustworthiness.weights import load_trust_weights
 import json
 import os
 from nebula.core.network.communications import CommunicationsManager
@@ -54,35 +54,22 @@ async def finish_experiment_role_pre_actions(self):
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
         raise NotImplementedError
 
-class TrustWorkloadTrainer(TrustWorkload):
-    TRUSTSCORES_WAIT_TIMEOUT_SECONDS = 20
-    TRUSTSCORES_FORWARDING_GRACE_SECONDS = 1.0
-    TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS = 1.0
-
-    def __init__(self, engine, idx, trust_files_route):
+class BaseTrustWorkload(TrustWorkload):
+    def __init__(self, engine: Engine, idx, trust_files_route, workload: str, role_label: str, sample_size=None, start_time=None):
         self._engine: Engine = engine
-        self._workload = 'training'
+        self._workload = workload
         self._idx = idx
         self._trust_files_route = trust_files_route
-        self._train_loader_file = f'{self._trust_files_route}/participant_{self._idx}_train_loader.pk'
-        self._sample_size = None
+        self._sample_size = sample_size
         self._current_loss = None
         self._current_accuracy = None
         self._current_val_loss = None
         self._current_val_accuracy = None
         self._experiment_name = ""
         self._per_round = None
-        self._start_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        self._role_label = role_label
+        self._start_time = start_time or datetime.now().strftime("%d/%m/%Y %H:%M:%S")
         self._end_time = None
-        self._expected_trustscores_sources = set()
-        self._expected_trustscores_reports = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
-        self._received_trustscores_node_ids = set()
-        self._trustscores_wait_event = None
-        self._trustscores_score_accumulator = {}
-        self._trustscores_weight_accumulator = {}
-        self._trustscores_template_report = None
-        self._trustscores_local_copy_path = None
-        self._trustscores_local_report_initialized = False
         self._round_participation_counts = {}
         self._dropout_expected_total = 0
         self._dropout_missing_total = 0
@@ -91,46 +78,21 @@ def __init__(self, engine, idx, trust_files_route):
 
     async def init(self, experiment_name):
         self._experiment_name = experiment_name
-        self._reset_trustscores_exchange_state()
-        self._trustscores_wait_event = asyncio.Event()
         await EventManager.get_instance().subscribe_node_event(AggregationEvent, self._process_aggregation_event)
         await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
-        await EventManager.get_instance().subscribe_node_event(RoundEndEvent, self._process_round_end_event)
         await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
         await EventManager.get_instance().subscribe_addonevent(ValidationMetricsEvent, self._process_validation_metrics_event)
-        await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
-        await self._create_pk_files(experiment_name)
 
         self._per_round = PerRoundTrustMetrics(
             experiment_name=experiment_name,
             participant_idx=self._idx,
             trust_dir=self._trust_files_route,
-            role_label="TRAINER",
+            role_label=self._role_label,
             enable_print=True,
             enable_csv=True,
         )
         await self._per_round.setup(self._engine)
 
-
-    async def _create_pk_files(self, experiment_name):
-        """
-        # Save data to local files to compute trustworthiness
-        train_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_train_loader.pk"
-        test_loader_filename = f"/nebula/app/logs/{experiment_name}/trustworthiness/participant_{self._idx}_test_loader.pk"
-        self._engine.trainer.datamodule.setup(stage="fit")
-        train_loader = self._engine.trainer.datamodule.train_dataloader()
-        self._engine.trainer.datamodule.setup(stage="test")
-        test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
-
-        with open(train_loader_filename, 'wb') as f:
-            pickle.dump(train_loader, f)
-            f.close()
-        with open(test_loader_filename, 'wb') as f:
-            pickle.dump(test_loader, f)
-            f.close()
-        """
-        pass
-
     def get_workload(self):
         return self._workload
 
@@ -143,17 +105,122 @@ def get_metrics(self):
     def get_validation_metrics(self):
         return (self._current_val_loss, self._current_val_accuracy)
 
-    """
-    def _dump_model_for_trust(self, path):
-        model = self._engine.trainer.model
-        optimizer = model._optimizer
-        model._optimizer = None
-        try:
-            with open(path, 'wb') as f:
-                pickle.dump(model, f)
-        finally:
-            model._optimizer = optimizer
-    """
+    def _is_reputation_enabled(self) -> bool:
+        defense_args = self._engine.config.participant.get("defense_args", {})
+        reputation_config = defense_args.get("reputation", {})
+        return bool(reputation_config.get("enabled", False))
+
+    def _get_reputation_system(self):
+        return getattr(self._engine, "_reputation", None)
+
+    def _get_reputation_trust_summary(self) -> dict:
+        summary = {
+            "reputation_enabled": self._is_reputation_enabled(),
+            "avg_neighbor_reputation": 0.0,
+        }
+        if hasattr(self, "_expected_trustscores_sources"):
+            summary["neighbor_num"] = len(self._expected_trustscores_sources)
+
+        if not summary["reputation_enabled"]:
+            return summary
+
+        reputation_system = self._get_reputation_system()
+        reputation_values = []
+        if reputation_system is not None:
+            for addr, data in reputation_system.reputation.items():
+                if addr == self._engine.addr:
+                    continue
+
+                reputation_value = data.get("reputation")
+                if reputation_value is not None:
+                    reputation_values.append(float(reputation_value))
+
+        if reputation_values:
+            summary["avg_neighbor_reputation"] = sum(reputation_values) / len(reputation_values)
+        else:
+            reputation_config = self._engine.config.participant.get("defense_args", {}).get("reputation", {})
+            summary["avg_neighbor_reputation"] = float(reputation_config.get("initial_reputation", 0.0) or 0.0)
+
+        return summary
+
+    def _get_participation_trust_summary(self) -> dict:
+        total_clients = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
+        counts = list(self._round_participation_counts.values())
+
+        if len(counts) < total_clients:
+            counts.extend([0] * (total_clients - len(counts)))
+
+        return {
+            "selection_cv": get_participation_variation_score(counts),
+        }
+
+    def _get_system_reliability_summary(self) -> dict:
+        dropout_rate = 0.0
+        if self._dropout_expected_total > 0:
+            dropout_rate = self._dropout_missing_total / self._dropout_expected_total
+
+        timeout_rate = 0.0
+        if self._aggregation_rounds_total > 0:
+            timeout_rate = self._timed_out_rounds_total / self._aggregation_rounds_total
+
+        return {
+            "dropout_rate": float(dropout_rate),
+            "timeout_rate": float(timeout_rate),
+        }
+
+    async def _process_round_start_event(self, rse: RoundStartEvent):
+        _, _, expected_nodes = await rse.get_event_data()
+        for node_addr in expected_nodes:
+            self._round_participation_counts[node_addr] = self._round_participation_counts.get(node_addr, 0) + 1
+
+    async def _process_aggregation_event(self, age: AggregationEvent):
+        _, expected_nodes, missing_nodes = await age.get_event_data()
+        self_addr = self._engine.addr
+
+        expected_without_self = {node for node in expected_nodes if node != self_addr}
+        missing_without_self = {node for node in missing_nodes if node != self_addr}
+
+        self._aggregation_rounds_total += 1
+        self._dropout_expected_total += len(expected_without_self)
+        self._dropout_missing_total += len(missing_without_self)
+        if missing_without_self:
+            self._timed_out_rounds_total += 1
+
+    async def _process_test_metrics_event(self, tme: TestMetricsEvent):
+        cur_loss, cur_acc = await tme.get_event_data()
+        if cur_loss is not None and cur_acc is not None:
+            self._current_loss, self._current_accuracy = cur_loss, cur_acc
+
+            if self._per_round is not None:
+                await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
+
+    async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
+        cur_loss, cur_acc = await vme.get_event_data()
+        if cur_loss is not None and cur_acc is not None:
+            self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
+
+
+class TrustWorkloadTrainer(BaseTrustWorkload):
+    TRUSTSCORES_WAIT_TIMEOUT_SECONDS = 20
+    TRUSTSCORES_FORWARDING_GRACE_SECONDS = 1.0
+    TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS = 1.0
+
+    def __init__(self, engine, idx, trust_files_route):
+        super().__init__(engine, idx, trust_files_route, workload="training", role_label="TRAINER")
+        self._expected_trustscores_sources = set()
+        self._expected_trustscores_reports = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
+        self._received_trustscores_node_ids = set()
+        self._trustscores_wait_event = None
+        self._trustscores_score_accumulator = {}
+        self._trustscores_weight_accumulator = {}
+        self._trustscores_template_report = None
+        self._trustscores_local_copy_path = None
+        self._trustscores_local_report_initialized = False
+
+    async def init(self, experiment_name):
+        self._reset_trustscores_exchange_state()
+        self._trustscores_wait_event = asyncio.Event()
+        await super().init(experiment_name)
 
     async def finish_experiment_role_pre_actions(self):
         self._engine.trainer.datamodule.setup(stage="fit")
@@ -163,18 +230,13 @@ async def finish_experiment_role_pre_actions(self):
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
         federation = trust_config.get("federation")  # "CFL" or "DFL"
 
-        if federation == "DFL":
-            await self._finish_dfl_trustscores_exchange(trust_config, experiment_name)
-        elif federation == "SDFL":
-            await self._finish_sdfl_trustscores_exchange(trust_config, experiment_name)
+        if federation == "DFL" or federation == "SDFL":
+            await self._finish_trustscores_exchange(federation, trust_config, experiment_name)
         else:
             cm = CommunicationsManager.get_instance()
 
             server_addr = str(self._engine.config.participant["network_args"]["neighbors"]).strip()
 
-            #logging.info("connections=%s", list(cm.connections.keys()))
-            #logging.info("server in connections? %s", server_addr in cm.connections)
-
             bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(experiment_name, self._idx)
 
             role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
@@ -185,7 +247,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            message = cm.mm.create_message(
+            message = cm.create_message(
                 "trustworthiness",
                 action="report",
                 node_id=str(self._idx),
@@ -196,11 +258,11 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 role=role,
                 energy_grid=energy_grid,
                 emissions=emissions,
-                workload = workload,
-                cpu_model = cpu_model,
-                gpu_model = gpu_model,
-                cpu_used = cpu_used,
-                gpu_used = gpu_used,
+                workload=workload,
+                cpu_model=cpu_model,
+                gpu_model=gpu_model,
+                cpu_used=cpu_used,
+                gpu_used=gpu_used,
                 energy_consumed=energy_consumed,
                 sample_size=sample_size,
                 class_imbalance=class_imbalance,
@@ -213,7 +275,7 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
 
             logging.info(
                 "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
-                "accuracy=%s loss=%s energy_grid=%s emissions=%s workload=%s"
+                "accuracy=%s loss=%s role=%s energy_grid=%s emissions=%s workload=%s "
                 "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s dp_enabled=%s dp_epsilon=%s",
                 server_addr,
                 str(self._idx),
@@ -246,36 +308,9 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
                 allow_after_learning_finished=True,
             )
 
-    # -------------------------------------------------------------------------
-    # DFL trustscores flow
-    # -------------------------------------------------------------------------
-
-    async def _finish_dfl_trustscores_exchange(self, trust_config, experiment_name):
-        self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
-        await self._prepare_trustscores_exchange()
-
-        weights = self._load_local_trustscores_weights(experiment_name)
-        json_dumped = await asyncio.to_thread(
-            self._compute_local_trustscores_report,
-            experiment_name,
-            trust_config,
-            weights,
-            "DFL",
-        )
-        logging.info("JSON_dumped=%s", json_dumped)
-        self._initialize_local_trustscores_aggregation(experiment_name)
-        await self._share_trustscores_report(json_dumped)
-        await self._wait_for_trustscores_reports()
-        await self._wait_for_trustscores_forwarding_drain()
-        self._finalize_trustscores_aggregation()
-
-    # -------------------------------------------------------------------------
-    # SDFL trustscores flow
-    # -------------------------------------------------------------------------
-
-    async def _finish_sdfl_trustscores_exchange(self, trust_config, experiment_name):
+    async def _finish_trustscores_exchange(self, federation, trust_config, experiment_name):
         self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
-        await self._prepare_sdfl_trustscores_exchange()
+        await self._prepare_trustscores_exchange(federation)
 
         weights = self._load_local_trustscores_weights(experiment_name)
         local_trust_report_json = await asyncio.to_thread(
@@ -283,17 +318,22 @@ async def _finish_sdfl_trustscores_exchange(self, trust_config, experiment_name)
             experiment_name,
             trust_config,
             weights,
-            "DFL",
+            federation,
         )
+        logging.info("[TW %s] local trustscores report computed", federation)
 
-        if self._is_sdfl_aggregator_node():
+        if federation == "DFL":
+            self._initialize_local_trustscores_aggregation(experiment_name)
+        elif self._is_sdfl_aggregator_node():
             self._initialize_sdfl_global_trustscores_aggregation(experiment_name)
 
-        await self._share_sdfl_trustscores_report(local_trust_report_json)
-        await self._wait_for_sdfl_trustscores_reports()
-        await self._wait_for_sdfl_trustscores_forwarding_drain()
+        await self._share_trustscores_report(local_trust_report_json, federation)
+        await self._wait_for_trustscores_reports(federation)
+        await self._wait_for_trustscores_forwarding_drain(federation)
 
-        if self._is_sdfl_aggregator_node():
+        if federation == "DFL":
+            self._finalize_trustscores_aggregation()
+        elif self._is_sdfl_aggregator_node():
             self._finalize_sdfl_global_trustscores_aggregation()
 
     def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
@@ -321,36 +361,8 @@ def _compute_local_trustscores_report(self, experiment_name, trust_config, weigh
         return load_trust_report_json_dumped(experiment_name, self._idx)
 
     def _load_local_trustscores_weights(self, experiment_name: str) -> dict:
-        data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
-        with open(data_file_path, 'r') as data_file:
-            data = json.load(data_file)
-
-            return {
-                "robustness": float(data["robustness_pillar"]),
-                "resilience_to_attacks": float(data["resilience_to_attacks"]),
-                "algorithm_robustness": float(data["algorithm_robustness"]),
-                "client_reliability": float(data["client_reliability"]),
-                "privacy": float(data["privacy_pillar"]),
-                "technique": float(data["technique"]),
-                "uncertainty": float(data["uncertainty"]),
-                "indistinguishability": float(data["indistinguishability"]),
-                "fairness": float(data["fairness_pillar"]),
-                "class_distribution": float(data["class_distribution"]),
-                "outcome_fairness": float(data["outcome_fairness"]),
-                "explainability": float(data["explainability_pillar"]),
-                "interpretability": float(data["interpretability"]),
-                "post_hoc_methods": float(data["post_hoc_methods"]),
-                "accountability": float(data["accountability_pillar"]),
-                "factsheet_completeness": float(data["factsheet_completeness"]),
-                "monitoring":  float(data["monitoring"]),
-                "architectural_soundness": float(data["architectural_soundness_pillar"]),
-                "client_management": float(data["client_management"]),
-                "optimization": float(data["optimization"]),
-                "federation_management": float(data["federation_management"]),
-                "sustainability": float(data["sustainability_pillar"]),
-                "energy_source": float(data["energy_source"]),
-                "federation_complexity": float(data["federation_complexity"]),
-            }
+        federation = self._engine.config.participant["trust_args"]["scenario"].get("federation")
+        return load_trust_weights(experiment_name, federation)
 
     def _reset_trustscores_exchange_state(self):
         self._expected_trustscores_sources = set()
@@ -361,75 +373,6 @@ def _reset_trustscores_exchange_state(self):
         self._trustscores_local_copy_path = None
         self._trustscores_local_report_initialized = False
 
-    def _is_reputation_enabled(self) -> bool:
-        defense_args = self._engine.config.participant.get("defense_args", {})
-        reputation_config = defense_args.get("reputation", {})
-        return bool(reputation_config.get("enabled", False))
-
-    def _get_reputation_system(self):
-        return getattr(self._engine, "_reputation", None)
-
-    def _get_reputation_trust_summary(self) -> dict:
-        if not self._is_reputation_enabled():
-            return {
-                "reputation_enabled": False,
-                "neighbor_num": len(self._expected_trustscores_sources),
-                "avg_neighbor_reputation": 0.0,
-            }
-
-        reputation_system = self._get_reputation_system()
-        reputation_values = []
-
-        if reputation_system is not None:
-            for addr, data in reputation_system.reputation.items():
-                if addr == self._engine.addr:
-                    continue
-
-                reputation_value = data.get("reputation")
-                if reputation_value is None:
-                    continue
-
-                reputation_values.append(float(reputation_value))
-
-        if reputation_values:
-            avg_neighbor_reputation = sum(reputation_values) / len(reputation_values)
-        else:
-            reputation_config = self._engine.config.participant.get("defense_args", {}).get("reputation", {})
-            avg_neighbor_reputation = float(reputation_config.get("initial_reputation", 0.0) or 0.0)
-
-        return {
-            "reputation_enabled": True,
-            "neighbor_num": len(self._expected_trustscores_sources),
-            "avg_neighbor_reputation": avg_neighbor_reputation,
-        }
-
-    def _get_participation_trust_summary(self) -> dict:
-        total_clients = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
-        counts = list(self._round_participation_counts.values())
-
-        if len(counts) < total_clients:
-            counts.extend([0] * (total_clients - len(counts)))
-
-        return {
-            "selection_cv": get_participation_variation_score(counts),
-        }
-
-    def _get_system_reliability_summary(self) -> dict:
-        if self._dropout_expected_total <= 0:
-            dropout_rate = 0.0
-        else:
-            dropout_rate = self._dropout_missing_total / self._dropout_expected_total
-
-        if self._aggregation_rounds_total <= 0:
-            timeout_rate = 0.0
-        else:
-            timeout_rate = self._timed_out_rounds_total / self._aggregation_rounds_total
-
-        return {
-            "dropout_rate": float(dropout_rate),
-            "timeout_rate": float(timeout_rate),
-        }
-
     def _get_trustscores_weight_for_source(self, source: str, node_id: int | str) -> float:
         if not self._is_reputation_enabled():
             return 0.5
@@ -473,22 +416,25 @@ def _get_trustscores_peer_weights_from_reputation(self) -> dict:
     def _get_trustscores_self_weight(self) -> float:
         return 1.0
 
-    def _log_trustscores_node_weights(self):
+    def _log_trustscores_node_weights(self, federation: str):
         if not self._is_reputation_enabled():
             logging.info(
-                "[TW DFL] Reputation system disabled. trustscores weights fallback to 0.5 for all nodes"
+                "[TW %s] Reputation system disabled. trustscores weights fallback to 0.5 for all nodes",
+                federation,
             )
             return
 
         peer_weight_map = self._get_trustscores_peer_weights_from_reputation()
         if not peer_weight_map:
             logging.info(
-                "[TW DFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 0.5 when needed"
+                "[TW %s] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 0.5 when needed",
+                federation,
             )
             return
 
         logging.info(
-            "[TW DFL] Local trustscores weights from reputation | self_node_id=%s self_weight=%s peer_weights_by_addr=%s",
+            "[TW %s] Trustscores weights from reputation | self_node_id=%s self_weight=%s peer_weights_by_addr=%s",
+            federation,
             self._idx,
             self._get_trustscores_self_weight(),
             peer_weight_map,
@@ -496,7 +442,8 @@ def _log_trustscores_node_weights(self):
 
         for addr, weight in sorted(peer_weight_map.items()):
             logging.info(
-                "[TW DFL] Local trustscores weight from reputation | self_node_id=%s target_addr=%s weight=%s",
+                "[TW %s] Trustscores weight from reputation | self_node_id=%s target_addr=%s weight=%s",
+                federation,
                 self._idx,
                 addr,
                 weight,
@@ -522,7 +469,7 @@ def _initialize_local_trustscores_aggregation(self, experiment_name: str):
             self._get_trustscores_self_weight(),
         )
 
-    async def _prepare_trustscores_exchange(self):
+    async def _prepare_trustscores_exchange(self, federation: str):
         cm = CommunicationsManager.get_instance()
         self._expected_trustscores_sources = await cm.get_all_addrs_current_connections(only_direct=True)
 
@@ -535,22 +482,25 @@ async def _prepare_trustscores_exchange(self):
 
         if self._expected_trustscores_reports <= 0:
             self._trustscores_wait_event.set()
-            logging.info("[TW DFL] No remote trustscores reports expected")
+            logging.info("[TW %s] No remote trustscores reports expected", federation)
             return
 
         logging.info(
-            "[TW DFL] Expecting %s trustscores reports. Initial neighbors=%s",
+            "[TW %s] Expecting %s trustscores reports. Initial neighbors=%s aggregator_mode=%s",
+            federation,
             self._expected_trustscores_reports,
             sorted(self._expected_trustscores_sources),
+            self._is_sdfl_aggregator_node() if federation == "SDFL" else False,
         )
-        self._log_trustscores_node_weights()
+        if federation == "DFL" or self._is_sdfl_aggregator_node():
+            self._log_trustscores_node_weights(federation)
 
-    async def _share_trustscores_report(self, trust_report_json: str):
+    async def _share_trustscores_report(self, trust_report_json: str, federation: str):
         cm = CommunicationsManager.get_instance()
         neighbors = self._expected_trustscores_sources.copy()
 
         if not neighbors:
-            logging.info("[TW DFL] No direct neighbors available to share trustscores")
+            logging.info("[TW %s] No direct neighbors available to share trustscores", federation)
             return
 
         message = cm.create_message(
@@ -560,7 +510,7 @@ async def _share_trustscores_report(self, trust_report_json: str):
             trust_report_json=trust_report_json,
         )
 
-        logging.info("[TW DFL] Sharing trustscores report with neighbors=%s", sorted(neighbors))
+        logging.info("[TW %s] Sharing trustscores report with neighbors=%s", federation, sorted(neighbors))
         for neighbor in neighbors:
             await cm.send_message(
                 neighbor,
@@ -569,7 +519,7 @@ async def _share_trustscores_report(self, trust_report_json: str):
                 allow_after_learning_finished=True,
             )
 
-    async def _wait_for_trustscores_reports(self):
+    async def _wait_for_trustscores_reports(self, federation: str):
         if self._trustscores_wait_event is None:
             return
 
@@ -579,19 +529,21 @@ async def _wait_for_trustscores_reports(self):
                 timeout=self.TRUSTSCORES_WAIT_TIMEOUT_SECONDS,
             )
             logging.info(
-                "[TW DFL] Trustscores exchange complete (%s/%s)",
+                "[TW %s] Trustscores exchange complete (%s/%s)",
+                federation,
                 len(self._received_trustscores_node_ids),
                 self._expected_trustscores_reports,
             )
         except asyncio.TimeoutError:
             logging.warning(
-                "[TW DFL] Timeout waiting trustscores reports. Received=%s/%s missing=%s",
+                "[TW %s] Timeout waiting trustscores reports. Received=%s/%s missing=%s",
+                federation,
                 len(self._received_trustscores_node_ids),
                 self._expected_trustscores_reports,
                 self._expected_trustscores_reports - len(self._received_trustscores_node_ids),
             )
 
-    async def _wait_for_trustscores_forwarding_drain(self):
+    async def _wait_for_trustscores_forwarding_drain(self, federation: str):
         if not self._expected_trustscores_sources:
             return
 
@@ -605,7 +557,8 @@ async def _wait_for_trustscores_forwarding_drain(self):
         )
 
         logging.info(
-            "[TW DFL] Waiting %.2fs to drain forwarded trustscores messages before shutdown",
+            "[TW %s] Waiting %.2fs to drain forwarded trustscores messages before shutdown",
+            federation,
             forwarding_grace,
         )
         await asyncio.sleep(forwarding_grace)
@@ -638,8 +591,9 @@ def _initialize_sdfl_global_trustscores_aggregation(self, experiment_name: str):
             return
 
         trust_report_template = json.loads(load_trust_report_json_dumped(experiment_name, self._idx))
+        logs_dir = os.environ.get("NEBULA_LOGS_DIR", os.path.join("nebula", "app", "logs"))
         output_path = os.path.join(
-            os.environ.get("NEBULA_LOGS_DIR"),
+            logs_dir,
             experiment_name,
             "trustworthiness",
             "nebula_trust_results.json",
@@ -660,125 +614,6 @@ def _initialize_sdfl_global_trustscores_aggregation(self, experiment_name: str):
             output_path,
         )
 
-    async def _prepare_sdfl_trustscores_exchange(self):
-        cm = CommunicationsManager.get_instance()
-        self._expected_trustscores_sources = await cm.get_all_addrs_current_connections(only_direct=True)
-
-        if self._trustscores_wait_event is None:
-            self._trustscores_wait_event = asyncio.Event()
-        self._trustscores_wait_event.clear()
-
-        if len(self._received_trustscores_node_ids) >= self._expected_trustscores_reports:
-            self._trustscores_wait_event.set()
-
-        if self._expected_trustscores_reports <= 0:
-            self._trustscores_wait_event.set()
-            logging.info("[TW SDFL] No remote trustscores reports expected")
-            return
-
-        logging.info(
-            "[TW SDFL] Expecting %s trustscores reports. Initial neighbors=%s aggregator_mode=%s",
-            self._expected_trustscores_reports,
-            sorted(self._expected_trustscores_sources),
-            self._is_sdfl_aggregator_node(),
-        )
-        if self._is_sdfl_aggregator_node():
-            self._log_sdfl_trustscores_node_weights()
-
-    def _log_sdfl_trustscores_node_weights(self):
-        if not self._is_reputation_enabled():
-            logging.info(
-                "[TW SDFL] Reputation system disabled. trustscores weights fallback to 0.5 for all nodes"
-            )
-            return
-
-        peer_weight_map = self._get_trustscores_peer_weights_from_reputation()
-        if not peer_weight_map:
-            logging.info(
-                "[TW SDFL] Reputation system enabled, but no peer reputation weights are available yet. Falling back to 0.5 when needed"
-            )
-            return
-
-        logging.info(
-            "[TW SDFL] Global trustscores weights from reputation | self_node_id=%s self_weight=%s peer_weights_by_addr=%s",
-            self._idx,
-            self._get_trustscores_self_weight(),
-            peer_weight_map,
-        )
-
-        for addr, weight in sorted(peer_weight_map.items()):
-            logging.info(
-                "[TW SDFL] Global trustscores weight from reputation | self_node_id=%s target_addr=%s weight=%s",
-                self._idx,
-                addr,
-                weight,
-            )
-
-    async def _share_sdfl_trustscores_report(self, trust_report_json: str):
-        cm = CommunicationsManager.get_instance()
-        neighbors = self._expected_trustscores_sources.copy()
-
-        if not neighbors:
-            logging.info("[TW SDFL] No direct neighbors available to share trustscores")
-            return
-
-        message = cm.create_message(
-            "trustscores",
-            action="share",
-            node_id=str(self._idx),
-            trust_report_json=trust_report_json,
-        )
-
-        logging.info("[TW SDFL] Sharing local trustscores report with neighbors=%s", sorted(neighbors))
-        for neighbor in neighbors:
-            await cm.send_message(
-                neighbor,
-                message,
-                message_type="trustscores",
-                allow_after_learning_finished=True,
-            )
-
-    async def _wait_for_sdfl_trustscores_reports(self):
-        if self._trustscores_wait_event is None:
-            return
-
-        try:
-            await asyncio.wait_for(
-                self._trustscores_wait_event.wait(),
-                timeout=self.TRUSTSCORES_WAIT_TIMEOUT_SECONDS,
-            )
-            logging.info(
-                "[TW SDFL] Trustscores exchange complete (%s/%s)",
-                len(self._received_trustscores_node_ids),
-                self._expected_trustscores_reports,
-            )
-        except asyncio.TimeoutError:
-            logging.warning(
-                "[TW SDFL] Timeout waiting trustscores reports. Received=%s/%s missing=%s",
-                len(self._received_trustscores_node_ids),
-                self._expected_trustscores_reports,
-                self._expected_trustscores_reports - len(self._received_trustscores_node_ids),
-            )
-
-    async def _wait_for_sdfl_trustscores_forwarding_drain(self):
-        if not self._expected_trustscores_sources:
-            return
-
-        cm = CommunicationsManager.get_instance()
-        forwarder = getattr(cm, "forwarder", None)
-        forwarder_interval = getattr(forwarder, "interval", 0)
-        messages_interval = getattr(forwarder, "messages_interval", 0)
-        forwarding_grace = max(
-            self.TRUSTSCORES_FORWARDING_GRACE_SECONDS,
-            float(forwarder_interval) + float(messages_interval) + self.TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS,
-        )
-
-        logging.info(
-            "[TW SDFL] Waiting %.2fs to drain forwarded trustscores messages before shutdown",
-            forwarding_grace,
-        )
-        await asyncio.sleep(forwarding_grace)
-
     def _finalize_sdfl_global_trustscores_aggregation(self):
         if self._trustscores_template_report is None or self._trustscores_local_copy_path is None:
             logging.warning("[TW SDFL] Skipping global trustscores write because the template/output is not available")
@@ -800,63 +635,24 @@ def _finalize_sdfl_global_trustscores_aggregation(self):
 
     async def register_trustscores_report(self, source, message):
         federation = self._engine.config.participant["trust_args"]["scenario"].get("federation")
-        if federation == "SDFL":
-            await self._register_sdfl_trustscores_report(source, message)
-            return
-
-        await self._register_dfl_trustscores_report(source, message)
-
-    async def _register_dfl_trustscores_report(self, source, message):
-        if str(message.node_id) == str(self._idx):
-            logging.info("[TW DFL] Ignoring own trustscores report from %s", source)
-            return
-
-        if str(message.node_id) in self._received_trustscores_node_ids:
-            logging.info(
-                "[TW DFL] Ignoring duplicated trustscores report from node_id=%s source=%s",
-                message.node_id,
-                source,
-            )
-            return
-
-        trust_report = json.loads(message.trust_report_json)
-        remote_weight = self._get_trustscores_weight_for_source(source, message.node_id)
-        accumulate_weighted_trustscores(
-            report=trust_report,
-            weight=remote_weight,
-            score_accumulator=self._trustscores_score_accumulator,
-            weight_accumulator=self._trustscores_weight_accumulator,
-        )
-        logging.info(
-            "[TW DFL] Trustscores report received from node_id=%s source=%s accumulated_with_weight=%s",
-            message.node_id,
-            source,
-            remote_weight,
-        )
-
-        self._received_trustscores_node_ids.add(str(message.node_id))
-        logging.info(
-            "[TW DFL] Trustscores progress %s/%s",
-            len(self._received_trustscores_node_ids),
-            self._expected_trustscores_reports,
-        )
-        if len(self._received_trustscores_node_ids) >= self._expected_trustscores_reports:
-            self._trustscores_wait_event.set()
+        await self._register_trustscores_report(source, message, federation)  # one handler for DFL and SDFL; the federation label decides accumulate vs. forward-only
 
-    async def _register_sdfl_trustscores_report(self, source, message):
+    async def _register_trustscores_report(self, source, message, federation: str):
         if str(message.node_id) == str(self._idx):
-            logging.info("[TW SDFL] Ignoring own trustscores report from %s", source)
+            logging.info("[TW %s] Ignoring own trustscores report from %s", federation, source)
             return
 
         if str(message.node_id) in self._received_trustscores_node_ids:
             logging.info(
-                "[TW SDFL] Ignoring duplicated trustscores report from node_id=%s source=%s",
+                "[TW %s] Ignoring duplicated trustscores report from node_id=%s source=%s",
+                federation,
                 message.node_id,
                 source,
             )
             return
 
-        if self._is_sdfl_aggregator_node():
+        should_accumulate = federation == "DFL" or self._is_sdfl_aggregator_node()
+        if should_accumulate:
             trust_report = json.loads(message.trust_report_json)
             remote_weight = self._get_trustscores_weight_for_source(source, message.node_id)
             accumulate_weighted_trustscores(
@@ -866,133 +662,54 @@ async def _register_sdfl_trustscores_report(self, source, message):
                 weight_accumulator=self._trustscores_weight_accumulator,
             )
             logging.info(
-                "[TW SDFL] Trustscores report received from node_id=%s source=%s accumulated_with_weight=%s",
+                "[TW %s] Trustscores report received from node_id=%s source=%s accumulated_with_weight=%s",
+                federation,
                 message.node_id,
                 source,
                 remote_weight,
             )
         else:
             logging.info(
-                "[TW SDFL] Trustscores report received from node_id=%s source=%s forwarding_only=True",
+                "[TW %s] Trustscores report received from node_id=%s source=%s forwarding_only=True",
+                federation,
                 message.node_id,
                 source,
             )
 
         self._received_trustscores_node_ids.add(str(message.node_id))
         logging.info(
-            "[TW SDFL] Trustscores progress %s/%s",
+            "[TW %s] Trustscores progress %s/%s",
+            federation,
             len(self._received_trustscores_node_ids),
             self._expected_trustscores_reports,
         )
         if len(self._received_trustscores_node_ids) >= self._expected_trustscores_reports:
             self._trustscores_wait_event.set()
 
-    async def _process_round_end_event(self, ree: RoundEndEvent):
-        #scenario_name = self._engine.config.participant["scenario_args"]["name"]
-        #train_model = f"/nebula/app/logs/{scenario_name}/trustworthiness/participant_{self._idx}_train_model.pk"
-        #self._dump_model_for_trust(train_model)
-        # Save the model in the trustworthiness directory
-        #with open(train_model, 'wb') as f:
-        #    pickle.dump(self._engine.trainer.model, f)
-        pass
-
-    async def _process_round_start_event(self, rse: RoundStartEvent):
-        _, _, expected_nodes = await rse.get_event_data()
-        for node_addr in expected_nodes:
-            self._round_participation_counts[node_addr] = self._round_participation_counts.get(node_addr, 0) + 1
-
-    async def _process_aggregation_event(self, age: AggregationEvent):
-        _, expected_nodes, missing_nodes = await age.get_event_data()
-        self_addr = self._engine.addr
-
-        expected_without_self = {node for node in expected_nodes if node != self_addr}
-        missing_without_self = {node for node in missing_nodes if node != self_addr}
-
-        self._aggregation_rounds_total += 1
-        self._dropout_expected_total += len(expected_without_self)
-        self._dropout_missing_total += len(missing_without_self)
-        if missing_without_self:
-            self._timed_out_rounds_total += 1
-
-    async def _process_test_metrics_event(self, tme: TestMetricsEvent):
-        cur_loss, cur_acc = await tme.get_event_data()
-        if cur_loss and cur_acc:
-            self._current_loss, self._current_accuracy = cur_loss, cur_acc
-
-        if self._per_round is not None:
-            await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
-
-    async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
-        cur_loss, cur_acc = await vme.get_event_data()
-        if cur_loss is not None and cur_acc is not None:
-            self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
-
-    async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
-        #model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
-        #self._dump_model_for_trust(model_file)
-        # Save the model in the trustworthiness directory
-        #with open(model_file, 'wb') as f:
-        #    pickle.dump(self._engine.trainer.model, f)
-        pass
-
-
-class TrustWorkloadServer(TrustWorkload):
+class TrustWorkloadServer(BaseTrustWorkload):
+    REPORTS_WAIT_TIMEOUT_SECONDS = 60
 
     def __init__(self, engine: Engine, idx, trust_files_route):
-        self._workload = 'aggregation'
-        self._sample_size = 0
-        self._current_loss = None
-        self._current_accuracy = None
-        self._current_val_loss = None
-        self._current_val_accuracy = None
         server_start_time: ServerRoleBehavior = engine.rb
-        self._start_time = server_start_time._start_time
-        self._engine: Engine = engine
-        self._end_time = None
-        self._experiment_name = ""
-        self._idx = idx
-        self._trust_files_route = trust_files_route
-        self._per_round = None
+        super().__init__(  # NOTE(review): the per-instance fields removed above are presumably set by BaseTrustWorkload now - confirm
+            engine,
+            idx,
+            trust_files_route,
+            workload="aggregation",
+            role_label="SERVER",
+            sample_size=0,
+            start_time=server_start_time._start_time,
+        )
         self._trustworthiness_reports = {}
         self._expected_reports = int(self._engine.config.participant["scenario_args"]["n_nodes"])-1
         self._trust_config = None
         self._csv_completed = False
-        self._finish_post = False
-        self._round_participation_counts = {}
-        self._dropout_expected_total = 0
-        self._dropout_missing_total = 0
-        self._aggregation_rounds_total = 0
-        self._timed_out_rounds_total = 0
+        self._reports_wait_event = asyncio.Event()  # set by register_trustworthiness_report after the reports CSV is written
+        if self._expected_reports <= 0:  # n_nodes - 1 leaves no remote reporters
+            self._reports_wait_event.set()  # nothing to wait for, so the post-actions wait returns immediately
 
     async def init(self, experiment_name):
-        self._experiment_name = experiment_name
-        await EventManager.get_instance().subscribe_node_event(AggregationEvent, self._process_aggregation_event)
-        await EventManager.get_instance().subscribe_addonevent(TestMetricsEvent, self._process_test_metrics_event)
-        await EventManager.get_instance().subscribe_addonevent(ValidationMetricsEvent, self._process_validation_metrics_event)
-        await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
-        await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finished_event)
-
-        self._per_round = PerRoundTrustMetrics(
-            experiment_name=experiment_name,
-            participant_idx=self._idx,
-            trust_dir=self._trust_files_route,
-            role_label="SERVER",
-            enable_print=True,
-            enable_csv=True,
-        )
-        await self._per_round.setup(self._engine)
-
-    def get_workload(self):
-        return self._workload
-
-    def get_sample_size(self):
-        return self._sample_size
-
-    def get_metrics(self):
-        return (self._current_loss, self._current_accuracy)
-
-    def get_validation_metrics(self):
-        return (self._current_val_loss, self._current_val_accuracy)
+        await super().init(experiment_name)  # NOTE(review): event subscriptions and per-round metrics setup presumably moved to BaseTrustWorkload.init - confirm
 
     async def finish_experiment_role_pre_actions(self):
         pass
@@ -1002,62 +719,52 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
         self._trust_config = trust_config
         self._experiment_name = experiment_name
 
-        if self._csv_completed == True:
+        if self._csv_completed:
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, trustworthiness reports OK, starting generate_factsheet")
-            bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon= load_data_results_participant(
-                self._experiment_name,
-                self._idx,
-            )
-
-            role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(
-                self._experiment_name,
-                self._idx,
-            )
-
-            logging.info(
-                "[TW SERVER] local server report added for node_id=%s",
-                str(self._idx),
-            )
-
-            class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
-
-            model_size = get_bytes_model(self._engine.trainer.model)
-
-            local_entropy = get_local_entropy(self._idx, experiment_name)
-
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
-            save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
-            await self._generate_factsheet(trust_config, experiment_name)
+            await self._save_local_server_report_and_generate_factsheet(trust_config, experiment_name)
         else:
-            self._finish_post = True
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, waiting for trustworthiness reports")
-            await asyncio.sleep(60)
-            if self._trustworthiness_reports != None and self._csv_completed == False:
+            try:
+                await asyncio.wait_for(
+                    self._reports_wait_event.wait(),
+                    timeout=self.REPORTS_WAIT_TIMEOUT_SECONDS,
+                )
+            except asyncio.TimeoutError:
+                logging.warning(
+                    "[TW SERVER] Timeout waiting trustworthiness reports. Received=%s/%s",
+                    len(self._trustworthiness_reports),
+                    self._expected_reports,
+                )
+
+            if self._trustworthiness_reports is not None and not self._csv_completed:
                 save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
-            bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(
-                self._experiment_name,
-                self._idx,
-            )
+                self._csv_completed = True
 
-            role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(
-                self._experiment_name,
-                self._idx,
-            )
+            await self._save_local_server_report_and_generate_factsheet(trust_config, experiment_name)
 
-            logging.info(
-                "[TW SERVER] local server report added for node_id=%s",
-                str(self._idx),
-            )
+    async def _save_local_server_report_and_generate_factsheet(self, trust_config, experiment_name):  # shared tail of both finish_experiment_role_post_actions branches
+        bytes_sent, bytes_recv, _, _, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(  # accuracy and loss (3rd/4th values) are not used here
+            self._experiment_name,
+            self._idx,
+        )
 
-            class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
+        role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(
+            self._experiment_name,
+            self._idx,
+        )
 
-            model_size = get_bytes_model(self._engine.trainer.model)
+        logging.info(
+            "[TW SERVER] local server report added for node_id=%s",
+            str(self._idx),
+        )
 
-            local_entropy = get_local_entropy(self._idx, experiment_name)
+        class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
+        model_size = get_bytes_model(self._engine.trainer.model)
+        local_entropy = get_local_entropy(self._idx, experiment_name)
 
-            save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
-            save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
-            await self._generate_factsheet(trust_config, experiment_name)
+        save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)  # NOTE(review): the two literal 0 arguments look like placeholders - confirm against save_results_csv_cfl
+        save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
+        await self._generate_factsheet(trust_config, experiment_name)  # runs only after both server-side CSV rows have been saved
 
     async def register_trustworthiness_report(self, source, message):
         self._trustworthiness_reports[message.node_id] = {
@@ -1096,27 +803,9 @@ async def register_trustworthiness_report(self, source, message):
             # Generate CSV files
             save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
             self._csv_completed = True
+            self._reports_wait_event.set()
             logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
 
-    async def _process_round_start_event(self, rse: RoundStartEvent):
-        _, _, expected_nodes = await rse.get_event_data()
-        for node_addr in expected_nodes:
-            self._round_participation_counts[node_addr] = self._round_participation_counts.get(node_addr, 0) + 1
-
-    async def _process_aggregation_event(self, age: AggregationEvent):
-        _, expected_nodes, missing_nodes = await age.get_event_data()
-        self_addr = self._engine.addr
-
-        expected_without_self = {node for node in expected_nodes if node != self_addr}
-        missing_without_self = {node for node in missing_nodes if node != self_addr}
-
-        self._aggregation_rounds_total += 1
-        self._dropout_expected_total += len(expected_without_self)
-        self._dropout_missing_total += len(missing_without_self)
-        if missing_without_self:
-            self._timed_out_rounds_total += 1
-
-
     async def _generate_factsheet(self, trust_config, experiment_name):
         factsheet = Factsheet()
         self._engine.trainer.datamodule.setup(stage="fit")
@@ -1137,130 +826,11 @@ async def _generate_factsheet(self, trust_config, experiment_name):
             reliability_summary=self._get_system_reliability_summary(),
         )
 
-        data_file_path = os.path.join(os.environ.get('NEBULA_CONFIG_DIR'), experiment_name, "scenario.json")
-        with open(data_file_path, 'r') as data_file:
-            data = json.load(data_file)
-
-            weights = {
-                "robustness": float(data["robustness_pillar"]),
-                "resilience_to_attacks": float(data["resilience_to_attacks"]),
-                "algorithm_robustness": float(data["algorithm_robustness"]),
-                "client_reliability": float(data["client_reliability"]),
-                "privacy": float(data["privacy_pillar"]),
-                "technique": float(data["technique"]),
-                "uncertainty": float(data["uncertainty"]),
-                "indistinguishability": float(data["indistinguishability"]),
-                "fairness": float(data["fairness_pillar"]),
-                "selection_fairness": float(data["selection_fairness"]),
-                "performance_fairness": float(data["performance_fairness"]),
-                "class_distribution": float(data["class_distribution"]),
-                "outcome_fairness": float(data["outcome_fairness"]),
-                "explainability": float(data["explainability_pillar"]),
-                "interpretability": float(data["interpretability"]),
-                "post_hoc_methods": float(data["post_hoc_methods"]),
-                "accountability": float(data["accountability_pillar"]),
-                "factsheet_completeness":  float(data["factsheet_completeness"]),
-                "monitoring":  float(data["monitoring"]),
-                "architectural_soundness": float(data["architectural_soundness_pillar"]),
-                "client_management": float(data["client_management"]),
-                "optimization": float(data["optimization"]),
-                "federation_management": float(data["federation_management"]),
-                "sustainability": float(data["sustainability_pillar"]),
-                "energy_source": float(data["energy_source"]),
-                "hardware_efficiency": float(data["hardware_efficiency"]),
-                "federation_complexity": float(data["federation_complexity"])
-            }
-            federation = trust_config.get("federation")
-
-            trust_metric_manager = TrustMetricManager(self._start_time, federation)
-            trust_metric_manager.evaluate(experiment_name, weights, use_weights=True)
-
-    def _is_reputation_enabled(self) -> bool:
-        defense_args = self._engine.config.participant.get("defense_args", {})
-        reputation_config = defense_args.get("reputation", {})
-        return bool(reputation_config.get("enabled", False))
-
-    def _get_reputation_system(self):
-        return getattr(self._engine, "_reputation", None)
-
-    def _get_reputation_trust_summary(self) -> dict:
-        if not self._is_reputation_enabled():
-            return {
-                "reputation_enabled": False,
-                "avg_neighbor_reputation": 0.0,
-            }
-
-        reputation_system = self._get_reputation_system()
-        reputation_values = []
-
-        if reputation_system is not None:
-            for _, data in reputation_system.reputation.items():
-                reputation_value = data.get("reputation")
-                if reputation_value is None:
-                    continue
-
-                reputation_values.append(float(reputation_value))
-
-        if reputation_values:
-            avg_neighbor_reputation = sum(reputation_values) / len(reputation_values)
-        else:
-            reputation_config = self._engine.config.participant.get("defense_args", {}).get("reputation", {})
-            avg_neighbor_reputation = float(reputation_config.get("initial_reputation", 0.0) or 0.0)
-
-        return {
-            "reputation_enabled": True,
-            "avg_neighbor_reputation": avg_neighbor_reputation,
-        }
-
-    def _get_participation_trust_summary(self) -> dict:
-        total_clients = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
-        counts = list(self._round_participation_counts.values())
+        federation = trust_config.get("federation")
+        weights = load_trust_weights(experiment_name, federation)
 
-        if len(counts) < total_clients:
-            counts.extend([0] * (total_clients - len(counts)))
-
-        return {
-            "selection_cv": get_participation_variation_score(counts),
-        }
-
-    def _get_system_reliability_summary(self) -> dict:
-        if self._dropout_expected_total <= 0:
-            dropout_rate = 0.0
-        else:
-            dropout_rate = self._dropout_missing_total / self._dropout_expected_total
-
-        if self._aggregation_rounds_total <= 0:
-            timeout_rate = 0.0
-        else:
-            timeout_rate = self._timed_out_rounds_total / self._aggregation_rounds_total
-
-        return {
-            "dropout_rate": float(dropout_rate),
-            "timeout_rate": float(timeout_rate),
-        }
-
-    async def _process_test_metrics_event(self, tme: TestMetricsEvent):
-        cur_loss, cur_acc = await tme.get_event_data()
-        if cur_loss and cur_acc:
-            self._current_loss, self._current_accuracy = cur_loss, cur_acc
-
-        if self._per_round is not None:
-            await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
-
-    async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
-        cur_loss, cur_acc = await vme.get_event_data()
-        if cur_loss is not None and cur_acc is not None:
-            self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
-
-    async def _process_experiment_finished_event(self, efe:ExperimentFinishEvent):
-        """
-        model_file = f"/nebula/app/logs/{self._experiment_name}/trustworthiness/participant_{self._engine.idx}_final_model.pk"
-
-        # Save the model in the trustworthiness directory
-        with open(model_file, 'wb') as f:
-            pickle.dump(self._engine.trainer.model, f)
-        """
-        pass
+        trust_metric_manager = TrustMetricManager(self._start_time, federation)
+        trust_metric_manager.evaluate(experiment_name, weights, use_weights=True)
 
 """                                                     ##############################
                                                         #       TRUSTWORTHINESS      #
@@ -1278,7 +848,8 @@ def __init__(self, engine: Engine, config: Config):
         self._config = config
         self._trust_config = self._config.participant["trust_args"]["scenario"]
         self._experiment_name = self._config.participant["scenario_args"]["name"]
-        self._trust_dir_files = f"/nebula/app/logs/{self._experiment_name}/trustworthiness"
+        logs_dir = os.environ.get("NEBULA_LOGS_DIR", os.path.join(os.sep, "nebula", "app", "logs"))  # absolute fallback
+        self._trust_dir_files = os.path.join(logs_dir, self._experiment_name, "trustworthiness")
         self._emissions_file = 'emissions.csv'
         self._role: Role = engine.rb.get_role()
         self._idx = self._config.participant["device_args"]["idx"]
@@ -1301,10 +872,11 @@ async def start(self):
         self._tracker.start()
 
     async def _create_trustworthiness_directory(self):
-        trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self._experiment_name, "trustworthiness")
+        logs_dir = os.environ.get("NEBULA_LOGS_DIR", os.path.join(os.sep, "nebula", "app", "logs"))  # absolute fallback
+        trust_dir = os.path.join(logs_dir, self._experiment_name, "trustworthiness")
         # Create a directory to store files used to compute trust
         os.makedirs(trust_dir, exist_ok=True)
-        os.chmod(trust_dir, 0o777)
+        os.chmod(trust_dir, 0o755)
 
     async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         class_counter = self._engine.trainer.datamodule.get_samples_per_label()
@@ -1325,7 +897,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         privacy_metrics = self._engine.trainer.get_privacy_metrics()
         dp_enabled=bool(privacy_metrics.get("dp_enabled", False))
         dp_epsilon=privacy_metrics.get("dp_epsilon")
-        if dp_epsilon == None:
+        if dp_epsilon is None:
             dp_epsilon=0
 
         # Get TrustWorkload information
diff --git a/nebula/addons/trustworthiness/weights.py b/nebula/addons/trustworthiness/weights.py
new file mode 100644
index 000000000..8df8bbc44
--- /dev/null
+++ b/nebula/addons/trustworthiness/weights.py
@@ -0,0 +1,75 @@
+import json
+import os
+
+
+COMMON_TRUST_WEIGHT_FIELDS = {
+    "robustness": "robustness_pillar",
+    "resilience_to_attacks": "resilience_to_attacks",
+    "algorithm_robustness": "algorithm_robustness",
+    "client_reliability": "client_reliability",
+    "privacy": "privacy_pillar",
+    "technique": "technique",
+    "uncertainty": "uncertainty",
+    "indistinguishability": "indistinguishability",
+    "fairness": "fairness_pillar",
+    "class_distribution": "class_distribution",
+    "outcome_fairness": "outcome_fairness",
+    "explainability": "explainability_pillar",
+    "interpretability": "interpretability",
+    "post_hoc_methods": "post_hoc_methods",
+    "accountability": "accountability_pillar",
+    "factsheet_completeness": "factsheet_completeness",
+    "monitoring": "monitoring",
+    "architectural_soundness": "architectural_soundness_pillar",
+    "client_management": "client_management",
+    "optimization": "optimization",
+    "federation_management": "federation_management",
+    "sustainability": "sustainability_pillar",
+    "energy_source": "energy_source",
+    "federation_complexity": "federation_complexity",
+}
+
+CFL_TRUST_WEIGHT_FIELDS = {
+    **COMMON_TRUST_WEIGHT_FIELDS,
+    "selection_fairness": "selection_fairness",
+    "performance_fairness": "performance_fairness",
+    "hardware_efficiency": "hardware_efficiency",
+}
+
+DFL_TRUST_WEIGHT_FIELDS = COMMON_TRUST_WEIGHT_FIELDS
+
+TRUST_WEIGHT_FIELDS_BY_FEDERATION = {
+    "CFL": CFL_TRUST_WEIGHT_FIELDS,
+    "DFL": DFL_TRUST_WEIGHT_FIELDS,
+    "SDFL": DFL_TRUST_WEIGHT_FIELDS,
+}
+
+
+def load_trust_weights(experiment_name: str, federation: str) -> dict[str, float]:
+    """Load the trust weights of a scenario, keyed by internal weight name.
+
+    Reads ``<NEBULA_CONFIG_DIR>/<experiment_name>/scenario.json`` and maps the
+    fields required by ``federation`` (defaults to "CFL" when falsy) to floats.
+
+    Raises:
+        RuntimeError: if ``NEBULA_CONFIG_DIR`` is unset or empty.
+        ValueError: if ``federation`` is not CFL, DFL or SDFL.
+        KeyError: if scenario.json lacks any required weight field.
+    """
+    config_dir = os.environ.get("NEBULA_CONFIG_DIR")
+    if not config_dir:
+        raise RuntimeError("NEBULA_CONFIG_DIR is not configured")
+
+    federation_key = (federation or "CFL").upper()
+    weight_fields = TRUST_WEIGHT_FIELDS_BY_FEDERATION.get(federation_key)
+    if weight_fields is None:
+        raise ValueError(f"Unsupported trustworthiness federation: {federation}")
+
+    scenario_path = os.path.join(config_dir, experiment_name, "scenario.json")
+    with open(scenario_path, "r", encoding="utf-8") as data_file:
+        data = json.load(data_file)
+
+    missing_fields = sorted({field for field in weight_fields.values() if field not in data})
+    if missing_fields:
+        raise KeyError(f"Missing {federation_key} trustworthiness weight fields in {scenario_path}: {', '.join(missing_fields)}")
+    return {name: float(data[field]) for name, field in weight_fields.items()}

From cc30ffbb8c118de10212c9affb6dfe36cfd9b846 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 30 Apr 2026 15:30:41 +0200
Subject: [PATCH 36/66] DP error fixed DFL, calculation refactoring,
 maxgradnorm added to frontend, DP error fixed CFL (always True)

---
 nebula/addons/trustworthiness/calculation.py  | 329 ++++++++++--------
 nebula/addons/trustworthiness/factsheet.py    |  12 -
 .../addons/trustworthiness/trustworthiness.py |   2 +-
 nebula/addons/trustworthiness/utils.py        |   2 +-
 nebula/controller/scenarios.py                |  18 +-
 nebula/frontend/static/js/deployment/dp.js    |  14 +-
 nebula/frontend/templates/deployment.html     |   6 +
 7 files changed, 210 insertions(+), 173 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index c0a6468eb..e0bd71959 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -23,7 +23,6 @@
 from sklearn.metrics import f1_score, roc_auc_score, roc_curve
 from torch import nn, optim
 import torch.nn.functional as F
-import time
 import io
 
 
@@ -37,6 +36,10 @@
 R_LI = 0.1
 
 
+# ---------------------------------------------------------------------------
+# Generic score mapping helpers used by eval_metrics*.json
+# ---------------------------------------------------------------------------
+
 def get_mapped_score(score_key, score_map):
     """
     Finds the score by the score_key in the score_map.
@@ -71,7 +74,15 @@ def get_normalized_scores(scores):
     Returns:
         list: The normalized list.
     """
-    normalized = [(x - np.min(scores)) / (np.max(scores) - np.min(scores)) for x in scores]
+    if scores is None or len(scores) == 0:
+        return []
+
+    min_score = np.min(scores)
+    max_score = np.max(scores)
+    if max_score == min_score:
+        return [1.0 for _ in scores]
+
+    normalized = [(x - min_score) / (max_score - min_score) for x in scores]
     return normalized
 
 
@@ -168,7 +179,7 @@ def get_scaled_score(value, scale: list, direction: str):
     except Exception:
         logger.warning("Score minimum or score maximum is missing. The minimum has been set to 0 and the maximum to 1")
         value_min, value_max = 0, 1
-    if not value:
+    if value is None or value == "":
         logger.warning("Score value is missing. Set value to zero")
     else:
         low, high = 0, 1
@@ -214,6 +225,11 @@ def check_properties(*args):
     result = map(lambda x: x is not None and x != "", args)
     return np.mean(list(result))
 
+
+# ---------------------------------------------------------------------------
+# Local/global data distribution and participation metrics
+# ---------------------------------------------------------------------------
+
 def get_class_imbalance_local(participant_id, experiment_name):
     data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
 
@@ -239,10 +255,15 @@ def get_cv(list=None, std=None, mean=None):
         float: The coefficient of variation calculated.
     """
     if std is not None and mean is not None:
+        if mean == 0:
+            return 0
         return std / mean
 
     if list is not None:
-        return np.std(list) / np.mean(list)
+        mean_value = np.mean(list)
+        if mean_value == 0:
+            return 0
+        return np.std(list) / mean_value
 
     return 0
 
@@ -273,6 +294,11 @@ def get_participation_variation_score(participation_counts):
     return float(1 / (1 + cv))
 
 
+# ---------------------------------------------------------------------------
+# Privacy metrics
+# ---------------------------------------------------------------------------
+
+
 def get_global_privacy_risk(dp, epsilon, n):
     """
     Calculates the global privacy risk by epsilon and the number of clients.
@@ -459,6 +485,10 @@ def get_mia_auc(model, train_dataloader, test_dataloader, max_samples=5000):
         return 0.5
 
 
+# ---------------------------------------------------------------------------
+# Scenario report readers and aggregate system metrics
+# ---------------------------------------------------------------------------
+
 def get_elapsed_time(start_time, end_time):
     """
     Calculates the elapsed time during the execution of the scenario.
@@ -477,6 +507,42 @@ def get_elapsed_time(start_time, end_time):
 
     return elapsed_time
 
+
+def _trustworthiness_dir(scenario_name):
+    return os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness")
+
+
+def _global_data_results_path(scenario_name):
+    return os.path.join(_trustworthiness_dir(scenario_name), "data_results.csv")
+
+
+def _participant_data_results_path(scenario_name, participant_id):
+    return os.path.join(_trustworthiness_dir(scenario_name), f"data_results_{participant_id}.csv")
+
+
+def _read_global_results(scenario_name):
+    return read_csv(_global_data_results_path(scenario_name))
+
+
+def _read_participant_results(scenario_name, participant_id):
+    return read_csv(_participant_data_results_path(scenario_name, participant_id))
+
+
+def _find_participant_row(data, participant_id, source_name):
+    row = data[data["id"] == participant_id]
+
+    if row.empty:
+        try:
+            row = data[data["id"] == int(participant_id)]
+        except (TypeError, ValueError):
+            row = data.iloc[0:0]
+
+    if row.empty:
+        raise ValueError(f"Participant {participant_id} not found in {source_name}")
+
+    return row.iloc[0]
+
+
 def get_bytes_model(model):
     """
     Calculates the serialized size in bytes of a PyTorch model state_dict.
@@ -505,12 +571,7 @@ def get_bytes_sent_recv(scenario_name):
     Returns:
         4-tupla: The total bytes sent, the total bytes received, the mean bytes sent and the mean bytes received of the nodes.
     """
-    total_upload_bytes = 0
-    total_download_bytes = 0
-
-    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-
-    data = read_csv(data_file)
+    data = _read_global_results(scenario_name)
 
     number_files = len(data)
 
@@ -534,21 +595,17 @@ def get_avg_loss_accuracy(scenario_name):
     Returns:
         3-tupla: The mean loss of the models, the mean accuracies of the models, the standard deviation of the accuracies of the models.
     """
-    total_accuracy = 0
-    total_loss = 0
-
-    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-
-    data = read_csv(data_file)
+    data = _read_global_results(scenario_name)
 
     number_files = len(data)
 
     total_loss = data["loss"].sum()
     total_accuracy = data["accuracy"].sum()
 
-    avg_loss = total_loss / (number_files-1)
-    avg_accuracy = total_accuracy / (number_files-1)
-    std_accuracy = statistics.stdev(data["accuracy"])
+    denominator = max(1, number_files - 1)
+    avg_loss = total_loss / denominator
+    avg_accuracy = total_accuracy / denominator
+    std_accuracy = statistics.stdev(data["accuracy"]) if number_files > 1 else 0.0
 
     return avg_loss, avg_accuracy, std_accuracy
 
@@ -556,17 +613,13 @@ def get_underfitting_score(scenario_name, id):
     """
     Calculates the mean val accuracy of the nodes.
     """
-    total_val_accuracy = 0
-
-    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-
-    data = read_csv(data_file)
+    data = _read_global_results(scenario_name)
 
     number_files = len(data)
 
     total_val_accuracy = data["val_accuracy"].sum()
 
-    avg_val_accuracy = total_val_accuracy/ (number_files-1)
+    avg_val_accuracy = total_val_accuracy / max(1, number_files - 1)
 
     return avg_val_accuracy
 
@@ -582,18 +635,19 @@ def get_participant_loss_accuracy(scenario_name, participant_id):
     Returns:
         tuple[float, float]: (loss, accuracy)
     """
-    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-    data = read_csv(data_file)
-    row = data[data["id"] == participant_id]
-
-    if row.empty:
-        row = data[data["id"] == int(participant_id)]
+    data_file = _global_data_results_path(scenario_name)
+    row = _find_participant_row(read_csv(data_file), participant_id, data_file)
 
-    loss = float(row["loss"].iloc[0])
-    accuracy = float(row["accuracy"].iloc[0])
+    loss = float(row["loss"])
+    accuracy = float(row["accuracy"])
     return loss, accuracy
 
 
+# ---------------------------------------------------------------------------
+# Model performance metrics
+# ---------------------------------------------------------------------------
+
+
 def _get_model_accuracy(model, dataloader):
     """
     Calculates model accuracy over a dataloader.
@@ -846,14 +900,7 @@ def get_underfitting_score_local(scenario_name, id):
     Returns:
         float: Validation accuracy.
     """
-    data_file = os.path.join(
-        os.environ.get('NEBULA_LOGS_DIR'),
-        scenario_name,
-        "trustworthiness",
-        f"data_results_{id}.csv",
-    )
-
-    data = read_csv(data_file)
+    data = _read_participant_results(scenario_name, id)
     return float(data["val_accuracy"].iloc[0])
 
 def get_dp_local(scenario_name, id):
@@ -867,14 +914,7 @@ def get_dp_local(scenario_name, id):
     Returns:
         float: DP Enabled, Epsilon.
     """
-    data_file = os.path.join(
-        os.environ.get('NEBULA_LOGS_DIR'),
-        scenario_name,
-        "trustworthiness",
-        f"data_results_{id}.csv",
-    )
-
-    data = read_csv(data_file)
+    data = _read_participant_results(scenario_name, id)
     return data["dp_enabled"].iloc[0], float(data["dp_epsilon"].iloc[0])
 
 
@@ -889,24 +929,22 @@ def get_dp_global(scenario_name):
         tuple[bool, float | str]: Whether DP is enabled, and the
         average epsilon across client nodes.
     """
-    total_epsilon = 0
-
-    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-
-    data = read_csv(data_file)
+    data = _read_global_results(scenario_name)
 
     if data["dp_enabled"].iloc[0] == False:
         return False, 0.0
 
     number_files = len(data)
 
-    total_epsilon = data["dp_epsilon"].sum()
-
-    avg_epsilon = total_epsilon / (number_files-1)
+    avg_epsilon = data["dp_epsilon"].sum() / max(1, number_files - 1)
 
     return True, avg_epsilon
 
 
+# ---------------------------------------------------------------------------
+# Fairness and calibration metrics
+# ---------------------------------------------------------------------------
+
 def get_well_calibration_error(model, test_dataloader, n_bins=10):
     """
     Calculates a well-calibration error style metric using prediction confidence.
@@ -1039,12 +1077,7 @@ def get_avg_class_imbalance_model_size(scenario_name):
     Returns:
         2-tupla: The mean class imbalance mean and model size mean of the nodes.
     """
-    total_class_imbalance = 0
-    total_model_size = 0
-
-    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-
-    data = read_csv(data_file)
+    data = _read_global_results(scenario_name)
 
     number_files = len(data)
 
@@ -1067,19 +1100,17 @@ def get_entropy_list(scenario_name):
     Returns:
         list: Lista con los valores de entropy
     """
-    data_file = os.path.join(
-        os.environ.get('NEBULA_LOGS_DIR'),
-        scenario_name,
-        "trustworthiness",
-        "data_results.csv"
-    )
-
-    data = read_csv(data_file)
+    data = _read_global_results(scenario_name)
 
     entropy_list = data["local_entropy"].tolist()
 
     return entropy_list
 
+
+# ---------------------------------------------------------------------------
+# Explainability metrics
+# ---------------------------------------------------------------------------
+
 def get_feature_importance_cv(model, test_sample):
     """
     Calculates the coefficient of variation of the feature importance.
@@ -1126,8 +1157,19 @@ def _get_feature_importances(model, test_sample):
         return np.array([])
 
     def _clone_model(model_ref, device):
+        optimizer_attrs = ("_optimizer", "_optimizer_override")
+        optimizer_state = {}
         try:
+            for attr in optimizer_attrs:
+                if hasattr(model_ref, attr):
+                    optimizer_state[attr] = getattr(model_ref, attr)
+                    setattr(model_ref, attr, None)
+
             model_clone = copy.deepcopy(model_ref)
+            for attr in optimizer_attrs:
+                if hasattr(model_clone, attr):
+                    setattr(model_clone, attr, None)
+
             model_clone.to(device)
             model_clone.eval()
             return model_clone
@@ -1136,6 +1178,9 @@ def _clone_model(model_ref, device):
             logger.warning(exc)
             model_ref.eval()
             return model_ref
+        finally:
+            for attr, value in optimizer_state.items():
+                setattr(model_ref, attr, value)
 
     def _prepare_shap_inputs(sample):
         if not (isinstance(sample, (tuple, list)) and len(sample) >= 1):
@@ -1416,6 +1461,41 @@ def get_explainability_metrics_summary(model, test_dataloader, max_batches=4):
     return summary
 
 
+# ---------------------------------------------------------------------------
+# Robustness metrics based on ART estimators
+# ---------------------------------------------------------------------------
+
+def _build_art_classifier(model, input_shape, nb_classes, learning_rate):
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), learning_rate)
+
+    return PyTorchClassifier(
+        model=model,
+        loss=criterion,
+        optimizer=optimizer,
+        input_shape=tuple(input_shape),
+        nb_classes=nb_classes,
+    )
+
+
+def _validate_test_sample_tensors(test_sample):
+    if not (isinstance(test_sample, (tuple, list)) and len(test_sample) >= 2):
+        raise ValueError("`test_sample` must contain samples and labels.")
+
+    samples, labels = test_sample[0], test_sample[1]
+    if not (torch.is_tensor(samples) and torch.is_tensor(labels) and samples.dim() >= 1 and samples.shape[0] > 0):
+        raise ValueError("`test_sample` must contain non-empty tensors for samples and labels.")
+    # dim() guard: a 0-dim tensor has no batch axis, so shape[0] would raise IndexError.
+    return samples, labels
+
+
+def _coerce_max_samples(max_samples, default=8):
+    try:
+        return max(1, int(max_samples))
+    except Exception:
+        return default
+
+
 def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
     """
     Calculates the CLEVER score as the mean score over multiple samples.
@@ -1430,31 +1510,15 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=
     Returns:
         float: Mean CLEVER score across the selected samples.
     """
-    samples, _ = test_sample
-
-    if not (torch.is_tensor(samples) and samples.dim() >= 1 and samples.shape[0] != 0):
-        raise ValueError("`test_sample[0]` must be a non-empty torch.Tensor.")
+    samples, _ = _validate_test_sample_tensors(test_sample)
 
     input_shape = tuple(samples.shape[1:]) if samples.dim() >= 2 else tuple(samples.shape)
 
-    try:
-        max_samples = max(1, int(max_samples))
-    except Exception:
-        max_samples = 8
-
+    max_samples = _coerce_max_samples(max_samples)
     n_samples = min(int(samples.shape[0]), max_samples)
 
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), learning_rate)
-
     # Create the ART classifier once and reuse it for all selected samples.
-    classifier = PyTorchClassifier(
-        model=model,
-        loss=criterion,
-        optimizer=optimizer,
-        input_shape=input_shape,
-        nb_classes=nb_classes,
-    )
+    classifier = _build_art_classifier(model, input_shape, nb_classes, learning_rate)
 
     clever_scores = []
     for idx in range(n_samples):
@@ -1484,6 +1548,10 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=
     return float(np.mean(clever_scores))
 
 
+# ---------------------------------------------------------------------------
+# Sustainability and communication metrics
+# ---------------------------------------------------------------------------
+
 def stop_emissions_tracking_and_save(
     tracker: EmissionsTracker,
     outdir: str,
@@ -1574,6 +1642,11 @@ def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: fl
 
     return total_bytes / acc
 
+
+# ---------------------------------------------------------------------------
+# Additional robustness and adversarial metrics
+# ---------------------------------------------------------------------------
+
 def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
 
     """
@@ -1589,29 +1662,13 @@ def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, ma
     Returns:
         float: Mean loss sensitivity score across the selected samples.
     """
-    samples, labels = test_sample
-
-    if not (torch.is_tensor(samples) and torch.is_tensor(labels) and samples.shape[0] > 0):
-        raise ValueError("`test_sample` must contain non-empty tensors for samples and labels.")
-
-    try:
-        max_samples = max(1, int(max_samples))
-    except Exception:
-        max_samples = 8
+    samples, labels = _validate_test_sample_tensors(test_sample)
 
+    max_samples = _coerce_max_samples(max_samples)
     n_samples = min(int(samples.shape[0]), max_samples)
 
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), learning_rate)
-
     # Create the ART classifier once and reuse it for all selected samples.
-    classifier = PyTorchClassifier(
-        model=model,
-        loss=criterion,
-        optimizer=optimizer,
-        input_shape=samples.shape[1:],
-        nb_classes=nb_classes,
-    )
+    classifier = _build_art_classifier(model, samples.shape[1:], nb_classes, learning_rate)
 
     sensitivity_scores = []
     for idx in range(n_samples):
@@ -1661,21 +1718,6 @@ def compute_adversarial_accuracy_art(
     model.eval()
     model.to(device)
 
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-
-    sample_batch = next(iter(test_loader))
-    samples, _ = sample_batch
-    input_shape = samples.shape[1:]
-
-    classifier = PyTorchClassifier(
-        model=model,
-        loss=criterion,
-        optimizer=optimizer,
-        input_shape=input_shape,
-        nb_classes=nb_classes,
-    )
-
     correct = 0
     total = 0
 
@@ -1687,12 +1729,13 @@ def compute_adversarial_accuracy_art(
 
         with torch.no_grad():
             outputs = model(x_adv)
-            preds = outputs.argmax(dim=1)
+            logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
+            preds = logits.argmax(dim=1)
 
         correct += (preds == labels).sum().item()
         total += labels.size(0)
 
-    return correct / total
+    return correct / total if total > 0 else 0.0
 
 def get_empirical_robustness_score(
     model,
@@ -1719,24 +1762,13 @@ def get_empirical_robustness_score(
         float: Empirical robustness score (>= 0.0). If it cannot be computed, returns 0.0.
     """
     try:
-        samples, _ = test_sample
+        samples, _ = _validate_test_sample_tensors(test_sample)
 
         batch_size: int = int(samples.shape[0])
         n: int = int(min(max_samples, batch_size))
         x = samples[:n].detach().cpu().numpy()
 
-        input_shape = tuple(samples.shape[1:])
-
-        criterion = nn.CrossEntropyLoss()
-        optimizer = optim.Adam(model.parameters(), learning_rate)
-
-        classifier = PyTorchClassifier(
-            model=model,
-            loss=criterion,
-            optimizer=optimizer,
-            input_shape=input_shape,
-            nb_classes=nb_classes,
-        )
+        classifier = _build_art_classifier(model, samples.shape[1:], nb_classes, learning_rate)
 
         score = empirical_robustness(
             classifier=classifier,
@@ -1773,12 +1805,18 @@ def fgsm_attack(model, samples, labels, epsilon=0.03):
         Returns:
             torch.Tensor: Adversarially perturbed samples with the same shape as `samples`.
     """
-    samples = samples.clone().detach().to(samples.device)
-    labels = labels.to(samples.device)
+    try:
+        device = next(model.parameters()).device
+    except Exception:
+        device = samples.device
+
+    samples = samples.clone().detach().to(device)
+    labels = labels.to(device)
     samples.requires_grad = True
 
     outputs = model(samples)
-    loss = nn.CrossEntropyLoss()(outputs, labels)
+    logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
+    loss = nn.CrossEntropyLoss()(logits, labels)
     model.zero_grad()
     loss.backward()
 
@@ -1862,6 +1900,7 @@ def attack_success_rate(model, test_sample,epsilon=0.03):
     """
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
+    model.to(device)
 
     images, labels = test_sample
     images = images.to(device)
@@ -1869,7 +1908,8 @@ def attack_success_rate(model, test_sample,epsilon=0.03):
 
     with torch.no_grad():
         outputs = model(images)
-        preds = outputs.argmax(dim=1)
+        logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
+        preds = logits.argmax(dim=1)
 
     correct_mask = preds.eq(labels)
     num_correct = correct_mask.sum().item()
@@ -1881,7 +1921,8 @@ def attack_success_rate(model, test_sample,epsilon=0.03):
 
     with torch.no_grad():
         outputs_adv = model(x_adv)
-        preds_adv = outputs_adv.argmax(dim=1)
+        logits_adv = outputs_adv[0] if isinstance(outputs_adv, (tuple, list)) else outputs_adv
+        preds_adv = logits_adv.argmax(dim=1)
 
     successful_attacks = (correct_mask & preds_adv.ne(labels)).sum().item()
 
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index b5372a7f3..2ff4dce3b 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -104,18 +104,6 @@ def populate_factsheet_pre_train(self, data, scenario_name, model):
                     factsheet["configuration"]["monitoring"] = True
                     factsheet["configuration"]["total_round_num"] = n_rounds
 
-                    """
-                    if poisoned_noise_percent != 0:
-                        factsheet["configuration"]["differential_privacy"] = True
-                        factsheet["configuration"]["dp_epsilon"] = poisoned_noise_percent
-                    else:
-                        factsheet["configuration"]["differential_privacy"] = False
-                        factsheet["configuration"]["dp_epsilon"] = ""
-                    """
-
-                    factsheet["configuration"]["differential_privacy"] = False
-                    factsheet["configuration"]["dp_epsilon"] = ""
-
                     factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
                     factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
                     factsheet["configuration"]["local_update_steps"] = data["epochs"]
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 7977ff929..121496690 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -228,7 +228,7 @@ async def finish_experiment_role_pre_actions(self):
         self._sample_size = len(train_loader)
 
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
-        federation = trust_config.get("federation")  # "CFL" or "DFL"
+        federation = trust_config.get("federation")
 
         if federation == "DFL" or federation == "SDFL":
             await self._finish_trustscores_exchange(federation, trust_config, experiment_name)
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index 0acdbe8b1..0c99255ec 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -307,7 +307,7 @@ def load_data_results_participant(experiment_name: str, participant_id: int | st
     accuracy = float(row["accuracy"])
     loss = float(row["loss"])
     val_accuracy = float(row["val_accuracy"])
-    dp_enabled = bool(row["dp_enabled"])
+    dp_enabled = str(row["dp_enabled"]).strip().lower() == "true"  # str(): value may already be a bool
     dp_epsilon = float(row["dp_epsilon"])
 
     return bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index 262f5b102..62b2a8a41 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -711,6 +711,10 @@ def __init__(self, scenario, user=None):
                     participant_config["training_args"]["dp"]["noise_multiplier"] = float(
                         self.scenario.dp["noise_multiplier"]
                     )
+                if "max_grad_norm" in self.scenario.dp:
+                    participant_config["training_args"]["dp"]["max_grad_norm"] = float(
+                        self.scenario.dp["max_grad_norm"]
+                    )
             participant_config["device_args"]["accelerator"] = self.scenario.accelerator
             participant_config["device_args"]["gpu_id"] = self.scenario.gpu_id
             participant_config["device_args"]["logging"] = self.scenario.logginglevel
@@ -977,7 +981,7 @@ async def load_configurations_and_start_nodes(
 
                 logging.info(f"Configuration | additional nodes |  participant: {self.n_nodes + i + 1}")
                 last_ip = participant_config["network_args"]["ip"]
-                logging.info(f"Valores de la ultima ip: ({last_ip})")
+                logging.info(f"Last ip values: ({last_ip})")
                 participant_config["scenario_args"]["n_nodes"] = self.n_nodes + additional_nodes  # self.n_nodes + i + 1
                 participant_config["device_args"]["idx"] = last_participant_index + i
                 participant_config["network_args"]["neighbors"] = ""
@@ -1018,9 +1022,6 @@ async def load_configurations_and_start_nodes(
         dataset_name = self.scenario.dataset
         dataset = None
 
-
-        logging.info(f"[DEBUG] dataset_name received: {dataset_name!r}")
-        logging.info("SALE YA")
         if dataset_name == "MNIST":
             dataset = MNISTDataset(
                 num_classes=10,
@@ -1052,7 +1053,6 @@ async def load_configurations_and_start_nodes(
                 config_dir=self.config_dir,
             )
         elif dataset_name == "KDDCUP99":
-            logging.info("[DEBUG] entrando en rama KDDCUP99 para crear dataset")
             dataset = KDDCUP99Dataset(
                 num_classes=2,
                 partitions_number=self.n_nodes,
@@ -1117,15 +1117,7 @@ async def load_configurations_and_start_nodes(
 
         logging.info(f"Splitting {dataset_name} dataset...")
         dataset.initialize_dataset()
-        logging.info(
-            f"[DEBUG] train_set is None? {dataset.train_set is None} | "
-            f"test_set is None? {dataset.test_set is None}"
-        )
 
-        if dataset.train_set is not None and hasattr(dataset.train_set, "data"):
-            logging.info(f"[DEBUG] Dataset train_set.data.shape = {dataset.train_set.data.shape}")
-        else:
-            logging.info("[DEBUG] Dataset train_set has no .data yet (or train_set is None)")
         logging.info(f"Splitting {dataset_name} dataset... Done")
 
         if self.scenario.deployment in ["docker", "process", "physical"]:
diff --git a/nebula/frontend/static/js/deployment/dp.js b/nebula/frontend/static/js/deployment/dp.js
index 7512dfaa3..7821386f2 100644
--- a/nebula/frontend/static/js/deployment/dp.js
+++ b/nebula/frontend/static/js/deployment/dp.js
@@ -2,7 +2,8 @@
 const DpManager = (function() {
     const DEFAULT_DP_CONFIG = {
         enabled: false,
-        noise_multiplier: 1.0
+        noise_multiplier: 1.0,
+        max_grad_norm: 1.0
     };
 
     function initializeDifferentialPrivacy() {
@@ -29,12 +30,17 @@ const DpManager = (function() {
     function getDpConfig() {
         const noiseMultiplierInput = document.getElementById("dpNoiseMultiplier");
         const noiseMultiplier = parseFloat(noiseMultiplierInput?.value);
+        const maxGradNormInput = document.getElementById("dpMaxGradNorm");
+        const maxGradNorm = parseFloat(maxGradNormInput?.value);
 
         return {
             enabled: Boolean(document.getElementById("dpSwitch")?.checked),
             noise_multiplier: Number.isFinite(noiseMultiplier)
                 ? noiseMultiplier
-                : DEFAULT_DP_CONFIG.noise_multiplier
+                : DEFAULT_DP_CONFIG.noise_multiplier,
+            max_grad_norm: Number.isFinite(maxGradNorm)
+                ? maxGradNorm
+                : DEFAULT_DP_CONFIG.max_grad_norm
         };
     }
 
@@ -52,6 +58,10 @@ const DpManager = (function() {
         if (noiseMultiplierInput) {
             noiseMultiplierInput.value = dpConfig.noise_multiplier;
         }
+        const maxGradNormInput = document.getElementById("dpMaxGradNorm");
+        if (maxGradNormInput) {
+            maxGradNormInput.value = dpConfig.max_grad_norm;
+        }
         toggleDpSettings(dpSwitch.checked);
     }
 
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 4ad79c579..129a848ca 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -535,6 +535,12 @@ <h5 class="step-title">Noise multiplier</h5>
                                 placeholder="Noise multiplier" min="0" step="0.01" value="1.0"
                                 style="display: inline; width: 80%">
                         </div>
+                        <h5 class="step-title">Max Grad Norm</h5>
+                        <div class="form-check form-check-inline">
+                            <input type="number" class="form-control" id="dpMaxGradNorm"
+                                placeholder="Max Grad Norm" min="0" step="0.01" value="1.0"
+                                style="display: inline; width: 80%">
+                        </div>
                         <small class="form-text text-muted">
                             Other DP parameters are still taken from
                             <code>nebula/frontend/config/participant.json.example</code>.

From 32651fba5f05899c0db29f957122a70634c6c486 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 4 May 2026 14:02:30 +0200
Subject: [PATCH 37/66] Factsheet refactoring, CFL reputation metrics added,
 CFL reputation updated and fixed

---
 nebula/addons/reputation/reputation.py        | 354 +++++++++---------
 .../trustworthiness/configs/eval_metrics.json |  28 ++
 .../configs/factsheet_template.json           |   2 +
 .../addons/trustworthiness/dfl_factsheet.py   | 224 +++--------
 nebula/addons/trustworthiness/factsheet.py    | 213 +++--------
 .../trustworthiness/factsheet_common.py       | 195 ++++++++++
 nebula/core/engine.py                         |  12 +-
 7 files changed, 532 insertions(+), 496 deletions(-)
 create mode 100644 nebula/addons/trustworthiness/factsheet_common.py

diff --git a/nebula/addons/reputation/reputation.py b/nebula/addons/reputation/reputation.py
index 561199513..dfa630296 100644
--- a/nebula/addons/reputation/reputation.py
+++ b/nebula/addons/reputation/reputation.py
@@ -61,7 +61,7 @@ class Reputation:
     The class handles collection of metrics, calculation of static and dynamic reputation,
     updating history, and communication of reputation scores to neighbors.
     """
-    
+
     REPUTATION_THRESHOLD = 0.6
     SIMILARITY_THRESHOLD = 0.6
     INITIAL_ROUND_FOR_REPUTATION = 1
@@ -70,12 +70,12 @@ class Reputation:
     WEIGHTED_HISTORY_ROUNDS = 3
     FRACTION_ANOMALY_MULTIPLIER = 1.20
     THRESHOLD_ANOMALY_MULTIPLIER = 1.15
-    
+
     # Augmentation factors
     LATENCY_AUGMENT_FACTOR = 1.4
     MESSAGE_AUGMENT_FACTOR_EARLY = 2.0
     MESSAGE_AUGMENT_FACTOR_NORMAL = 1.1
-    
+
     # Penalty and decay factors
     HISTORICAL_PENALTY_THRESHOLD = 0.9
     NEGATIVE_LATENCY_PENALTY = 0.3
@@ -104,7 +104,7 @@ def __init__(self, engine: "Engine", config: "Config"):
         self._addr = engine.addr
         self._log_dir = engine.log_dir
         self._idx = engine.idx
-        
+
         self._initialize_data_structures()
         self._configure_constants()
         self._load_configuration()
@@ -116,7 +116,7 @@ def _configure_constants(self):
         """Configure system constants from config or use defaults."""
         reputation_config = self._config.participant.get("defense_args", {}).get("reputation", {})
         constants_config = reputation_config.get("constants", {})
-        
+
         self.REPUTATION_THRESHOLD = constants_config.get("reputation_threshold", self.REPUTATION_THRESHOLD)
         self.SIMILARITY_THRESHOLD = constants_config.get("similarity_threshold", self.SIMILARITY_THRESHOLD)
         self.INITIAL_ROUND_FOR_REPUTATION = constants_config.get("initial_round_for_reputation", self.INITIAL_ROUND_FOR_REPUTATION)
@@ -188,7 +188,7 @@ def _configure_metric_weights(self):
         """Configure weights for different metrics based on weighting factor."""
         default_weight = 0.25
         metric_names = ["model_arrival_latency", "model_similarity", "num_messages", "fraction_parameters_changed"]
-        
+
         if self._weighting_factor == "static":
             self._weight_model_arrival_latency = float(
                 self._metrics.get("model_arrival_latency", {}).get("weight", default_weight)
@@ -209,7 +209,7 @@ def _configure_metric_weights(self):
                 elif not isinstance(self._metrics[metric_name], dict):
                     self._metrics[metric_name] = {"enabled": bool(self._metrics[metric_name])}
                 self._metrics[metric_name]["weight"] = default_weight
-            
+
             self._weight_model_arrival_latency = default_weight
             self._weight_model_similarity = default_weight
             self._weight_num_messages = default_weight
@@ -229,24 +229,24 @@ def engine(self):
     def _is_metric_enabled(self, metric_name: str, metrics_config: dict = None) -> bool:
         """
         Check if a specific metric is enabled based on the provided configuration.
-        
+
         Args:
             metric_name (str): The name of the metric to check.
-            metrics_config (dict, optional): The configuration dictionary for metrics. 
+            metrics_config (dict, optional): The configuration dictionary for metrics.
                                            If None, uses the instance's _metrics.
-            
+
         Returns:
             bool: True if the metric is enabled, False otherwise.
         """
         config_to_use = metrics_config if metrics_config is not None else getattr(self, '_metrics', None)
-        
+
         if not isinstance(config_to_use, dict):
             if metrics_config is not None:
                 logging.warning(f"metrics_config is not a dictionary: {type(metrics_config)}")
             else:
                 logging.warning("_metrics is not properly initialized")
             return False
-            
+
         metric_config = config_to_use.get(metric_name)
         if metric_config is None:
             return False
@@ -269,7 +269,7 @@ def save_data(
     ):
         """
         Save data between nodes and aggregated models.
-        
+
         Args:
             type_data: Type of data to save ('number_message', 'fraction_of_params_changed', 'model_arrival_latency')
             nei: Neighbor identifier
@@ -290,7 +290,7 @@ def save_data(
 
         try:
             metrics_instance = self.connection_metrics[nei]
-            
+
             if type_data == "number_message":
                 message_data = {"time": time, "current_round": current_round}
                 if not isinstance(metrics_instance.messages, list):
@@ -345,19 +345,19 @@ async def init_reputation(
     ):
         """
         Initialize the reputation system.
-        
+
         Args:
             federation_nodes: List of federation node identifiers
-            round_num: Current round number  
+            round_num: Current round number
             last_feedback_round: Last round that received feedback
             init_reputation: Initial reputation value to assign
         """
         if not self._enabled:
             return
-            
+
         if not self._validate_init_parameters(federation_nodes, round_num, init_reputation):
             return
-            
+
         neighbors = self._validate_federation_nodes(federation_nodes)
         if not neighbors:
             logging.error("init_reputation | No valid neighbors found")
@@ -370,13 +370,13 @@ def _validate_init_parameters(self, federation_nodes, round_num, init_reputation
         if not federation_nodes:
             logging.error("init_reputation | No federation nodes provided")
             return False
-            
+
         if round_num is None:
             logging.warning("init_reputation | Round number not provided")
-            
+
         if init_reputation is None:
             logging.warning("init_reputation | Initial reputation value not provided")
-            
+
         return True
 
     async def _initialize_neighbor_reputations(self, neighbors: list, round_num: int, last_feedback_round: int, init_reputation: float):
@@ -392,7 +392,7 @@ def _create_or_update_reputation_entry(self, nei: str, round_num: int, last_feed
             "round": round_num,
             "last_feedback_round": last_feedback_round,
         }
-        
+
         if nei not in self.reputation:
             self.reputation[nei] = reputation_data
         elif self.reputation[nei].get("reputation") is None:
@@ -401,21 +401,21 @@ def _create_or_update_reputation_entry(self, nei: str, round_num: int, last_feed
     def _validate_federation_nodes(self, federation_nodes) -> list:
         """
         Validate and filter federation nodes.
-        
+
         Args:
             federation_nodes: List of federation node identifiers
-            
+
         Returns:
             list: List of valid node identifiers
         """
         if not federation_nodes:
             return []
-            
+
         valid_nodes = [node for node in federation_nodes if node and str(node).strip()]
-        
+
         if not valid_nodes:
             logging.warning("No valid federation nodes found after filtering")
-            
+
         return valid_nodes
 
     async def _calculate_static_reputation(
@@ -429,7 +429,7 @@ async def _calculate_static_reputation(
 
         Args:
             addr: The participant's address
-            nei: The neighbor's address  
+            nei: The neighbor's address
             metric_values: Dictionary with metric values
         """
         static_weights = {
@@ -440,10 +440,10 @@ async def _calculate_static_reputation(
         }
 
         reputation_static = sum(
-            metric_values.get(metric_name, 0) * static_weights[metric_name] 
+            metric_values.get(metric_name, 0) * static_weights[metric_name]
             for metric_name in static_weights
         )
-        
+
         logging.info(f"Static reputation for node {nei} at round {await self.engine.get_round()}: {reputation_static}")
 
         avg_reputation = await self.save_reputation_history_in_memory(self.engine.addr, nei, reputation_static)
@@ -476,48 +476,48 @@ async def _calculate_dynamic_reputation(self, addr, neighbors):
     async def _calculate_average_weights(self):
         """Calculate average weights for all enabled metrics."""
         average_weights = {}
-        
+
         for metric_name in self.history_data.keys():
             if self._is_metric_enabled(metric_name):
                 average_weights[metric_name] = await self._get_metric_average_weight(metric_name)
-        
+
         return average_weights
-    
+
     async def _get_metric_average_weight(self, metric_name):
         """Get the average weight for a specific metric."""
         if metric_name not in self.history_data or not self.history_data[metric_name]:
             logging.debug(f"No history data available for metric: {metric_name}")
             return 0
-        
+
         valid_entries = [
             entry for entry in self.history_data[metric_name]
-            if (entry.get("round") is not None and 
-                entry["round"] >= await self._engine.get_round() and 
+            if (entry.get("round") is not None and
+                entry["round"] >= await self._engine.get_round() and
                 entry.get("weight") not in [None, -1])
         ]
-        
+
         if not valid_entries:
             return 0
-            
+
         try:
             weights = [entry["weight"] for entry in valid_entries if entry.get("weight") is not None]
             return sum(weights) / len(weights) if weights else 0
         except (TypeError, ZeroDivisionError) as e:
             logging.warning(f"Error calculating average weight for {metric_name}: {e}")
             return 0
-    
+
     async def _process_neighbors_reputation(self, addr, neighbors, average_weights):
         """Process reputation calculation for all neighbors."""
         for nei in neighbors:
             metric_values = await self._get_neighbor_metric_values(nei)
-            
+
             if all(metric_name in metric_values for metric_name in average_weights):
                 await self._update_neighbor_reputation(addr, nei, metric_values, average_weights)
-    
+
     async def _get_neighbor_metric_values(self, nei):
         """Get metric values for a specific neighbor in the current round."""
         metric_values = {}
-        
+
         for metric_name in self.history_data:
             if self._is_metric_enabled(metric_name):
                 for entry in self.history_data.get(metric_name, []):
@@ -526,16 +526,16 @@ async def _get_neighbor_metric_values(self, nei):
                         entry.get("nei") == nei):
                         metric_values[metric_name] = entry.get("metric_value", 0)
                         break
-        
+
         return metric_values
-    
+
     async def _update_neighbor_reputation(self, addr, nei, metric_values, average_weights):
         """Update reputation for a specific neighbor."""
         reputation_with_weights = sum(
-            metric_values.get(metric_name, 0) * average_weights[metric_name] 
+            metric_values.get(metric_name, 0) * average_weights[metric_name]
             for metric_name in average_weights
         )
-        
+
         logging.info(
             f"Dynamic reputation with weights for {nei} at round {await self._engine.get_round()}: {reputation_with_weights}"
         )
@@ -564,7 +564,7 @@ async def _update_reputation_record(self, nei: str, reputation: float, data: dic
             data: Additional data to update (currently unused)
         """
         current_round = await self._engine.get_round()
-        
+
         if nei not in self.reputation:
             self.reputation[nei] = {
                 "reputation": reputation,
@@ -576,7 +576,7 @@ async def _update_reputation_record(self, nei: str, reputation: float, data: dic
             self.reputation[nei]["round"] = current_round
 
         logging.info(f"Reputation of node {nei}: {self.reputation[nei]['reputation']}")
-        
+
         if self.reputation[nei]["reputation"] < self.REPUTATION_THRESHOLD and current_round > 0:
             self.rejected_nodes.add(nei)
             logging.info(f"Rejected node {nei} at round {current_round}")
@@ -608,23 +608,23 @@ def calculate_weighted_values(
             reputation_metrics
         )
         self._add_current_metrics_to_history(active_metrics, history_data, current_round, addr, nei)
-        
+
         if current_round >= self.INITIAL_ROUND_FOR_REPUTATION and len(active_metrics) > 0:
             adjusted_weights = self._calculate_dynamic_weights(active_metrics, history_data)
         else:
             adjusted_weights = self._calculate_uniform_weights(active_metrics)
-        
+
         self._update_history_with_weights(active_metrics, history_data, adjusted_weights, current_round, nei)
 
     def _ensure_history_data_structure(self, history_data: dict):
         """Ensure all required keys exist in history data structure."""
         required_keys = [
             "num_messages",
-            "model_similarity", 
+            "model_similarity",
             "fraction_parameters_changed",
             "model_arrival_latency",
         ]
-        
+
         for key in required_keys:
             if key not in history_data:
                 history_data[key] = []
@@ -644,7 +644,7 @@ def _get_active_metrics(
             "fraction_parameters_changed": fraction_score_asign,
             "model_arrival_latency": avg_model_arrival_latency,
         }
-        
+
         return {k: v for k, v in all_metrics.items() if self._is_metric_enabled(k, reputation_metrics)}
 
     def _add_current_metrics_to_history(self, active_metrics: dict, history_data: dict, current_round: int, addr: str, nei: str):
@@ -662,7 +662,7 @@ def _add_current_metrics_to_history(self, active_metrics: dict, history_data: di
     def _calculate_dynamic_weights(self, active_metrics: dict, history_data: dict) -> dict:
         """Calculate dynamic weights based on metric deviations."""
         deviations = self._calculate_metric_deviations(active_metrics, history_data)
-        
+
         if all(deviation == 0.0 for deviation in deviations.values()):
             return self._generate_random_weights(active_metrics)
         else:
@@ -672,7 +672,7 @@ def _calculate_dynamic_weights(self, active_metrics: dict, history_data: dict) -
     def _calculate_metric_deviations(self, active_metrics: dict, history_data: dict) -> dict:
         """Calculate deviations of current metrics from historical means."""
         deviations = {}
-        
+
         for metric_name, current_value in active_metrics.items():
             historical_values = history_data[metric_name]
             metric_values = [
@@ -680,11 +680,11 @@ def _calculate_metric_deviations(self, active_metrics: dict, history_data: dict)
                 for entry in historical_values
                 if "metric_value" in entry and entry["metric_value"] != 0
             ]
-            
+
             mean_value = np.mean(metric_values) if metric_values else 0
             deviation = abs(current_value - mean_value)
             deviations[metric_name] = deviation
-            
+
         return deviations
 
     def _generate_random_weights(self, active_metrics: dict) -> dict:
@@ -692,7 +692,7 @@ def _generate_random_weights(self, active_metrics: dict) -> dict:
         num_metrics = len(active_metrics)
         random_weights = [random.random() for _ in range(num_metrics)]
         total_random_weight = sum(random_weights)
-        
+
         return {
             metric_name: weight / total_random_weight
             for metric_name, weight in zip(active_metrics, random_weights, strict=False)
@@ -702,14 +702,14 @@ def _normalize_deviation_weights(self, deviations: dict) -> dict:
         """Normalize weights based on deviations."""
         max_deviation = max(deviations.values()) if deviations else 1
         normalized_weights = {
-            metric_name: (deviation / max_deviation) 
+            metric_name: (deviation / max_deviation)
             for metric_name, deviation in deviations.items()
         }
-        
+
         total_weight = sum(normalized_weights.values())
         if total_weight > 0:
             return {
-                metric_name: weight / total_weight 
+                metric_name: weight / total_weight
                 for metric_name, weight in normalized_weights.items()
             }
         else:
@@ -720,20 +720,20 @@ def _adjust_weights_with_minimum(self, normalized_weights: dict, deviations: dic
         """Apply minimum weight constraints and renormalize."""
         mean_deviation = np.mean(list(deviations.values()))
         dynamic_min_weight = max(self.DYNAMIC_MIN_WEIGHT_THRESHOLD, mean_deviation / (mean_deviation + 1))
-        
+
         adjusted_weights = {}
         total_adjusted_weight = 0
-        
+
         for metric_name, weight in normalized_weights.items():
             adjusted_weight = max(weight, dynamic_min_weight)
             adjusted_weights[metric_name] = adjusted_weight
             total_adjusted_weight += adjusted_weight
-        
+
         # Renormalize if total weight exceeds 1
         if total_adjusted_weight > 1:
             for metric_name in adjusted_weights:
                 adjusted_weights[metric_name] /= total_adjusted_weight
-                
+
         return adjusted_weights
 
     def _calculate_uniform_weights(self, active_metrics: dict) -> dict:
@@ -748,8 +748,8 @@ def _update_history_with_weights(self, active_metrics: dict, history_data: dict,
         for metric_name in active_metrics:
             weight = weights.get(metric_name, -1)
             for entry in history_data[metric_name]:
-                if (entry["metric_name"] == metric_name and 
-                    entry["round"] == current_round and 
+                if (entry["metric_name"] == metric_name and
+                    entry["round"] == current_round and
                     entry["nei"] == nei):
                     entry["weight"] = weight
 
@@ -765,7 +765,7 @@ async def calculate_value_metrics(self, addr, nei, metrics_active=None):
         try:
             current_round = await self._engine.get_round()
             metrics_instance = self.connection_metrics.get(nei)
-            
+
             if not metrics_instance:
                 logging.warning(f"No metrics found for neighbor {nei}")
                 return self._get_default_metric_values()
@@ -778,7 +778,7 @@ async def calculate_value_metrics(self, addr, nei, metrics_active=None):
             }
 
             self._log_metrics_graphics(metric_results, addr, nei, current_round)
-            
+
             return (
                 metric_results["messages"]["avg"],
                 metric_results["similarity"],
@@ -802,7 +802,7 @@ def _process_num_messages_metric(self, metrics_instance, addr: str, nei: str, cu
         filtered_messages = [
             msg for msg in metrics_instance.messages if msg.get("current_round") == current_round
         ]
-        
+
         for msg in filtered_messages:
             self.messages_number_message.append({
                 "number_message": msg.get("time"),
@@ -813,9 +813,9 @@ def _process_num_messages_metric(self, metrics_instance, addr: str, nei: str, cu
         normalized, count = self.manage_metric_number_message(
             self.messages_number_message, addr, nei, current_round, True
         )
-        
+
         avg = self.save_number_message_history(addr, nei, normalized, current_round)
-        
+
         if avg is None and current_round > self.HISTORY_ROUNDS_LOOKBACK:
             avg = self.number_message_history[(addr, nei)][current_round - 1]["avg_number_message"]
 
@@ -901,7 +901,7 @@ def _process_model_arrival_latency_metric(self, metrics_instance, addr: str, nei
             if avg_latency is None and current_round > 1:
                 avg_latency = self.model_arrival_latency_history[(addr, nei)][current_round - 1]["score"]
             return avg_latency or 0
-        
+
         return 0
 
     def _process_model_similarity_metric(self, nei: str, current_round: int, metrics_active) -> float:
@@ -938,7 +938,7 @@ def create_graphics_to_metrics(
     ):
         """
         Create and log graphics for reputation metrics.
-        
+
         Args:
             number_message_count: Count of messages for logging
             number_message_norm: Normalized message metric
@@ -952,25 +952,25 @@ def create_graphics_to_metrics(
         """
         if current_round is None or current_round >= total_rounds:
             return
-            
+
         self.engine.trainer._logger.log_data(
-            {f"R-Model_arrival_latency_reputation/{addr}": {nei: model_arrival_latency}}, 
+            {f"R-Model_arrival_latency_reputation/{addr}": {nei: model_arrival_latency}},
             step=current_round
         )
         self.engine.trainer._logger.log_data(
-            {f"R-Count_messages_number_message_reputation/{addr}": {nei: number_message_count}}, 
+            {f"R-Count_messages_number_message_reputation/{addr}": {nei: number_message_count}},
             step=current_round
         )
         self.engine.trainer._logger.log_data(
-            {f"R-number_message_reputation/{addr}": {nei: number_message_norm}}, 
+            {f"R-number_message_reputation/{addr}": {nei: number_message_norm}},
             step=current_round
         )
         self.engine.trainer._logger.log_data(
-            {f"R-Similarity_reputation/{addr}": {nei: similarity}}, 
+            {f"R-Similarity_reputation/{addr}": {nei: similarity}},
             step=current_round
         )
         self.engine.trainer._logger.log_data(
-            {f"R-Fraction_reputation/{addr}": {nei: fraction}}, 
+            {f"R-Fraction_reputation/{addr}": {nei: fraction}},
             step=current_round
         )
 
@@ -991,7 +991,7 @@ def analyze_anomalies(
         try:
             key = (addr, nei, current_round)
             self._initialize_fraction_history_entry(key, fraction_changed, threshold)
-            
+
             if current_round == 0:
                 return self._handle_initial_round_anomalies(key, fraction_changed, threshold)
             else:
@@ -1032,16 +1032,16 @@ def _handle_subsequent_round_anomalies(
     ) -> float:
         """Handle anomaly analysis for subsequent rounds."""
         prev_stats = self._find_previous_valid_stats(addr, nei, current_round)
-        
+
         if prev_stats is None:
             logging.warning(f"No valid previous stats found for {addr}, {nei}, round {current_round}")
             return 1.0
-            
+
         anomalies = self._detect_anomalies(fraction_changed, threshold, prev_stats)
         values = self._calculate_anomaly_values(fraction_changed, threshold, prev_stats, anomalies)
         fraction_score = self._calculate_combined_score(values)
         self._update_fraction_statistics(key, fraction_changed, threshold, prev_stats, anomalies, fraction_score)
-        
+
         return max(fraction_score, 0)
 
     def _find_previous_valid_stats(self, addr: str, nei: str, current_round: int) -> dict:
@@ -1049,18 +1049,18 @@ def _find_previous_valid_stats(self, addr: str, nei: str, current_round: int) ->
         for i in range(1, current_round + 1):
             candidate_key = (addr, nei, current_round - i)
             candidate_data = self.fraction_changed_history.get(candidate_key, {})
-            
+
             required_keys = ["mean_fraction", "std_dev_fraction", "mean_threshold", "std_dev_threshold"]
             if all(candidate_data.get(k) is not None for k in required_keys):
                 return candidate_data
-                
+
         return None
 
     def _detect_anomalies(self, current_fraction: float, current_threshold: float, prev_stats: dict) -> dict:
         """Detect if current values are anomalous compared to previous statistics."""
         upper_mean_fraction = (prev_stats["mean_fraction"] + prev_stats["std_dev_fraction"]) * self.FRACTION_ANOMALY_MULTIPLIER
         upper_mean_threshold = (prev_stats["mean_threshold"] + prev_stats["std_dev_threshold"]) * self.THRESHOLD_ANOMALY_MULTIPLIER
-        
+
         return {
             "fraction_anomaly": current_fraction > upper_mean_fraction,
             "threshold_anomaly": current_threshold > upper_mean_threshold,
@@ -1074,19 +1074,19 @@ def _calculate_anomaly_values(
         """Calculate penalty values for fraction and threshold anomalies."""
         fraction_value = 1.0
         threshold_value = 1.0
-        
+
         if anomalies["fraction_anomaly"]:
             mean_fraction_prev = prev_stats["mean_fraction"]
             if mean_fraction_prev > 0:
                 penalization_factor = abs(current_fraction - mean_fraction_prev) / mean_fraction_prev
                 fraction_value = 1 - (1 / (1 + np.exp(-penalization_factor)))
-        
+
         if anomalies["threshold_anomaly"]:
             mean_threshold_prev = prev_stats["mean_threshold"]
             if mean_threshold_prev > 0:
                 penalization_factor = abs(current_threshold - mean_threshold_prev) / mean_threshold_prev
                 threshold_value = 1 - (1 / (1 + np.exp(-penalization_factor)))
-        
+
         return {
             "fraction_value": fraction_value,
             "threshold_value": threshold_value,
@@ -1099,19 +1099,19 @@ def _calculate_combined_score(self, values: dict) -> float:
         return fraction_weight * values["fraction_value"] + threshold_weight * values["threshold_value"]
 
     def _update_fraction_statistics(
-        self, key: tuple, current_fraction: float, current_threshold: float, 
+        self, key: tuple, current_fraction: float, current_threshold: float,
         prev_stats: dict, anomalies: dict, fraction_score: float
     ):
         """Update the fraction statistics for the current round."""
         self.fraction_changed_history[key]["fraction_anomaly"] = anomalies["fraction_anomaly"]
         self.fraction_changed_history[key]["threshold_anomaly"] = anomalies["threshold_anomaly"]
-        
+
         self.fraction_changed_history[key]["mean_fraction"] = (current_fraction + prev_stats["mean_fraction"]) / 2
         self.fraction_changed_history[key]["mean_threshold"] = (current_threshold + prev_stats["mean_threshold"]) / 2
-        
+
         fraction_variance = ((current_fraction - prev_stats["mean_fraction"]) ** 2 + prev_stats["std_dev_fraction"] ** 2) / 2
         threshold_variance = ((self.THRESHOLD_VARIANCE_MULTIPLIER * (current_threshold - prev_stats["mean_threshold"]) ** 2) + prev_stats["std_dev_threshold"] ** 2) / 2
-        
+
         self.fraction_changed_history[key]["std_dev_fraction"] = np.sqrt(fraction_variance)
         self.fraction_changed_history[key]["std_dev_threshold"] = np.sqrt(threshold_variance)
         self.fraction_changed_history[key]["fraction_score"] = fraction_score
@@ -1132,9 +1132,9 @@ def manage_model_arrival_latency(self, addr, nei, latency, current_round, round_
         """
         try:
             current_key = nei
-            
+
             self._initialize_latency_round_entry(current_round, current_key, latency)
-            
+
             if current_round >= 1:
                 score = self._calculate_latency_score(current_round, current_key, latency)
                 self._update_latency_entry_with_score(current_round, current_key, score)
@@ -1161,17 +1161,17 @@ def _calculate_latency_score(self, current_round: int, current_key: str, latency
         """Calculate the latency score based on historical data."""
         target_round = self._get_target_round_for_latency(current_round)
         all_latencies = self._get_all_latencies_for_round(target_round)
-        
+
         if not all_latencies:
             return 0.0
-            
+
         mean_latency = np.mean(all_latencies)
         augment_mean = mean_latency * self.LATENCY_AUGMENT_FACTOR
-        
+
         if latency is None:
             logging.info(f"latency is None in round {current_round} for nei {current_key}")
             return -0.5
-            
+
         if latency <= augment_mean:
             return 1.0
         else:
@@ -1195,7 +1195,7 @@ def _update_latency_entry_with_score(self, current_round: int, current_key: str,
         target_round = self._get_target_round_for_latency(current_round)
         all_latencies = self._get_all_latencies_for_round(target_round)
         mean_latency = np.mean(all_latencies) if all_latencies else 0
-        
+
         self.model_arrival_latency_history[current_round][current_key].update({
             "mean_latency": mean_latency,
             "score": score,
@@ -1215,9 +1215,9 @@ def save_model_arrival_latency_history(self, nei, model_arrival_latency, round_n
         """
         try:
             current_key = nei
-            
+
             self._initialize_latency_history_entry(round_num, current_key, model_arrival_latency)
-            
+
             if model_arrival_latency > 0 and round_num >= 1:
                 avg_model_arrival_latency = self._calculate_latency_weighted_average_positive(
                     round_num, current_key, model_arrival_latency
@@ -1236,7 +1236,7 @@ def save_model_arrival_latency_history(self, nei, model_arrival_latency, round_n
             )
 
             return avg_model_arrival_latency
-            
+
         except Exception:
             logging.exception("Error saving model_arrival_latency history")
 
@@ -1284,14 +1284,14 @@ def manage_metric_number_message(
     ) -> tuple[float, int]:
         """
         Manage the number of messages metric for a specific neighbor.
-        
+
         Args:
             messages_number_message: List of message data
             addr: Source address
             nei: Neighbor address
             current_round: Current round number
             metric_active: Whether the metric is active
-            
+
         Returns:
             Tuple of (normalized_messages, messages_count)
         """
@@ -1301,13 +1301,13 @@ def manage_metric_number_message(
 
             messages_count = self._count_relevant_messages(messages_number_message, addr, nei, current_round)
             neighbor_stats = self._calculate_neighbor_statistics(messages_number_message, current_round)
-            
+
             normalized_messages = self._calculate_normalized_messages(messages_count, neighbor_stats)
-            
+
             normalized_messages = self._apply_historical_penalty(
                 normalized_messages, addr, nei, current_round
             )
-            
+
             self._store_message_history(addr, nei, current_round, normalized_messages)
             normalized_messages = max(0.001, normalized_messages)
 
@@ -1339,7 +1339,7 @@ def _calculate_neighbor_statistics(self, messages: list, current_round: int) ->
             neighbor_counts[key] = neighbor_counts.get(key, 0) + 1
 
         counts_all_neighbors = list(neighbor_counts.values())
-        
+
         if not counts_all_neighbors:
             return {
                 "percentile_reference": 0,
@@ -1349,7 +1349,7 @@ def _calculate_neighbor_statistics(self, messages: list, current_round: int) ->
             }
 
         mean_messages = np.mean(counts_all_neighbors)
-        
+
         return {
             "percentile_reference": np.percentile(counts_all_neighbors, 25),
             "std_dev": np.std(counts_all_neighbors),
@@ -1361,10 +1361,10 @@ def _calculate_normalized_messages(self, messages_count: int, neighbor_stats: di
         """Calculate normalized message score with relative and extra penalties."""
         normalized_messages = 1.0
         penalties_applied = []
-        
+
         relative_increase = self._calculate_relative_increase(messages_count, neighbor_stats["percentile_reference"])
         dynamic_margin = self._calculate_dynamic_margin(neighbor_stats)
-        
+
         if relative_increase > dynamic_margin:
             penalty_ratio = self._calculate_penalty_ratio(relative_increase, dynamic_margin)
             normalized_messages *= np.exp(-(penalty_ratio**2))
@@ -1400,7 +1400,7 @@ def _calculate_penalty_ratio(self, relative_increase: float, dynamic_margin: flo
 
     def _should_apply_extra_penalty(self, messages_count: int, neighbor_stats: dict) -> bool:
         """Determine if extra penalty should be applied."""
-        return (neighbor_stats["mean_messages"] > 0 and 
+        return (neighbor_stats["mean_messages"] > 0 and
                 messages_count > neighbor_stats["augment_mean"])
 
     def _calculate_extra_penalty_factor(self, messages_count: int, neighbor_stats: dict) -> float:
@@ -1408,7 +1408,7 @@ def _calculate_extra_penalty_factor(self, messages_count: int, neighbor_stats: d
         epsilon = 1e-6
         mean_messages = neighbor_stats["mean_messages"]
         augment_mean = neighbor_stats["augment_mean"]
-        
+
         extra_penalty = (messages_count - mean_messages) / (mean_messages + epsilon)
         amplification = 1 + (augment_mean / (mean_messages + epsilon))
         return extra_penalty * amplification
@@ -1417,27 +1417,27 @@ def _apply_historical_penalty(self, normalized_messages: float, addr: str, nei:
         """Apply historical penalty based on previous round's score."""
         if current_round <= 1:
             return normalized_messages
-            
+
         prev_data = (
             self.number_message_history.get((addr, nei), {})
             .get(current_round - 1, {})
         )
-        
+
         prev_score = prev_data.get("normalized_messages")
         was_previously_penalized = prev_data.get("was_penalized", False)
-        
+
         if prev_score is not None and prev_score < self.HISTORICAL_PENALTY_THRESHOLD:
             original_score = normalized_messages
-            
+
             if was_previously_penalized:
                 penalty_factor = self.HISTORICAL_PENALTY_THRESHOLD * 0.8
                 logging.debug(f"Repeated penalty applied to {nei}: stricter historical penalty")
             else:
                 penalty_factor = self.HISTORICAL_PENALTY_THRESHOLD
-            
+
             normalized_messages *= penalty_factor
             logging.debug(f"Historical penalty applied to {nei}: {original_score:.4f} -> {normalized_messages:.4f} (prev_score: {prev_score:.4f}, was_penalized: {was_previously_penalized})")
-            
+
         return normalized_messages
 
     def _store_message_history(self, addr: str, nei: str, current_round: int, normalized_messages: float):
@@ -1445,9 +1445,9 @@ def _store_message_history(self, addr: str, nei: str, current_round: int, normal
         key = (addr, nei)
         if key not in self.number_message_history:
             self.number_message_history[key] = {}
-        
+
         was_penalized = normalized_messages < 1.0
-        
+
         self.number_message_history[key][current_round] = {
             "normalized_messages": normalized_messages,
             "was_penalized": was_penalized,
@@ -1464,9 +1464,9 @@ def save_number_message_history(self, addr, nei, messages_number_message_normali
         """
         try:
             key = (addr, nei)
-            
+
             self._initialize_message_history_entry(key, current_round, messages_number_message_normalized)
-            
+
             if messages_number_message_normalized > 0 and current_round >= 1:
                 avg_number_message = self._calculate_weighted_average_positive(key, current_round, messages_number_message_normalized)
             elif messages_number_message_normalized == 0 and current_round >= 1:
@@ -1478,7 +1478,7 @@ def save_number_message_history(self, addr, nei, messages_number_message_normali
 
             self.number_message_history[key][current_round]["avg_number_message"] = avg_number_message
             return avg_number_message
-            
+
         except Exception:
             logging.exception("Error saving number_message history")
             return -1
@@ -1524,7 +1524,7 @@ async def save_reputation_history_in_memory(self, addr: str, nei: str, reputatio
 
         Args:
             addr: The node's identifier
-            nei: The neighboring node identifier  
+            nei: The neighboring node identifier
             reputation: The reputation value to save
 
         Returns:
@@ -1533,27 +1533,27 @@ async def save_reputation_history_in_memory(self, addr: str, nei: str, reputatio
         try:
             key = (addr, nei)
             current_round = await self._engine.get_round()
-            
+
             if key not in self.reputation_history:
                 self.reputation_history[key] = {}
 
             self.reputation_history[key][current_round] = reputation
 
             rounds = sorted(self.reputation_history[key].keys(), reverse=True)[:2]
-            
+
             if len(rounds) >= 2:
                 current_rep = self.reputation_history[key][rounds[0]]
                 previous_rep = self.reputation_history[key][rounds[1]]
-                
+
                 current_weight = self.REPUTATION_CURRENT_WEIGHT
                 previous_weight = self.REPUTATION_FEEDBACK_WEIGHT
                 avg_reputation = (current_rep * current_weight) + (previous_rep * previous_weight)
-                
+
                 logging.info(f"Current reputation: {current_rep}, Previous reputation: {previous_rep}")
                 logging.info(f"Reputation ponderated: {avg_reputation}")
             else:
                 avg_reputation = reputation
-                
+
             return avg_reputation
 
         except Exception:
@@ -1577,23 +1577,23 @@ def calculate_similarity_from_metrics(self, nei: str, current_round: int) -> flo
                 return 0.0
 
             relevant_metrics = [
-                metric for metric in metrics_instance.similarity 
+                metric for metric in metrics_instance.similarity
                 if metric.get("nei") == nei and metric.get("current_round") == current_round
             ]
-            
+
             if not relevant_metrics:
                 relevant_metrics = [
-                    metric for metric in metrics_instance.similarity 
+                    metric for metric in metrics_instance.similarity
                     if metric.get("nei") == nei
                 ]
-                
+
             if not relevant_metrics:
                 return 0.0
             neighbor_metric = relevant_metrics[-1]
 
             similarity_weights = {
                 "cosine": 0.25,
-                "euclidean": 0.25, 
+                "euclidean": 0.25,
                 "manhattan": 0.25,
                 "pearson_correlation": 0.25,
             }
@@ -1604,7 +1604,7 @@ def calculate_similarity_from_metrics(self, nei: str, current_round: int) -> flo
             )
 
             return max(0.0, min(1.0, similarity_value))
-            
+
         except Exception:
             return 0.0
 
@@ -1620,13 +1620,15 @@ async def calculate_reputation(self, ae: AggregationEvent):
 
         (updates, _, _) = await ae.get_event_data()
         await self._log_reputation_calculation_start()
-        
+
         neighbors = set(await self._engine._cm.get_addrs_current_connections(only_direct=True))
-        
+        federation = self._engine.config.participant["scenario_args"].get("federation")
+
         await self._process_neighbor_metrics(neighbors)
         await self._calculate_reputation_by_factor(neighbors)
         await self._handle_initial_reputation()
-        await self._process_feedback()
+        if federation != "CFL":
+            await self._process_feedback()
         await self._finalize_reputation_calculation(updates, neighbors)
 
     async def _log_reputation_calculation_start(self):
@@ -1644,7 +1646,7 @@ async def _process_neighbor_metrics(self, neighbors):
             metrics = await self.calculate_value_metrics(
                 self._addr, nei, metrics_active=self._metrics
             )
-            
+
             if self._weighting_factor == "dynamic":
                 await self._process_dynamic_metrics(nei, metrics)
             elif self._weighting_factor == "static" and await self._engine.get_round() >= 1:
@@ -1653,7 +1655,7 @@ async def _process_neighbor_metrics(self, neighbors):
     async def _process_dynamic_metrics(self, nei, metrics):
         """Process metrics for dynamic weighting factor."""
         (metric_messages_number, metric_similarity, metric_fraction, metric_model_arrival_latency) = metrics
-        
+
         self.calculate_weighted_values(
             metric_messages_number,
             metric_similarity,
@@ -1669,7 +1671,7 @@ async def _process_dynamic_metrics(self, nei, metrics):
     async def _process_static_metrics(self, nei, metrics):
         """Process metrics for static weighting factor."""
         (metric_messages_number, metric_similarity, metric_fraction, metric_model_arrival_latency) = metrics
-        
+
         metric_values_dict = {
             "num_messages": metric_messages_number,
             "model_similarity": metric_similarity,
@@ -1698,7 +1700,7 @@ async def _process_feedback(self):
         """Process and include feedback in reputation."""
         status = await self.include_feedback_in_reputation()
         current_round = await self._engine.get_round()
-        
+
         if status:
             logging.info(f"Feedback included in reputation at round {current_round}")
         else:
@@ -1709,7 +1711,9 @@ async def _finalize_reputation_calculation(self, updates, neighbors):
         if self.reputation is not None:
             self.create_graphic_reputation(self._addr, await self._engine.get_round())
             await self.update_process_aggregation(updates)
-            await self.send_reputation_to_neighbors(neighbors)
+            federation = self._engine.config.participant["scenario_args"].get("federation")
+            if federation != "CFL":
+                await self.send_reputation_to_neighbors(neighbors)
 
     async def send_reputation_to_neighbors(self, neighbors):
         """
@@ -1735,7 +1739,7 @@ async def send_reputation_to_neighbors(self, neighbors):
     def create_graphic_reputation(self, addr: str, round_num: int):
         """
         Log reputation data for visualization.
-        
+
         Args:
             addr: The node address
             round_num: The round number for logging step
@@ -1746,7 +1750,7 @@ def create_graphic_reputation(self, addr: str, round_num: int):
                 for node_id, data in self.reputation.items()
                 if data.get("reputation") is not None
             }
-            
+
             if valid_reputations:
                 reputation_data = {f"Reputation/{addr}": valid_reputations}
                 self._engine.trainer._logger.log_data(reputation_data, step=round_num)
@@ -1954,26 +1958,26 @@ def _recalculate_pending_latencies(self, current_round):
     async def recollect_similarity(self, ure: UpdateReceivedEvent):
         """
         Collect and analyze model similarity metrics.
-        
+
         Args:
             ure: UpdateReceivedEvent containing model and metadata
         """
         (decoded_model, weight, nei, round_num, local) = await ure.get_event_data()
-        
+
         if not (self._enabled and self._is_metric_enabled("model_similarity")):
             return
-            
+
         if not self._engine.config.participant["adaptive_args"]["model_similarity"]:
             return
-            
+
         if nei == self._addr:
             return
-            
+
         logging.info("🤖  handle_model_message | Checking model similarity")
-        
+
         local_model = self._engine.trainer.get_model_parameters()
         similarity_values = self._calculate_all_similarity_metrics(local_model, decoded_model)
-        
+
         similarity_metrics = {
             "timestamp": datetime.now(),
             "nei": nei,
@@ -1996,7 +2000,7 @@ def _calculate_all_similarity_metrics(self, local_model: dict, received_model: d
                 "jaccard": 0.0,
                 "minkowski": 0.0,
             }
-        
+
         similarity_functions = [
             ("cosine", cosine_metric),
             ("euclidean", euclidean_metric),
@@ -2004,29 +2008,29 @@ def _calculate_all_similarity_metrics(self, local_model: dict, received_model: d
             ("pearson_correlation", pearson_correlation_metric),
             ("jaccard", jaccard_metric),
         ]
-        
+
         similarity_values = {}
-        
+
         for name, metric_func in similarity_functions:
             try:
                 similarity_values[name] = metric_func(local_model, received_model, similarity=True)
             except Exception:
                 similarity_values[name] = 0.0
-        
+
         try:
             similarity_values["minkowski"] = minkowski_metric(
                 local_model, received_model, p=2, similarity=True
             )
         except Exception:
             similarity_values["minkowski"] = 0.0
-        
+
         return similarity_values
 
     def _store_similarity_metrics(self, nei: str, similarity_metrics: dict):
         """Store similarity metrics for the given neighbor."""
         if nei not in self.connection_metrics:
             self.connection_metrics[nei] = Metrics()
-            
+
         self.connection_metrics[nei].similarity.append(similarity_metrics)
 
     def _check_similarity_threshold(self, nei: str, cosine_value: float):
@@ -2064,25 +2068,25 @@ async def _record_message_data(self, source: str):
     async def recollect_fraction_of_parameters_changed(self, ure: UpdateReceivedEvent):
         """
         Collect and analyze the fraction of parameters that changed between models.
-        
+
         Args:
             ure: UpdateReceivedEvent containing model and metadata
         """
         (decoded_model, weight, source, round_num, local) = await ure.get_event_data()
-        
+
         current_round = await self._engine.get_round()
         parameters_local = self._engine.trainer.get_model_parameters()
-        
+
         prev_threshold = self._get_previous_threshold(source, current_round)
         differences = self._calculate_parameter_differences(parameters_local, decoded_model)
         current_threshold = self._calculate_threshold(differences, prev_threshold)
-        
+
         changed_params, total_params, changes_record = self._count_changed_parameters(
             parameters_local, decoded_model, current_threshold
         )
-        
+
         fraction_changed = changed_params / total_params if total_params > 0 else 0.0
-        
+
         self._store_fraction_data(source, current_round, {
             "fraction_changed": fraction_changed,
             "total_params": total_params,
@@ -2102,7 +2106,7 @@ async def recollect_fraction_of_parameters_changed(self, ure: UpdateReceivedEven
 
     def _get_previous_threshold(self, source: str, current_round: int) -> float:
         """Get the threshold from the previous round for the given source."""
-        if (source in self.fraction_of_params_changed and 
+        if (source in self.fraction_of_params_changed and
             current_round - 1 in self.fraction_of_params_changed[source]):
             return self.fraction_of_params_changed[source][current_round - 1][-1]["threshold"]
         return None
@@ -2122,7 +2126,7 @@ def _calculate_threshold(self, differences: list, prev_threshold: float) -> floa
         """Calculate the threshold for determining parameter changes."""
         if not differences:
             return 0
-            
+
         mean_threshold = torch.mean(torch.tensor(differences)).item()
         if prev_threshold is not None:
             return (prev_threshold + mean_threshold) / 2
@@ -2133,20 +2137,20 @@ def _count_changed_parameters(self, local_params: dict, received_params: dict, t
         total_params = 0
         changed_params = 0
         changes_record = {}
-        
+
         for key in local_params.keys():
             if key in received_params:
                 local_tensor = local_params[key].cpu()
                 received_tensor = received_params[key].cpu()
                 diff = torch.abs(local_tensor - received_tensor)
                 total_params += diff.numel()
-                
+
                 num_changed = torch.sum(diff > threshold).item()
                 changed_params += num_changed
-                
+
                 if num_changed > 0:
                     changes_record[key] = num_changed
-                    
+
         return changed_params, total_params, changes_record
 
     def _store_fraction_data(self, source: str, current_round: int, data: dict):
@@ -2155,5 +2159,5 @@ def _store_fraction_data(self, source: str, current_round: int, data: dict):
             self.fraction_of_params_changed[source] = {}
         if current_round not in self.fraction_of_params_changed[source]:
             self.fraction_of_params_changed[source][current_round] = []
-            
-        self.fraction_of_params_changed[source][current_round].append(data)
\ No newline at end of file
+
+        self.fraction_of_params_changed[source][current_round].append(data)
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index 080755818..106f645b9 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -122,6 +122,18 @@
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
             "weight": 0.2
+          },
+          "reputation_enabled": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of an active reputation-based defense mechanism.",
+            "weight": 0.2
           }
         }
       },
@@ -142,6 +154,18 @@
             "description": "The number of clients in the model.",
             "weight": 0.3
           },
+          "average_neighbor_reputation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Average reputation score of the neighbors associated with the node or federation.",
+            "weight": 0.3
+          },
           "dropout_rate": {
             "inputs": [
               {
@@ -546,6 +570,10 @@
               {
                 "source": "factsheet",
                 "field_path": "participants/avg_dataset_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
               }
             ],
             "operation": "check_properties",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template.json
index 6948e4982..17f53d052 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template.json
@@ -13,11 +13,13 @@
 		"client_num": "",
 		"sample_client_rate": "",
 		"client_selector": "",
+		"avg_neighbor_reputation": "",
 		"avg_dataset_size": ""
 	},
 	"configuration": {
 		"aggregation_algorithm": "",
 		"training_model": "",
+		"reputation_enabled": "",
 		"personalization": "",
 		"visualization": "",
 		"monitoring": "",
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 026c64d87..fb0bb93c8 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -1,23 +1,45 @@
-# nebula/addons/trustworthiness/dfl_factsheet.py
-import json, os, shutil
-from datetime import datetime
-from nebula.addons.trustworthiness.metric import TrustMetricManager
 import logging
-import glob
+import json
+import os
 import shutil
-from json import JSONDecodeError
-import pickle
 import numpy as np
 import pandas as pd
-import time
 
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_bytes_model, get_underfitting_score, get_overfitting_score, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_underfitting_score_local, get_dp_local
-from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
+from nebula.addons.trustworthiness.calculation import (
+    get_bytes_model,
+    get_cv,
+    get_dp_local,
+    get_elapsed_time,
+    get_underfitting_score_local,
+)
+from nebula.addons.trustworthiness.factsheet_common import (
+    cap_score,
+    populate_common_pre_train_sections,
+    populate_model_quality_metrics,
+    populate_participation,
+    populate_reliability,
+    populate_reputation,
+    set_dp_configuration,
+)
+from nebula.addons.trustworthiness.utils import read_csv, get_all_data_entropy
 
 dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
-def populate_factsheet(experiment_name, participant_idx, data, start_time, end_time, model, train_loader, test_loader, reputation_summary=None, participation_summary=None, reliability_summary=None):
+
+def populate_factsheet(
+    experiment_name,
+    participant_idx,
+    data,
+    start_time,
+    end_time,
+    model,
+    train_loader,
+    test_loader,
+    reputation_summary=None,
+    participation_summary=None,
+    reliability_summary=None,
+):
     trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
     os.makedirs(trust_dir, exist_ok=True)
 
@@ -34,73 +56,10 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         logging.info("DFL FactSheet: Populating factsheet")
 
-        federation = data["federation"]
-        n_nodes = int(data["n_nodes"])
-        dataset = data["dataset"]
-        algorithm = data["model"]
-        aggregation_algorithm = data["agg_algorithm"]
-        n_rounds = int(data["rounds"])
-        attack = data["attack_params"]["attacks"]
-
-        attack_params = data.get("attack_params", {})
-
-        poisoned_node_percent = int(attack_params.get("poisoned_node_percent", 0) or 0)
-        poisoned_sample_percent = int(attack_params.get("poisoned_sample_percent", 0) or 0)
-        poisoned_noise_percent = float(attack_params.get("poisoned_noise_percent", 0) or 0)
-
-        with_reputation = data["reputation"]["enabled"]
-        topology = data["topology"]
-
-        if attack != "No Attack" and with_reputation == True:
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. A reputation-based defence is used, and the trustworthiness of the project is desired."
-
-        elif attack != "No Attack" and with_reputation == False:
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. No defence mechanism is used, and the trustworthiness of the project is desired."
-
-        elif attack == "No Attack" and with_reputation == True:
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. A reputation-based defence is used, and the trustworthiness of the project is desired."
-
-        elif attack == "No Attack" and with_reputation == False:
-            background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. No defence mechanism is used, and the trustworthiness of the project is desired."
-
-        # Set project specifications
-        factsheet["project"]["overview"] = data["scenario_title"]
-        factsheet["project"]["purpose"] = data["scenario_description"]
-        factsheet["project"]["background"] = background
-
-        # Set data specifications
-        factsheet["data"]["provenance"] = data["dataset"]
-        factsheet["data"]["preprocessing"] = data["topology"]
-
-        # Set participants
-        factsheet["participants"]["client_num"] = data["n_nodes"] or ""
-        factsheet["participants"]["sample_client_rate"] = 1
-
-        if with_reputation == True:
-            factsheet["participants"]["client_selector"] = "Reputation Based"
-        else:
-            factsheet["participants"]["client_selector"] = "Full Participation"
-
-        # Set configuration
-        factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
-        factsheet["configuration"]["training_model"] = data["model"] or ""
-        factsheet["configuration"]["personalization"] = False
-        factsheet["configuration"]["reputation_enabled"] = bool(data.get("reputation", {}).get("enabled", False))
-        factsheet["configuration"]["visualization"] = True
-        factsheet["configuration"]["monitoring"] = True
-        factsheet["configuration"]["total_round_num"] = n_rounds
+        populate_common_pre_train_sections(factsheet, data, model)
 
         dp_enabled, dp_epsilon = get_dp_local(experiment_name, participant_idx)
-        if dp_enabled:
-            factsheet["configuration"]["differential_privacy"] = True
-            factsheet["configuration"]["dp_epsilon"] = dp_epsilon
-        else:
-            factsheet["configuration"]["differential_privacy"] = False
-            factsheet["configuration"]["dp_epsilon"] = ""
-
-        factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
-        factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
-        factsheet["configuration"]["local_update_steps"] = data["epochs"]
+        set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
 
         files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
 
@@ -108,7 +67,12 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
 
         get_all_data_entropy(experiment_name)
 
-        data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_idx)}_class_count.json")
+        data_class_count_file = os.path.join(
+            os.environ.get('NEBULA_LOGS_DIR'),
+            experiment_name,
+            "trustworthiness",
+            f"{str(participant_idx)}_class_count.json",
+        )
 
         entropy_local = normalized_entropy_from_class_counts(data_class_count_file)
 
@@ -131,12 +95,7 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
         factsheet["system"]["upload_bytes"] = int(bytes_sent)
         factsheet["system"]["download_bytes"] = int(bytes_recv)
 
-        if reliability_summary is not None:
-            factsheet["system"]["dropout_rate"] = reliability_summary.get("dropout_rate", 0.0)
-            factsheet["system"]["timeout_rate"] = reliability_summary.get("timeout_rate", 0.0)
-        else:
-            factsheet["system"]["dropout_rate"] = 0.0
-            factsheet["system"]["timeout_rate"] = 0.0
+        populate_reliability(factsheet, reliability_summary)
 
         factsheet["system"]["time_minutes"] = get_elapsed_time(start_time, end_time)
 
@@ -146,103 +105,37 @@ def populate_factsheet(experiment_name, participant_idx, data, start_time, end_t
                 class_distribution = json.load(fs)
             class_samples_sizes = list(class_distribution.values())
             class_imbalance = get_cv(list=class_samples_sizes)
-            factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance > 1 else class_imbalance
+            factsheet["fairness"]["class_imbalance"] = cap_score(class_imbalance)
         else:
             factsheet["fairness"]["class_imbalance"] = factsheet["fairness"].get("class_imbalance", 0.0)
 
-        if participation_summary is not None:
-            factsheet["fairness"]["selection_cv"] = participation_summary.get("selection_cv", 1)
-        else:
-            factsheet["fairness"]["selection_cv"] = 1
+        populate_participation(factsheet, participation_summary)
 
-        carbon_intensity_local, emissions_training_local, energy_consumed_local, sample_size = get_emissions(emissions_file, participant_idx)
+        carbon_intensity_local, emissions_training_local, energy_consumed_local, sample_size = get_emissions(
+            emissions_file,
+            participant_idx,
+        )
 
         factsheet["sustainability"]["carbon_intensity_local"] = carbon_intensity_local
         factsheet["sustainability"]["emissions_training_local"] = emissions_training_local
         factsheet["sustainability"]["energy_consumed_local"] = energy_consumed_local
         factsheet["participants"]["local_dataset_size"] = sample_size
 
-        if reputation_summary is not None:
-            factsheet["participants"]["avg_neighbor_reputation"] = reputation_summary.get("avg_neighbor_reputation", "")
-            factsheet["participants"]["neighbor_num"] = reputation_summary.get("neighbor_num", 0)
-        else:
-            factsheet["participants"]["avg_neighbor_reputation"] = 0
-            factsheet["participants"]["neighbor_num"] = 0
+        populate_reputation(factsheet, reputation_summary, include_neighbor_num=True)
 
-        factsheet["sustainability"]["emissions_communication_local"] = (bytes_sent * 2.24e-10 * carbon_intensity_local)+(bytes_recv * 2.24e-10 * carbon_intensity_local)
-
-        test_sample = next(iter(test_loader))
-        explainability_metrics = get_explainability_metrics_summary(model, test_loader)
-        factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
-        factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
-            model,
-            train_loader,
-            test_loader,
-        )
-        factsheet["privacy"]["epsilon_star_score"] = 1/(1 + factsheet["privacy"]["epsilon_star"])
-        factsheet["privacy"]["mia_auc"] = get_mia_auc(
-            model,
-            train_loader,
-            test_loader,
+        factsheet["sustainability"]["emissions_communication_local"] = (
+            (bytes_sent * 2.24e-10 * carbon_intensity_local)
+            + (bytes_recv * 2.24e-10 * carbon_intensity_local)
         )
 
-        factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
         factsheet["fairness"]["underfitting"] = get_underfitting_score_local(experiment_name, participant_idx)
-        overfitting_value = get_overfitting_score(
+        populate_model_quality_metrics(
+            factsheet,
             model,
             train_loader,
-            factsheet["performance"]["test_acc"],
-        )
-
-        factsheet["fairness"]["overfitting"] = 1/(1 + overfitting_value)
-
-        well_calibration_error_value = get_well_calibration_error(
-            model,
-            test_loader,
-        )
-
-        factsheet["fairness"]["well_calibration_error"] = 1/(1 + well_calibration_error_value)
-        generalized_entropy_index_value = get_generalized_entropy_index(
-            model,
-            test_loader,
-        )
-        factsheet["fairness"]["generalized_entropy_index"] = 1/(1 + generalized_entropy_index_value)
-        theil_index_value = get_theil_index(
-            model,
-            test_loader,
-        )
-        factsheet["fairness"]["theil_index"] = 1/(1 + theil_index_value)
-        coefficient_of_variation_value = get_coefficient_of_variation(
-            model,
             test_loader,
+            factsheet["performance"]["test_acc"],
         )
-        factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
-        factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
-        factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
-        factsheet["explainability"]["spread_divergence"] = explainability_metrics["spread_divergence"]
-
-        lr = factsheet["configuration"]["learning_rate"]
-
-        value_clever = get_clever_score(model, test_sample, model.get_num_classes(), lr)
-        factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
-
-        value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, model.get_num_classes(), lr)
-        factsheet["performance"]["test_loss_sensitivity"] = 1 / (1 + value_loss_sensitivity)
-
-        value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, model.get_num_classes(), lr)
-        factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
-
-        value_empirical_robustness = get_empirical_robustness_score(model, test_sample, model.get_num_classes(), lr)
-        factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
-
-        value_confidence_score = get_confidence_score(model, test_sample)
-        factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
-
-        value_attack_success_rate = attack_success_rate(model, test_sample)
-        factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
-
-        feature_importance = explainability_metrics["feature_importance_cv"]
-        factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
 
         f.seek(0)
         f.truncate()
@@ -260,7 +153,12 @@ def load_round_metrics(experiment_name, participant_idx):
     return df
 
 def get_bytes(experiment_name, participant_idx):
-    data_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"data_results_{participant_idx}.csv")
+    data_file = os.path.join(
+        os.environ.get('NEBULA_LOGS_DIR'),
+        experiment_name,
+        "trustworthiness",
+        f"data_results_{participant_idx}.csv",
+    )
 
     data = read_csv(data_file)
 
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 2ff4dce3b..8c9415a93 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -1,16 +1,33 @@
 import json
 import logging
 import os
-import glob
 import shutil
 from json import JSONDecodeError
-import pickle
 import numpy as np
 import pandas as pd
-import time
 
-from nebula.addons.trustworthiness.calculation import get_elapsed_time, get_bytes_sent_recv, get_avg_loss_accuracy, get_cv, get_clever_score, get_feature_importance_cv, get_loss_sensitivity_score, compute_adversarial_accuracy_art,get_empirical_robustness_score,get_confidence_score,attack_success_rate, get_entropy_list, get_avg_class_imbalance_model_size, get_underfitting_score, get_overfitting_score, get_participant_loss_accuracy, get_well_calibration_error, get_generalized_entropy_index, get_theil_index, get_coefficient_of_variation, get_alpha_score, get_spread_ratio, get_spread_divergence, get_epsilon_star, get_mia_auc, get_explainability_metrics_summary, get_macro_f1_score, get_dp_global
-from nebula.addons.trustworthiness.utils import count_all_class_samples, read_csv, check_field_filled, get_all_data_entropy
+from nebula.addons.trustworthiness.calculation import (
+    get_avg_class_imbalance_model_size,
+    get_avg_loss_accuracy,
+    get_bytes_sent_recv,
+    get_cv,
+    get_dp_global,
+    get_elapsed_time,
+    get_entropy_list,
+    get_participant_loss_accuracy,
+    get_underfitting_score,
+)
+from nebula.addons.trustworthiness.factsheet_common import (
+    cap_score,
+    inverse_score,
+    populate_common_pre_train_sections,
+    populate_model_quality_metrics,
+    populate_participation,
+    populate_reliability,
+    populate_reputation,
+    set_dp_configuration,
+)
+from nebula.addons.trustworthiness.utils import read_csv, check_field_filled
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
 dirname = os.path.dirname(__file__)
@@ -33,7 +50,12 @@ def populate_factsheet_pre_train(self, data, scenario_name, model):
             scenario_name (string): The name of the scenario.
         """
 
-        factsheet_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.factsheet_file_nm)
+        factsheet_file = os.path.join(
+            os.environ.get('NEBULA_LOGS_DIR'),
+            scenario_name,
+            "trustworthiness",
+            self.factsheet_file_nm,
+        )
 
         factsheet_template = os.path.join(dirname, "configs", self.factsheet_template_file_nm)
 
@@ -49,64 +71,7 @@ def populate_factsheet_pre_train(self, data, scenario_name, model):
                 if data is not None:
                     logging.info("FactSheet: Populating factsheet with pre training metrics")
 
-                    federation = data["federation"]
-                    n_nodes = int(data["n_nodes"])
-                    dataset = data["dataset"]
-                    algorithm = data["model"]
-                    aggregation_algorithm = data["agg_algorithm"]
-                    n_rounds = int(data["rounds"])
-                    attack = data["attack_params"]["attacks"]
-
-                    attack_params = data.get("attack_params", {})
-
-                    poisoned_node_percent = int(attack_params.get("poisoned_node_percent", 0) or 0)
-                    poisoned_sample_percent = int(attack_params.get("poisoned_sample_percent", 0) or 0)
-                    poisoned_noise_percent = float(attack_params.get("poisoned_noise_percent", 0) or 0)
-
-                    with_reputation = data["reputation"]["enabled"]
-                    topology = data["topology"]
-
-                    if attack != "No Attack" and with_reputation == True:
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. A reputation-based defence is used, and the trustworthiness of the project is desired."
-
-                    elif attack != "No Attack" and with_reputation == False:
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. In addition, the type of attack used is {attack}. No defence mechanism is used, and the trustworthiness of the project is desired."
-
-                    elif attack == "No Attack" and with_reputation == True:
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. A reputation-based defence is used, and the trustworthiness of the project is desired."
-
-                    elif attack == "No Attack" and with_reputation == False:
-                        background = f"For the project setup, the most important aspects are the following: The federation architecture is {federation}, involving {n_nodes} clients, the dataset used is {dataset}, the learning algorithm is {algorithm}, the aggregation algorithm is {aggregation_algorithm} and the number of rounds is {n_rounds}. No attacks are used. No defence mechanism is used, and the trustworthiness of the project is desired."
-
-                    # Set project specifications
-                    factsheet["project"]["overview"] = data["scenario_title"]
-                    factsheet["project"]["purpose"] = data["scenario_description"]
-                    factsheet["project"]["background"] = background
-
-                    # Set data specifications
-                    factsheet["data"]["provenance"] = data["dataset"]
-                    factsheet["data"]["preprocessing"] = data["topology"]
-
-                    # Set participants
-                    factsheet["participants"]["client_num"] = data["n_nodes"] or ""
-                    factsheet["participants"]["sample_client_rate"] = 1
-                    if with_reputation == True:
-                        factsheet["participants"]["client_selector"] = "Reputation Based"
-                    else:
-                        factsheet["participants"]["client_selector"] = "Full Participation"
-
-                    # Set configuration
-                    factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
-                    factsheet["configuration"]["training_model"] = data["model"] or ""
-                    factsheet["configuration"]["personalization"] = False
-                    factsheet["configuration"]["reputation_enabled"] = bool(data.get("reputation", {}).get("enabled", False))
-                    factsheet["configuration"]["visualization"] = True
-                    factsheet["configuration"]["monitoring"] = True
-                    factsheet["configuration"]["total_round_num"] = n_rounds
-
-                    factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
-                    factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
-                    factsheet["configuration"]["local_update_steps"] = data["epochs"]
+                    populate_common_pre_train_sections(factsheet, data, model)
 
                     f.seek(0)
                     f.truncate()
@@ -116,14 +81,31 @@ def populate_factsheet_pre_train(self, data, scenario_name, model):
                 logging.warning(f"{factsheet_file} is invalid")
                 logging.error(e)
 
-    def populate_factsheet_post_train(self, scenario_name, start_time, end_time, participant_idx, model, train_loader, test_loader, reputation_summary=None, participation_summary=None, reliability_summary=None):
+    def populate_factsheet_post_train(
+        self,
+        scenario_name,
+        start_time,
+        end_time,
+        participant_idx,
+        model,
+        train_loader,
+        test_loader,
+        reputation_summary=None,
+        participation_summary=None,
+        reliability_summary=None,
+    ):
         """
         Populates the factsheet with values after the training.
 
         Args:
             scenario (object): The scenario object.
         """
-        factsheet_file = os.path.join(f"{os.environ.get('NEBULA_LOGS_DIR')}{scenario_name}/trustworthiness/{self.factsheet_file_nm}")
+        factsheet_file = os.path.join(
+            os.environ.get('NEBULA_LOGS_DIR'),
+            scenario_name,
+            "trustworthiness",
+            self.factsheet_file_nm,
+        )
 
         logging.info("FactSheet: Populating factsheet with post training metrics")
 
@@ -156,12 +138,7 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 _, participant_test_acc = get_participant_loss_accuracy(scenario_name, participant_idx)
 
                 dp_enabled, dp_epsilon = get_dp_global(scenario_name)
-                if dp_enabled:
-                    factsheet["configuration"]["differential_privacy"] = True
-                    factsheet["configuration"]["dp_epsilon"] = dp_epsilon
-                else:
-                    factsheet["configuration"]["differential_privacy"] = False
-                    factsheet["configuration"]["dp_epsilon"] = ""
+                set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
 
                 factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
                 factsheet["system"]["avg_model_size"] = avg_model_size
@@ -171,97 +148,23 @@ def populate_factsheet_post_train(self, scenario_name, start_time, end_time, par
                 factsheet["system"]["total_download_bytes"] = result_bytes_sent_recv[1]
                 factsheet["system"]["avg_upload_bytes"] = result_bytes_sent_recv[2]
                 factsheet["system"]["avg_download_bytes"] = result_bytes_sent_recv[3]
-                if reliability_summary is not None:
-                    factsheet["system"]["dropout_rate"] = reliability_summary.get("dropout_rate", 0.0)
-                    factsheet["system"]["timeout_rate"] = reliability_summary.get("timeout_rate", 0.0)
-                else:
-                    factsheet["system"]["dropout_rate"] = 0.0
-                    factsheet["system"]["timeout_rate"] = 0.0
-
-                if participation_summary is not None:
-                    factsheet["fairness"]["selection_cv"] = participation_summary.get("selection_cv", 1)
-                else:
-                    factsheet["fairness"]["selection_cv"] = 1
-
-                class_imbalance_score = 1 / (1+avg_class_imbalance)
-                factsheet["fairness"]["class_imbalance"] = 1 if class_imbalance_score > 1 else class_imbalance_score
-                if reputation_summary is not None:
-                    factsheet["participants"]["avg_neighbor_reputation"] = reputation_summary.get("avg_neighbor_reputation", "")
-                else:
-                    factsheet["participants"]["avg_neighbor_reputation"] = 0
-
-                test_sample = next(iter(test_loader))
-                explainability_metrics = get_explainability_metrics_summary(model, test_loader)
-                factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
-                factsheet["privacy"]["epsilon_star"] = get_epsilon_star(
-                    model,
-                    train_loader,
-                    test_loader,
-                )
-                factsheet["privacy"]["epsilon_star_score"] = 1/(1 + factsheet["privacy"]["epsilon_star"])
-                factsheet["privacy"]["mia_auc"] = get_mia_auc(
-                    model,
-                    train_loader,
-                    test_loader,
-                )
-                factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
+                populate_reliability(factsheet, reliability_summary)
+                populate_participation(factsheet, participation_summary)
+
+                class_imbalance_score = inverse_score(avg_class_imbalance)
+                factsheet["fairness"]["class_imbalance"] = cap_score(class_imbalance_score)
+                populate_reputation(factsheet, reputation_summary)
 
                 underfitting_score = get_underfitting_score(scenario_name, participant_idx)
 
                 factsheet["fairness"]["underfitting"] = underfitting_score
-                overfitting_value = get_overfitting_score(
+                populate_model_quality_metrics(
+                    factsheet,
                     model,
                     train_loader,
-                    participant_test_acc,
-                )
-                factsheet["fairness"]["overfitting"] = 1/(1 + overfitting_value)
-                well_calibration_error_value = get_well_calibration_error(
-                    model,
-                    test_loader,
-                )
-
-                factsheet["fairness"]["well_calibration_error"] = 1/(1 + well_calibration_error_value)
-                generalized_entropy_index_value = get_generalized_entropy_index(
-                    model,
-                    test_loader,
-                )
-                factsheet["fairness"]["generalized_entropy_index"] = 1/(1 + generalized_entropy_index_value)
-                theil_index_value = get_theil_index(
-                    model,
-                    test_loader,
-                )
-                factsheet["fairness"]["theil_index"] = 1/(1 + theil_index_value)
-                coefficient_of_variation_value = get_coefficient_of_variation(
-                    model,
                     test_loader,
+                    participant_test_acc,
                 )
-                factsheet["fairness"]["coefficient_of_variation"] = 1/(1 + coefficient_of_variation_value)
-                factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
-                factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
-                factsheet["explainability"]["spread_divergence"] = explainability_metrics["spread_divergence"]
-
-                lr = factsheet["configuration"]["learning_rate"]
-
-                value_clever = get_clever_score(model, test_sample, model.get_num_classes(), lr)
-                factsheet["performance"]["test_clever"] = 1 if value_clever > 1 else value_clever
-
-                value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, model.get_num_classes(), lr)
-                factsheet["performance"]["test_loss_sensitivity"] = 1 / (1 + value_loss_sensitivity)
-
-                value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, model.get_num_classes(), lr)
-                factsheet["performance"]["test_adv_accuracy"] = 1 if value_adv_accuracy > 1 else value_adv_accuracy
-
-                value_empirical_robustness = get_empirical_robustness_score(model, test_sample, model.get_num_classes(), lr)
-                factsheet["performance"]["test_empirical_robustness"] = 1 if value_empirical_robustness > 1 else value_empirical_robustness
-
-                value_confidence_score = get_confidence_score(model, test_sample)
-                factsheet["performance"]["test_confidence_score"] = 1 if value_confidence_score > 1 else value_confidence_score
-
-                value_attack_success_rate = attack_success_rate(model, test_sample)
-                factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
-
-                feature_importance = explainability_metrics["feature_importance_cv"]
-                factsheet["performance"]["test_feature_importance_cv"] = 1 if feature_importance > 1 else feature_importance
 
                 # Set emissions metrics
                 emissions = None if emissions_file is None else read_csv(emissions_file)
diff --git a/nebula/addons/trustworthiness/factsheet_common.py b/nebula/addons/trustworthiness/factsheet_common.py
new file mode 100644
index 000000000..dc9dadac6
--- /dev/null
+++ b/nebula/addons/trustworthiness/factsheet_common.py
@@ -0,0 +1,195 @@
+"""Shared helpers for trustworthiness factsheet generation."""
+
+from nebula.addons.trustworthiness.calculation import (
+    attack_success_rate,
+    compute_adversarial_accuracy_art,
+    get_clever_score,
+    get_coefficient_of_variation,
+    get_confidence_score,
+    get_empirical_robustness_score,
+    get_epsilon_star,
+    get_explainability_metrics_summary,
+    get_generalized_entropy_index,
+    get_loss_sensitivity_score,
+    get_macro_f1_score,
+    get_mia_auc,
+    get_overfitting_score,
+    get_theil_index,
+    get_well_calibration_error,
+)
+
+
+def cap_score(value, maximum=1):
+    """Return ``value`` clipped from above at ``maximum``; no lower bound is applied."""
+    return min(value, maximum)
+
+
+def inverse_score(value):
+    """Map an error or risk ``value`` to ``1 / (1 + value)``, which lies in (0, 1] for ``value >= 0``."""
+    return 1 / (value + 1)
+
+
+def build_project_background(data):
+    """Compose the natural-language scenario summary stored in the factsheet."""
+    architecture = data["federation"]
+    client_count = int(data["n_nodes"])
+    dataset_name = data["dataset"]
+    model_name = data["model"]
+    aggregator = data["agg_algorithm"]
+    round_count = int(data["rounds"])
+    attack_name = data["attack_params"]["attacks"]
+    reputation_on = data["reputation"]["enabled"]
+
+    setup = (
+        "For the project setup, the most important aspects are the following: "
+        f"The federation architecture is {architecture}, involving {client_count} clients, "
+        f"the dataset used is {dataset_name}, the learning algorithm is {model_name}, "
+        f"the aggregation algorithm is {aggregator} and the number of rounds is {round_count}. "
+    )
+    # Any value other than the literal "No Attack" is reported as the attack type.
+    attack_part = (
+        f"In addition, the type of attack used is {attack_name}. "
+        if attack_name != "No Attack"
+        else "No attacks are used. "
+    )
+    closing = (
+        "A reputation-based defence is used, and the trustworthiness of the project is desired."
+        if reputation_on
+        else "No defence mechanism is used, and the trustworthiness of the project is desired."
+    )
+    return setup + attack_part + closing
+
+
+def populate_common_pre_train_sections(factsheet, data, model):
+    """Fill the project, data, participants and configuration sections of ``factsheet`` in place."""
+    reputation_on = data["reputation"]["enabled"]
+
+    factsheet["project"]["overview"] = data["scenario_title"]
+    factsheet["project"]["purpose"] = data["scenario_description"]
+    factsheet["project"]["background"] = build_project_background(data)
+
+    factsheet["data"]["provenance"] = data["dataset"]
+    factsheet["data"]["preprocessing"] = data["topology"]
+
+    factsheet["participants"]["client_num"] = data["n_nodes"] or ""
+    factsheet["participants"]["sample_client_rate"] = 1
+    if reputation_on:
+        factsheet["participants"]["client_selector"] = "Reputation Based"
+    else:
+        factsheet["participants"]["client_selector"] = "Full Participation"
+
+    factsheet["configuration"]["aggregation_algorithm"] = data["agg_algorithm"] or ""
+    factsheet["configuration"]["training_model"] = data["model"] or ""
+    factsheet["configuration"]["personalization"] = False
+    reputation_settings = data.get("reputation", {})
+    factsheet["configuration"]["reputation_enabled"] = bool(reputation_settings.get("enabled", False))
+    factsheet["configuration"]["visualization"] = True
+    factsheet["configuration"]["monitoring"] = True
+    factsheet["configuration"]["total_round_num"] = int(data["rounds"])
+    factsheet["configuration"]["learning_rate"] = model.get_learning_rate()
+    factsheet["configuration"]["trainable_param_num"] = model.count_parameters()
+    factsheet["configuration"]["local_update_steps"] = data["epochs"]
+
+
+def set_dp_configuration(factsheet, dp_enabled, dp_epsilon):
+    """Record the DP flag as a bool and the epsilon, or "" when DP is disabled."""
+    enabled = bool(dp_enabled)
+    factsheet["configuration"].update(differential_privacy=enabled, dp_epsilon=dp_epsilon if enabled else "")
+
+
+def populate_reliability(factsheet, reliability_summary):
+    """Store dropout and timeout rates under ``system``; each falls back to 0.0.
+
+    The fallback applies both when no summary is given and when the summary
+    lacks the corresponding key.
+    """
+    # Treating a missing summary as an empty mapping makes both lookups
+    # fall through to their 0.0 defaults.
+    summary = {} if reliability_summary is None else reliability_summary
+    system = factsheet["system"]
+    system["dropout_rate"] = summary.get("dropout_rate", 0.0)
+    system["timeout_rate"] = summary.get("timeout_rate", 0.0)
+
+
+def populate_participation(factsheet, participation_summary):
+    """Store ``selection_cv`` under ``fairness``; it is 1 without a summary or key."""
+    if participation_summary is None:
+        selection_cv = 1
+    else:
+        selection_cv = participation_summary.get("selection_cv", 1)
+    factsheet["fairness"]["selection_cv"] = selection_cv
+
+
+def populate_reputation(factsheet, reputation_summary, include_neighbor_num=False):
+    """Store neighbour reputation data under ``participants``.
+
+    Without a summary both values are 0; with one, a missing average falls back
+    to "" and a missing neighbour count to 0.
+    """
+    participants = factsheet["participants"]
+    summary_missing = reputation_summary is None
+    participants["avg_neighbor_reputation"] = (
+        0 if summary_missing else reputation_summary.get("avg_neighbor_reputation", "")
+    )
+
+    # ``neighbor_num`` is only written when the caller asks for it.
+    if include_neighbor_num:
+        participants["neighbor_num"] = (
+            0 if summary_missing else reputation_summary.get("neighbor_num", 0)
+        )
+
+
+def populate_model_quality_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
+    """Compute model-level metrics and write them into ``factsheet`` in place.
+
+    Fills entries of the ``performance``, ``privacy``, ``fairness`` and
+    ``explainability`` sections. ``factsheet["configuration"]["learning_rate"]``
+    must already be set, because it is read here.
+    """
+    # Sample-based metrics only see the first item yielded by ``test_loader``.
+    test_sample = next(iter(test_loader))
+    explainability_metrics = get_explainability_metrics_summary(model, test_loader)
+
+    factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
+
+    factsheet["privacy"]["epsilon_star"] = get_epsilon_star(model, train_loader, test_loader)
+    factsheet["privacy"]["epsilon_star_score"] = inverse_score(factsheet["privacy"]["epsilon_star"])
+    factsheet["privacy"]["mia_auc"] = get_mia_auc(model, train_loader, test_loader)
+    # 1 when the AUC is 0.5, decreasing linearly to 0 at an AUC of 0 or 1.
+    factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
+
+    # Each fairness value below is stored through inverse_score, not raw.
+    overfitting_value = get_overfitting_score(model, train_loader, test_accuracy)
+    factsheet["fairness"]["overfitting"] = inverse_score(overfitting_value)
+    well_calibration_error_value = get_well_calibration_error(model, test_loader)
+    factsheet["fairness"]["well_calibration_error"] = inverse_score(well_calibration_error_value)
+    generalized_entropy_index_value = get_generalized_entropy_index(model, test_loader)
+    factsheet["fairness"]["generalized_entropy_index"] = inverse_score(generalized_entropy_index_value)
+    theil_index_value = get_theil_index(model, test_loader)
+    factsheet["fairness"]["theil_index"] = inverse_score(theil_index_value)
+    coefficient_of_variation_value = get_coefficient_of_variation(model, test_loader)
+    factsheet["fairness"]["coefficient_of_variation"] = inverse_score(coefficient_of_variation_value)
+
+    factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
+    factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
+    factsheet["explainability"]["spread_divergence"] = explainability_metrics["spread_divergence"]
+
+    lr = factsheet["configuration"]["learning_rate"]
+    num_classes = model.get_num_classes()
+
+    value_clever = get_clever_score(model, test_sample, num_classes, lr)
+    factsheet["performance"]["test_clever"] = cap_score(value_clever)
+    value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes, lr)
+    factsheet["performance"]["test_loss_sensitivity"] = inverse_score(value_loss_sensitivity)
+    value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, num_classes, lr)
+    factsheet["performance"]["test_adv_accuracy"] = cap_score(value_adv_accuracy)
+    value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes, lr)
+    factsheet["performance"]["test_empirical_robustness"] = cap_score(value_empirical_robustness)
+    value_confidence_score = get_confidence_score(model, test_sample)
+    factsheet["performance"]["test_confidence_score"] = cap_score(value_confidence_score)
+    # Stored as 1 minus the value returned by attack_success_rate.
+    value_attack_success_rate = attack_success_rate(model, test_sample)
+    factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
+
+    feature_importance = explainability_metrics["feature_importance_cv"]
+    factsheet["performance"]["test_feature_importance_cv"] = cap_score(feature_importance)
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 527763448..69e6ee7ff 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -166,7 +166,13 @@ def __init__(
         else:
             self._situational_awareness = None
 
-        if self.config.participant["defense_args"]["reputation"]["enabled"]:
+        self._reputation = None
+
+        role = self.config.participant["device_args"]["role"]
+        federation = self.config.participant["scenario_args"].get("federation")
+        reputation_enabled = self.config.participant["defense_args"]["reputation"]["enabled"]
+
+        if reputation_enabled and (role == "server" or federation!="CFL"):
             self._reputation = Reputation(engine=self, config=self.config)
 
     @property
@@ -683,8 +689,8 @@ async def deploy_components(self):
         await self.aggregator.init()
         if "situational_awareness" in self.config.participant:
             await self.sa.init()
-        if self.config.participant["defense_args"]["reputation"]["enabled"]:
-            await self._reputation.setup()
+        if self._reputation is not None:
+          await self._reputation.setup()
         await self._reporter.start()
         await self._addon_manager.deploy_additional_services()
 

From 5653780014f790dc99a7829932b71f299cef320a Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Tue, 5 May 2026 17:39:53 +0200
Subject: [PATCH 38/66] SDFL Fixed: Forwarding trainer and aggregator,
 aggregation fixed, new message, leadership/ACK fixed

---
 nebula/core/aggregation/aggregator.py |  41 +++--
 nebula/core/engine.py                 | 242 +++++++++++++++++++++++++
 nebula/core/network/actions.py        |   9 +
 nebula/core/network/communications.py |   2 +-
 nebula/core/network/messages.py       |  12 +-
 nebula/core/noderole.py               | 243 +++++++++++++++++---------
 nebula/core/pb/nebula.proto           |  14 ++
 nebula/core/pb/nebula_pb2.py          |  96 +++++-----
 nebula/core/utils/locker.py           |   1 +
 9 files changed, 513 insertions(+), 147 deletions(-)

diff --git a/nebula/core/aggregation/aggregator.py b/nebula/core/aggregation/aggregator.py
index ff88668de..1da611ed9 100755
--- a/nebula/core/aggregation/aggregator.py
+++ b/nebula/core/aggregation/aggregator.py
@@ -54,13 +54,13 @@ async def update_federation_nodes(self, federation_nodes: set):
         """
         Updates the current set of nodes expected to participate in the upcoming aggregation round.
 
-        This method informs the update handler (`us`) about the new set of federation nodes, 
-        clears any pending models, and attempts to acquire the aggregation lock to prepare 
+        This method informs the update handler (`us`) about the new set of federation nodes,
+        clears any pending models, and attempts to acquire the aggregation lock to prepare
         for model aggregation. If the aggregation process is already running, it releases the lock
         and tries again to ensure proper cleanup between rounds.
 
         Args:
-            federation_nodes (set): A set of addresses representing the nodes expected to contribute 
+            federation_nodes (set): A set of addresses representing the nodes expected to contribute
                                     updates for the next aggregation round.
 
         Raises:
@@ -108,7 +108,10 @@ async def get_aggregation(self):
             TimeoutError: If the aggregation lock is not acquired within the defined timeout.
             asyncio.CancelledError: If the aggregation lock acquisition is cancelled.
             Exception: For any other unexpected errors during the aggregation process.
-        """            
+        """
+        lock_acquired = False
+        lock_task = None
+        skip_task = None
         try:
             timeout = self.config.participant["aggregator_args"]["aggregation_timeout"]
             logging.info(f"Aggregation timeout: {timeout} starts...")
@@ -119,24 +122,38 @@ async def get_aggregation(self):
                 [lock_task, skip_task],
                 return_when=asyncio.FIRST_COMPLETED,
             )
-            lock_acquired = lock_task in done
+
             if skip_task in done:
                 logging.info("Skipping aggregation timeout, updates received before grace time")
                 self._aggregation_waiting_skip.clear()
-                if not lock_acquired:
+                if not lock_task.done():
                     lock_task.cancel()
+
+            if lock_task in done:
                 try:
-                    await lock_task  # Clean cancel
+                    await lock_task
+                    lock_acquired = True
+                except TimeoutError:
+                    logging.info("🔄  get_aggregation | Timeout reached; aggregating received updates")
                 except asyncio.CancelledError:
-                    pass
+                    logging.info("🔄  get_aggregation | Lock acquisition was cancelled")
 
-        except TimeoutError:
-            logging.exception("🔄  get_aggregation | Timeout reached for aggregation")
         except asyncio.CancelledError:
-            logging.exception("🔄  get_aggregation | Lock acquisition was cancelled")
+            logging.exception("🔄  get_aggregation | Aggregation wait was cancelled")
         except Exception as e:
             logging.exception(f"🔄  get_aggregation | Error acquiring lock: {e}")
         finally:
+            for task in (lock_task, skip_task):
+                if task is None:
+                    continue
+                if not task.done():
+                    task.cancel()
+                try:
+                    await task
+                except asyncio.CancelledError:
+                    pass
+                except TimeoutError:
+                    pass
             if lock_acquired or self._aggregation_done_lock.locked():
                 await self._aggregation_done_lock.release_async()
 
@@ -145,7 +162,7 @@ async def get_aggregation(self):
         if not updates:
             logging.info(f"🔄  get_aggregation | No updates has been received..resolving conflict to continue...")
             updates = {self._addr: await self.engine.resolve_missing_updates()}
-        
+
         missing_nodes = await self.us.get_round_missing_nodes()
         if missing_nodes:
             logging.info(f"🔄  get_aggregation | Aggregation incomplete, missing models from: {missing_nodes}")
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 69e6ee7ff..75007c4df 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -156,6 +156,11 @@ def __init__(
         self.sinchronized_status_lock = Locker(name="sinchronized_status_lock")
 
         self.trainning_in_progress_lock = Locker(name="trainning_in_progress_lock", async_lock=True)
+        self._global_model_received = asyncio.Event()
+        self._global_model_source = None
+        self._leadership_transfer_lock = Locker("leadership_transfer_lock", async_lock=True)
+        self._leadership_transfer_pending = None
+        self._leadership_transfer_ack = asyncio.Event()
 
         event_manager = EventManager.get_instance(verbose=False)
         self._addon_manager = AddondManager(self, self.config)
@@ -222,6 +227,76 @@ async def update_federation_nodes(self, federation_nodes):
         async with self._federation_nodes_lock:
             self.federation_nodes = federation_nodes
 
+    async def mark_leadership_transfer_pending(self, successor: str):
+        async with self._leadership_transfer_lock:
+            self._leadership_transfer_pending = successor
+            self._leadership_transfer_ack.clear()
+            logging.info(f"SDFL leadership | Waiting ACK from successor {successor}")
+
+    async def confirm_leadership_transfer_ack(self, source: str) -> bool:
+        async with self._leadership_transfer_lock:
+            if self._leadership_transfer_pending != source:
+                logging.info(
+                    f"SDFL leadership | Ignoring ACK from {source}; "
+                    f"pending successor is {self._leadership_transfer_pending}"
+                )
+                return False
+
+            logging.info(f"SDFL leadership | ACK received from successor {source}")
+            self._leadership_transfer_ack.set()
+            return True
+
+    async def wait_pending_leadership_ack(self):
+        if self.config.participant["scenario_args"].get("federation") != "SDFL":
+            return
+
+        async with self._leadership_transfer_lock:
+            successor = self._leadership_transfer_pending
+
+        if successor is None:
+            return
+
+        timeout = float(self.config.participant.get("misc_args", {}).get("leadership_ack_timeout", 5))
+        logging.info(f"SDFL leadership | Waiting up to {timeout}s for ACK from {successor}")
+
+        ack_received = False
+        try:
+            await asyncio.wait_for(self._leadership_transfer_ack.wait(), timeout=timeout)
+            ack_received = True
+        except TimeoutError:
+            logging.warning(
+                f"SDFL leadership | ACK from {successor} not received before next round; "
+                "keeping aggregator role"
+            )
+
+        async with self._leadership_transfer_lock:
+            if self._leadership_transfer_pending != successor:
+                return
+
+            self._leadership_transfer_pending = None
+            self._leadership_transfer_ack.clear()
+
+        if ack_received:
+            await self.rb.set_next_role(Role.TRAINER)
+
+    def get_sdfl_expected_trainers(self) -> set[str]:
+        nodes = self.config.participant.get("trust_args", {}).get("scenario", {}).get("nodes", {})
+        expected_nodes = set()
+        roles_to_include = {"trainer", "aggregator", "trainer_aggregator", "malicious"}
+
+        for node in nodes.values():
+            role = node.get("role")
+            ip = node.get("ip")
+            port = node.get("port")
+            if role not in roles_to_include or ip is None or port is None:
+                continue
+
+            addr = f"{ip}:{port}"
+            if addr != self.addr:
+                expected_nodes.add(addr)
+
+        return expected_nodes
+
     def get_initialization_status(self):
         return self.initialized
 
@@ -279,6 +354,9 @@ async def model_update_callback(self, source, message):
         if not self.get_federation_ready_lock().locked() and len(await self.get_federation_nodes()) == 0:
             logging.info("🤖  handle_model_message | There are no defined federation nodes")
             return
+        if self.config.participant["scenario_args"].get("federation") == "SDFL":
+            logging.info("SDFL | Ignoring legacy model/update; use sdflmodel messages")
+            return
         decoded_model = self.trainer.deserialize_model(message.parameters)
         updt_received_event = UpdateReceivedEvent(decoded_model, message.weight, source, message.round)
         await EventManager.get_instance().publish_node_event(updt_received_event)
@@ -344,6 +422,10 @@ async def _control_leadership_transfer_callback(self, source, message):
     async def _control_leadership_transfer_ack_callback(self, source, message):
         logging.info(f"🔧  handle_control_message | Trigger | Received leadership transfer ack message from {source}")
         # No concurrence of difference ack received treated, be aware of that.
+        if self.config.participant["scenario_args"].get("federation") == "SDFL":
+            await self.confirm_leadership_transfer_ack(source)
+            return
+
         if await self._round_in_process_lock.locked_async():
             logging.info("Learning cycle is executing, role behavior will be modified next round")
             await self.rb.set_next_role(Role.TRAINER)
@@ -482,6 +564,88 @@ async def _trustscores_share_callback(self, source, message):
         except Exception as e:
             logging.exception(f"Error handling trustscores message: {e}")
 
+    async def sdfl_trainer_update_callback(self, source, message):
+        try:
+            logging.info(
+                f"SDFL | TRAINER_UPDATE callback triggered | "
+                f"source={source} | node_id={message.node_id} | "
+                f"target={message.target} | round={message.round} | "
+                f"local_round={self.round} | role={self.rb.get_role_name(True)}"
+            )
+
+            federation = self.config.participant["scenario_args"]["federation"]
+
+            if federation != "SDFL":
+                logging.info("SDFL | Ignoring TRAINER_UPDATE because federation is not SDFL")
+                return
+
+            role = self.rb.get_role_name(True)
+
+            if role != "aggregator":
+                logging.info(f"SDFL | Ignoring TRAINER_UPDATE because role={role}")
+                return
+
+            if message.target != "aggregator":
+                logging.info(f"SDFL | Ignoring TRAINER_UPDATE because target={message.target}")
+                return
+
+            if message.round != self.round:
+                logging.info(
+                    f"SDFL | Ignoring TRAINER_UPDATE from round={message.round}; "
+                    f"current round={self.round}"
+                )
+                return
+
+            decoded_model = self.trainer.deserialize_model(message.parameters)
+
+            event = UpdateReceivedEvent(
+                decoded_model,
+                message.weight,
+                message.node_id,
+                message.round,
+            )
+
+            await EventManager.get_instance().publish_node_event(event)
+
+            logging.info(
+                f"SDFL aggregator | Published UpdateReceivedEvent | "
+                f"trainer={message.node_id} | round={message.round} | weight={message.weight}"
+            )
+
+        except Exception as e:
+            logging.exception(f"Error handling SDFL TRAINER_UPDATE message: {e}")
+
+    async def sdfl_global_model_callback(self, source, message):
+        role = self.rb.get_role_name(True)
+        logging.info(
+            f"SDFL | GLOBAL_MODEL callback triggered | "
+            f"source={source} | node_id={message.node_id} | "
+            f"target={message.target} | round={message.round} | "
+            f"local_round={self.round} | role={role}"
+        )
+
+        if self.config.participant["scenario_args"].get("federation") == "SDFL":
+            if role != "trainer":
+                logging.info(f"SDFL | Ignoring GLOBAL_MODEL because role={role}")
+                return
+
+            if message.target != "trainer":
+                logging.info(f"SDFL | Ignoring GLOBAL_MODEL because target={message.target}")
+                return
+
+            if message.round != self.round:
+                logging.info(
+                    f"SDFL | Ignoring GLOBAL_MODEL from round={message.round}; "
+                    f"current round={self.round}"
+                )
+                return
+
+        decoded_model = self.trainer.deserialize_model(message.parameters)
+        self.trainer.set_model_parameters(decoded_model)
+
+        self._global_model_source = message.node_id
+        self._global_model_received.set()
+
     """                                                     ##############################
                                                             #    REGISTERING CALLBACKS   #
                                                             ##############################
@@ -498,6 +662,10 @@ async def init_message_callbacks(self):
         await self.register_message_callback(("model", "initialization"), "model_initialization_callback")
         await self.register_message_callback(("model", "update"), "model_update_callback")
 
+        # SDFL model callbacks
+        await self.register_message_callback(("sdflmodel", "trainer_update"), "sdfl_trainer_update_callback")
+        await self.register_message_callback(("sdflmodel", "global_model"), "sdfl_global_model_callback")
+
     async def register_message_events_callbacks(self):
         me_dict = self.cm.get_messages_events()
         message_events = [
@@ -819,6 +987,79 @@ async def _waiting_model_updates(self):
         else:
             logging.error("Aggregation finished with no parameters")
 
+    async def send_sdfl_global_model(self) -> None:
+        model_params = self.trainer.get_model_parameters()
+        serialized_model = (
+            model_params
+            if isinstance(model_params, bytes)
+            else self.trainer.serialize_model(model_params)
+        )
+
+        message = self.cm.create_message(
+            "sdflmodel",
+            "global_model",
+            target="trainer",
+            parameters=serialized_model,
+            weight=self.trainer.get_model_weight(),
+            round=self.round,
+            node_id=self.addr,
+        )
+
+        neighbors = await self.cm.get_addrs_current_connections(
+            only_direct=True,
+            myself=False,
+        )
+
+        logging.info(
+            f"SDFL aggregator | Broadcasting GLOBAL_MODEL to neighbors: {neighbors}"
+        )
+
+        tasks = []
+
+        for neighbor in neighbors:
+            tasks.append(
+                asyncio.create_task(
+                    self.cm.send_message(
+                        neighbor,
+                        message,
+                        "sdflmodel",
+                    )
+                )
+            )
+
+        if tasks:
+            await asyncio.gather(*tasks)
+        else:
+            logging.warning(
+                "SDFL aggregator | No neighbors available to send GLOBAL_MODEL"
+            )
+
+    def _is_sdfl_trainer(self):
+        federation = self.config.participant["scenario_args"].get("federation")
+        return federation == "SDFL" and self.rb.get_role_name(effective=True) == "trainer"
+
+    def prepare_waiting_global_model(self):
+        self._global_model_source = None
+        self._global_model_received.clear()
+
+    async def _waiting_global_model(self):
+        """
+        Wait for a global model sent by the current SDFL aggregator.
+
+        SDFL trainers must not aggregate locally. They train, send their update,
+        and block here until a global model is received and applied by
+        ``sdfl_global_model_callback``, or until the aggregation timeout expires.
+        """
+        timeout = self.config.participant["aggregator_args"]["aggregation_timeout"]
+        logging.info(f"💤  Waiting global SDFL model in round {self.round}.")
+        try:
+            await asyncio.wait_for(self._global_model_received.wait(), timeout=timeout)
+            logging.info(
+                f"🤖  SDFL trainer | Global model received from {self._global_model_source} in round {self.round}"
+            )
+        except TimeoutError:
+            logging.error(f"🤖  SDFL trainer | Timeout waiting global model in round {self.round}")
+
     def print_round_information(self):
         print_msg_box(
             msg=f"Round {self.round} of {self.total_rounds} started.",
@@ -907,6 +1148,7 @@ async def _learning_cycle(self):
                     title="Round information",
                 )
 
+                await self.wait_pending_leadership_ack()
                 await self.update_self_role()
 
                 logging.info(f"Federation nodes: {self.federation_nodes}")
diff --git a/nebula/core/network/actions.py b/nebula/core/network/actions.py
index bd5bde211..294301399 100644
--- a/nebula/core/network/actions.py
+++ b/nebula/core/network/actions.py
@@ -97,6 +97,14 @@ class TrustscoresAction(Enum):
 
     SHARE = nebula_pb2.TrustscoresMessage.Action.SHARE
 
+class SdflmodelAction(Enum):
+    """
+    Enum for SDFL model messages exchanged through broadcast/forwarding.
+    """
+
+    TRAINER_UPDATE = nebula_pb2.SdflmodelMessage.Action.TRAINER_UPDATE
+    GLOBAL_MODEL = nebula_pb2.SdflmodelMessage.Action.GLOBAL_MODEL
+
 
 # Mapping between message type strings and their corresponding Enum classes
 ACTION_CLASSES = {
@@ -110,6 +118,7 @@ class TrustscoresAction(Enum):
     "reputation": ReputationAction,
     "trustworthiness": TrustworthinessAction,
     "trustscores": TrustscoresAction,
+    "sdflmodel": SdflmodelAction,
 }
 
 
diff --git a/nebula/core/network/communications.py b/nebula/core/network/communications.py
index 5533b379f..a6cd861a0 100755
--- a/nebula/core/network/communications.py
+++ b/nebula/core/network/communications.py
@@ -21,7 +21,7 @@
 
 BLACKLIST_EXPIRATION_TIME = 60
 
-_COMPRESSED_MESSAGES = ["model", "offer_model"]
+_COMPRESSED_MESSAGES = ["model", "offer_model", "sdflmodel"]
 
 
 class CommunicationsManager:
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index e66edad4f..25e41d66a 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -86,6 +86,13 @@ def _define_message_templates(self):
                     "weight": 1,
                 },
             },
+            "sdflmodel": {
+                "parameters": ["action", "target", "parameters", "weight", "round", "node_id"],
+                "defaults": {
+                    "weight": 1,
+                    "node_id": self.addr,
+                },
+            },
             "reputation": {
                 "parameters": ["node_id", "score", "round", "action"],
                 "defaults": {
@@ -157,7 +164,7 @@ async def process_message(self, data, addr_from):
             addr_from (str): Address from which the message was received.
         """
         not_processing_messages = {"control_message", "connection_message"}
-        special_processing_messages = {"discovery_message", "federation_message", "model_message", "trustscores_message"}
+        special_processing_messages = {"discovery_message", "federation_message", "model_message", "trustscores_message", "sdflmodel_message"}
 
         try:
             message_wrapper = nebula_pb2.Wrapper()
@@ -239,6 +246,9 @@ def _should_forward_message(self, message_type, message_wrapper):
         if message_type == "trustscores_message":
             return True
 
+        if  self.cm.config.participant["scenario_args"]["federation"] == "SDFL" and message_type == "sdflmodel_message":
+            return True
+
     def create_message(self, message_type: str, action: str = "", *args, **kwargs):
         """
         Create and serialize a protobuf message of the given type and action.
diff --git a/nebula/core/noderole.py b/nebula/core/noderole.py
index 9bd258fef..7eaf9232d 100644
--- a/nebula/core/noderole.py
+++ b/nebula/core/noderole.py
@@ -32,7 +32,7 @@ class Role(Enum):
     IDLE = "idle"
     SERVER = "server"
     MALICIOUS = "malicious"
-    
+
 def factory_node_role(role: str) -> Role:
     if role == "trainer":
         return Role.TRAINER
@@ -68,27 +68,27 @@ def __init__(self):
         self._next_role: Role = None
         self._next_role_locker = Locker("next_role_locker", async_lock=True)
         self._source_to_notificate = None
-        
+
     @abstractmethod
     def get_role(self):
         """
         Returns the Role enum value representing the current role of the node.
         """
         raise NotImplementedError
-    
+
     @abstractmethod
     def get_role_name(self, effective=False):
         """
         Returns a string representation of the current role.
-        
+
         Args:
             effective (bool): Whether to return the name of the current effective role when going as malicious.
-        
+
         Returns:
             str: Name of the role.
         """
         raise NotImplementedError
-    
+
     @abstractmethod
     async def extended_learning_cycle(self):
         """
@@ -98,19 +98,19 @@ async def extended_learning_cycle(self):
         including training, aggregating updates, and coordinating with neighbors.
         """
         raise NotImplementedError
-    
+
     @abstractmethod
     async def select_nodes_to_wait(self):
         """
         Determines which neighbors the node should wait for during the current cycle.
 
         This logic varies depending on whether the node is an aggregator, trainer, or other role.
-        
+
         Returns:
             Set[Any]: A set of neighbor node identifiers to wait for.
         """
         raise NotImplementedError
-    
+
     @abstractmethod
     async def resolve_missing_updates(self):
         """
@@ -118,16 +118,16 @@ async def resolve_missing_updates(self):
 
         For example, an aggregator might default to a fresh model, while a trainer might proceed
         with its own local model.
-        
+
         Returns:
             Any: The resolution outcome depending on the role's specific logic.
         """
         raise NotImplementedError
-    
+
     async def set_next_role(self, role: Role, source_to_notificate = None):
         """
         Schedules a role change and optionally stores the source to notify upon completion.
-        
+
         Args:
             role (Role): The new role to transition to.
             source_to_notificate (Optional[Any]): Identifier of the node that triggered the change.
@@ -135,7 +135,7 @@ async def set_next_role(self, role: Role, source_to_notificate = None):
         async with self._next_role_locker:
             self._next_role = role
             self._source_to_notificate = source_to_notificate
-        
+
     async def get_next_role(self) -> Role:
         """
         Retrieves and clears the next role value.
@@ -147,7 +147,7 @@ async def get_next_role(self) -> Role:
             next_role = self._next_role
             self._next_role = None
         return next_role
-    
+
     async def get_source_to_notificate(self):
         """
         Retrieves and clears the stored source to notify after a role change.
@@ -159,7 +159,7 @@ async def get_source_to_notificate(self):
             source_to_notificate = self._source_to_notificate
             self._source_to_notificate = None
         return source_to_notificate
-        
+
     async def update_role_needed(self):
         """
         Checks whether a role update is scheduled.
@@ -170,12 +170,12 @@ async def update_role_needed(self):
         async with self._next_role_locker:
             updt_needed = self._next_role != None
         return updt_needed
-    
+
 """                                                         ##############################
                                                             #     MALICIOUS BEHAVIOR     #
                                                             ##############################
 """
-    
+
 class MaliciousRoleBehavior(RoleBehavior):
     def __init__(self, engine: Engine, config: Config):
         super().__init__()
@@ -193,28 +193,28 @@ def __init__(self, engine: Engine, config: Config):
         benign_role = self._config.participant["adversarial_args"]["fake_behavior"]
         self._fake_role_behavior = factory_role_behavior(benign_role, self._engine, self._config)
         self._role = factory_node_role("malicious")
-    
+
     def get_role(self):
         return self._role
-        
+
     def get_role_name(self, effective=False):
         if effective:
             return self._fake_role_behavior.get_role_name()
         return f"{self._role.value} as {self._fake_role_behavior.get_role_name()}"
-    
-    async def extended_learning_cycle(self):     
+
+    async def extended_learning_cycle(self):
         try:
             await self.attack.attack()
         except Exception:
             attack_name = self._config.participant["adversarial_args"]["attacks"]
             logging.exception(f"Attack {attack_name} failed")
-            
+
         await self._fake_role_behavior.extended_learning_cycle()
-        
+
     async def select_nodes_to_wait(self):
         nodes = await self._fake_role_behavior.select_nodes_to_wait()
         return nodes
-    
+
     async def resolve_missing_updates(self):
         return await self._fake_role_behavior.resolve_missing_updates()
 
@@ -222,20 +222,20 @@ async def resolve_missing_updates(self):
                                                             # TRAINER AGGREGATOR BEHAVIOR #
                                                             ###############################
 """
-        
+
 class TrainerAggregatorRoleBehavior(RoleBehavior):
     def __init__(self, engine: Engine, config: Config):
         super().__init__()
         self._engine = engine
         self._config = config
         self._role = factory_node_role("trainer_aggregator")
-        
+
     def get_role(self):
-        return self._role    
-        
+        return self._role
+
     def get_role_name(self, effective=False):
         return self._role.value
-    
+
     async def extended_learning_cycle(self):
         await self._engine.trainer.test()
         await self._engine.trainning_in_progress_lock.acquire_async()
@@ -249,13 +249,13 @@ async def extended_learning_cycle(self):
 
         mpe = ModelPropagationEvent(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False), "stable")
         await EventManager.get_instance().publish_node_event(mpe)
-        
+
         await self._engine._waiting_model_updates()
-        
+
     async def select_nodes_to_wait(self):
         nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=True)
         return nodes
-    
+
     async def resolve_missing_updates(self):
         return {}
 
@@ -263,7 +263,7 @@ async def resolve_missing_updates(self):
                                                             #    AGGREGATOR BEHAVIOR     #
                                                             ##############################
 """
-        
+
 class AggregatorRoleBehavior(RoleBehavior):
     def __init__(self, engine: Engine, config: Config):
         super().__init__()
@@ -271,70 +271,89 @@ def __init__(self, engine: Engine, config: Config):
         self._config = config
         self._role = factory_node_role("aggregator")
         self._transfer_send = False
-        
+
     def get_role(self):
-        return self._role    
-        
+        return self._role
+
     def get_role_name(self, effective=False):
         return self._role.value
-    
+
     async def extended_learning_cycle(self):
         await self._engine.trainer.test()
-            
+
         await self._engine._waiting_model_updates()
-        
+
+        federation = self._config.participant["scenario_args"].get("federation")
+
+        if federation == "SDFL":
+
+            await self._engine.send_sdfl_global_model()
+            await self._transfer_leadership()
+
+            return
+
+
         mpe = ModelPropagationEvent(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False), "stable")
         await EventManager.get_instance().publish_node_event(mpe)
-        
-        # Transfer leadership
+
+        await self._transfer_leadership()
+
+    async def _transfer_leadership(self):
         neighbors = await self._engine.cm.get_addrs_current_connections(myself=False)
         if len(neighbors) and not self._transfer_send:
             random_neighbor = random.choice(list(neighbors))
             lt_message = self._engine.cm.create_message("control", "leadership_transfer")
             logging.info(f"Sending transfer leadership to: {random_neighbor}")
+            if self._config.participant["scenario_args"].get("federation") == "SDFL":
+                await self._engine.mark_leadership_transfer_pending(random_neighbor)
             asyncio.create_task(self._engine.cm.send_message(random_neighbor, lt_message))
             self._transfer_send = True
-        
+
     async def select_nodes_to_wait(self):
+        if self._config.participant["scenario_args"].get("federation") == "SDFL":
+            nodes = self._engine.get_sdfl_expected_trainers()
+            if nodes:
+                return nodes
+
         nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
         return nodes
-    
+
     async def resolve_missing_updates(self):
         return (self._engine.trainer.get_model_parameters(), self._engine.trainer.BYPASS_MODEL_WEIGHT)
-        
+
 """                                                         ##############################
                                                             #       SERVER BEHAVIOR      #
                                                             ##############################
 """
-        
+
 class ServerRoleBehavior(RoleBehavior):
     from datetime import datetime
-    
+
     def __init__(self, engine: Engine, config: Config):
         super().__init__()
         self._engine = engine
         self._config = config
         self._start_time = ServerRoleBehavior.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
         self._role = factory_node_role("server")
-        
+
     def get_role(self):
-        return self._role    
-        
+        return self._role
+
     def get_role_name(self, effective=False):
         return self._role.value
-        
+
     async def extended_learning_cycle(self):
         await self._engine.trainer.test()
 
         await self._engine._waiting_model_updates()
-        
+
         mpe = ModelPropagationEvent(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False), "stable")
         await EventManager.get_instance().publish_node_event(mpe)
-        
+
     async def select_nodes_to_wait(self):
         nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
-        return nodes 
-    
+        return nodes
+
     async def resolve_missing_updates(self):
         return (self._engine.trainer.get_model_parameters(), self._engine.trainer.BYPASS_MODEL_WEIGHT)
 
@@ -342,35 +361,89 @@ async def resolve_missing_updates(self):
                                                             #      TRAINER BEHAVIOR      #
                                                             ##############################
 """
-        
+
 class TrainerRoleBehavior(RoleBehavior):
     def __init__(self, engine: Engine, config: Config):
         super().__init__()
         self._engine = engine
         self._config = config
         self._role = factory_node_role("trainer")
-        
+
     def get_role(self):
-        return self._role    
-        
+        return self._role
+
     def get_role_name(self, effective=False):
         return self._role.value
-        
+
     async def extended_learning_cycle(self):
         logging.info("Waiting global update | Assign _waiting_global_update = True")
 
         await self._engine.trainer.test()
         await self._engine.trainer.train()
 
+        federation = self._config.participant["scenario_args"].get("federation")
+
+        if federation == "SDFL":
+            self._engine.prepare_waiting_global_model()
+
+            model_params = self._engine.trainer.get_model_parameters()
+            serialized_model = (
+                model_params
+                if isinstance(model_params, bytes)
+                else self._engine.trainer.serialize_model(model_params)
+            )
+
+            message = self._engine.cm.create_message(
+                "sdflmodel",
+                "trainer_update",
+                target="aggregator",
+                parameters=serialized_model,
+                weight=self._engine.trainer.get_model_weight(),
+                round=self._engine.round,
+                node_id=self._engine.addr,
+            )
+
+            neighbors = await self._engine.cm.get_addrs_current_connections(
+                only_direct=True,
+                myself=False,
+            )
+
+            logging.info(
+                f"SDFL trainer | Broadcasting TRAINER_UPDATE to neighbors: {neighbors}"
+            )
+
+            tasks = []
+
+            for neighbor in neighbors:
+                tasks.append(
+                    asyncio.create_task(
+                        self._engine.cm.send_message(
+                            neighbor,
+                            message,
+                            "sdflmodel",
+                        )
+                    )
+                )
+
+            if tasks:
+                await asyncio.gather(*tasks)
+            else:
+                logging.warning(
+                    "SDFL trainer | No neighbors available to send TRAINER_UPDATE"
+                )
+
+            await self._engine._waiting_global_model()
+            return
+
         mpe = ModelPropagationEvent(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False), "stable")
         await EventManager.get_instance().publish_node_event(mpe)
-        
+
         await self._engine._waiting_model_updates()
-        
+
     async def select_nodes_to_wait(self):
         nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
         return nodes
-    
+
     async def resolve_missing_updates(self):
         return (self._engine.trainer.get_model_parameters(), self._engine.trainer.get_model_weight())
 
@@ -378,31 +451,31 @@ async def resolve_missing_updates(self):
                                                             #       IDLE BEHAVIOR        #
                                                             ##############################
 """
-        
+
 class IdleRoleBehavior(RoleBehavior):
     def __init__(self, engine: Engine, config: Config):
         super().__init__()
         self._engine = engine
         self._config = config
         self._role = factory_node_role("idle")
-        
+
     def get_role(self):
-        return self._role    
-        
+        return self._role
+
     def get_role_name(self, effective=False):
         return self._role.value
-        
+
     async def extended_learning_cycle(self):
         logging.info("Waiting global update | Assign _waiting_global_update = True")
         await self._engine._waiting_model_updates()
-        
+
     async def select_nodes_to_wait(self):
         nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
         return nodes
-    
+
     async def resolve_missing_updates(self):
         raise NotImplementedError
-        
+
 """                                                         ##############################
                                                             #       PROXY BEHAVIOR       #
                                                             ##############################
@@ -414,21 +487,21 @@ def __init__(self, engine: Engine, config: Config):
         self._engine = engine
         self._config = config
         self._role = factory_node_role("proxy")
-        
+
     def get_role(self):
-        return self._role    
-        
+        return self._role
+
     def get_role_name(self, effective=False):
         return self._role.value
-        
+
     async def extended_learning_cycle(self):
         logging.info("Waiting global update | Assign _waiting_global_update = True")
         await self._engine._waiting_model_updates()
-        
+
     async def select_nodes_to_wait(self):
         nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
-        return nodes 
-    
+        return nodes
+
     async def resolve_missing_updates(self):
         raise NotImplementedError
 
@@ -436,12 +509,12 @@ async def resolve_missing_updates(self):
                                                             #    UTILS ROLE BEHAVIORS    #
                                                             ##############################
 """
-          
+
 class roleBehaviorException(Exception):
     pass
 
-def factory_role_behavior(role: str, engine: Engine, config: Config) -> RoleBehavior | None: 
-     
+def factory_role_behavior(role: str, engine: Engine, config: Config) -> RoleBehavior | None:
+
     role_behaviors = {
         "malicious": MaliciousRoleBehavior,
         "trainer": TrainerRoleBehavior,
@@ -451,14 +524,14 @@ def factory_role_behavior(role: str, engine: Engine, config: Config) -> RoleBeha
         "proxy": ProxyRoleBehavior,
         "idle": IdleRoleBehavior,
     }
-    
+
     node_role = role_behaviors.get(role, None)
 
     if node_role:
         return node_role(engine, config)
     else:
         raise roleBehaviorException(f"Node Role Behavior {role} not found")
-    
+
 def change_role_behavior(old_role: RoleBehavior, new_role: Role, *parameters) -> RoleBehavior:
     engine, config = parameters
     if not isinstance(old_role, MaliciousRoleBehavior):
@@ -466,8 +539,4 @@ def change_role_behavior(old_role: RoleBehavior, new_role: Role, *parameters) ->
     else:
         fake_behavior = factory_role_behavior(new_role.value, engine, config)
         old_role._fake_role_behavior = fake_behavior
-        return old_role            
-            
-
-
-        
+        return old_role
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index 46c127c05..acb0969f3 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -28,6 +28,7 @@ message Wrapper {
     LinkMessage link_message = 11;
     TrustworthinessMessage trustworthiness_message = 12;
     TrustscoresMessage trustscores_message = 13;
+    SdflmodelMessage sdflmodel_message = 14;
   }
 }
 
@@ -75,6 +76,19 @@ message ModelMessage {
   int32 round = 3;            // Identifies the communication round, particularly useful in iterative processes.
 }
 
+message SdflmodelMessage {
+  enum Action {
+    TRAINER_UPDATE = 0;
+    GLOBAL_MODEL = 1;
+  }
+  Action action = 1;
+  string target = 2;          // Target role: "aggregator" or "trainer".
+  bytes parameters = 3;       // Serialized form of the model parameters.
+  int64 weight = 4;           // Significance or weighting factor of this model update.
+  int32 round = 5;            // Identifies the communication round.
+  string node_id = 6;         // Logical producer of the update/model, preserved during forwarding.
+}
+
 message ConnectionMessage {
   enum Action {
     CONNECT = 0;
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index 0cdc85585..3d8003607 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xac\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 \x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 
\x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 \x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 
\x01(\t\"\x80\x04\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\x12\x12\n\ndp_enabled\x18\x15 \x01(\x08\x12\x12\n\ndp_epsilon\x18\x16 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xe3\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x12\x35\n\x11sdflmodel_message\x18\x0e \x01(\x0b\x32\x18.nebula.SdflmodelMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 
\x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\xc7\x01\n\x10SdflmodelMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.SdflmodelMessage.Action\x12\x0e\n\x06target\x18\x02 \x01(\t\x12\x12\n\nparameters\x18\x03 \x01(\x0c\x12\x0e\n\x06weight\x18\x04 \x01(\x03\x12\r\n\x05round\x18\x05 \x01(\x05\x12\x0f\n\x07node_id\x18\x06 \x01(\t\".\n\x06\x41\x63tion\x12\x12\n\x0eTRAINER_UPDATE\x10\x00\x12\x10\n\x0cGLOBAL_MODEL\x10\x01\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 
\x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\x80\x04\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\x12\x12\n\ndp_enabled\x18\x15 \x01(\x08\x12\x12\n\ndp_epsilon\x18\x16 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 
\x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -21,49 +21,53 @@
 
   DESCRIPTOR._options = None
   _WRAPPER._serialized_start=25
-  _WRAPPER._serialized_end=709
-  _DISCOVERYMESSAGE._serialized_start=712
-  _DISCOVERYMESSAGE._serialized_end=870
-  _DISCOVERYMESSAGE_ACTION._serialized_start=818
-  _DISCOVERYMESSAGE_ACTION._serialized_end=870
-  _CONTROLMESSAGE._serialized_start=873
-  _CONTROLMESSAGE._serialized_end=1082
-  _CONTROLMESSAGE_ACTION._serialized_start=952
-  _CONTROLMESSAGE_ACTION._serialized_end=1082
-  _FEDERATIONMESSAGE._serialized_start=1085
-  _FEDERATIONMESSAGE._serialized_end=1290
-  _FEDERATIONMESSAGE_ACTION._serialized_start=1190
-  _FEDERATIONMESSAGE_ACTION._serialized_end=1290
-  _MODELMESSAGE._serialized_start=1292
-  _MODELMESSAGE._serialized_end=1357
-  _CONNECTIONMESSAGE._serialized_start=1360
-  _CONNECTIONMESSAGE._serialized_end=1503
-  _CONNECTIONMESSAGE_ACTION._serialized_start=1431
-  _CONNECTIONMESSAGE_ACTION._serialized_end=1503
-  _DISCOVERMESSAGE._serialized_start=1506
-  _DISCOVERMESSAGE._serialized_end=1655
-  _DISCOVERMESSAGE_ACTION._serialized_start=1573
-  _DISCOVERMESSAGE_ACTION._serialized_end=1655
-  _OFFERMESSAGE._serialized_start=1658
-  _OFFERMESSAGE._serialized_end=1864
-  _OFFERMESSAGE_ACTION._serialized_start=1821
-  _OFFERMESSAGE_ACTION._serialized_end=1864
-  _LINKMESSAGE._serialized_start=1866
-  _LINKMESSAGE._serialized_end=1985
-  _LINKMESSAGE_ACTION._serialized_start=1940
-  _LINKMESSAGE_ACTION._serialized_end=1985
-  _REPUTATIONMESSAGE._serialized_start=1988
-  _REPUTATIONMESSAGE._serialized_end=2125
-  _REPUTATIONMESSAGE_ACTION._serialized_start=2106
-  _REPUTATIONMESSAGE_ACTION._serialized_end=2125
-  _RESPONSEMESSAGE._serialized_start=2127
-  _RESPONSEMESSAGE._serialized_end=2162
-  _TRUSTWORTHINESSMESSAGE._serialized_start=2165
-  _TRUSTWORTHINESSMESSAGE._serialized_end=2677
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2657
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2677
-  _TRUSTSCORESMESSAGE._serialized_start=2680
-  _TRUSTSCORESMESSAGE._serialized_end=2816
-  _TRUSTSCORESMESSAGE_ACTION._serialized_start=2106
-  _TRUSTSCORESMESSAGE_ACTION._serialized_end=2125
+  _WRAPPER._serialized_end=764
+  _DISCOVERYMESSAGE._serialized_start=767
+  _DISCOVERYMESSAGE._serialized_end=925
+  _DISCOVERYMESSAGE_ACTION._serialized_start=873
+  _DISCOVERYMESSAGE_ACTION._serialized_end=925
+  _CONTROLMESSAGE._serialized_start=928
+  _CONTROLMESSAGE._serialized_end=1137
+  _CONTROLMESSAGE_ACTION._serialized_start=1007
+  _CONTROLMESSAGE_ACTION._serialized_end=1137
+  _FEDERATIONMESSAGE._serialized_start=1140
+  _FEDERATIONMESSAGE._serialized_end=1345
+  _FEDERATIONMESSAGE_ACTION._serialized_start=1245
+  _FEDERATIONMESSAGE_ACTION._serialized_end=1345
+  _MODELMESSAGE._serialized_start=1347
+  _MODELMESSAGE._serialized_end=1412
+  _SDFLMODELMESSAGE._serialized_start=1415
+  _SDFLMODELMESSAGE._serialized_end=1614
+  _SDFLMODELMESSAGE_ACTION._serialized_start=1568
+  _SDFLMODELMESSAGE_ACTION._serialized_end=1614
+  _CONNECTIONMESSAGE._serialized_start=1617
+  _CONNECTIONMESSAGE._serialized_end=1760
+  _CONNECTIONMESSAGE_ACTION._serialized_start=1688
+  _CONNECTIONMESSAGE_ACTION._serialized_end=1760
+  _DISCOVERMESSAGE._serialized_start=1763
+  _DISCOVERMESSAGE._serialized_end=1912
+  _DISCOVERMESSAGE_ACTION._serialized_start=1830
+  _DISCOVERMESSAGE_ACTION._serialized_end=1912
+  _OFFERMESSAGE._serialized_start=1915
+  _OFFERMESSAGE._serialized_end=2121
+  _OFFERMESSAGE_ACTION._serialized_start=2078
+  _OFFERMESSAGE_ACTION._serialized_end=2121
+  _LINKMESSAGE._serialized_start=2123
+  _LINKMESSAGE._serialized_end=2242
+  _LINKMESSAGE_ACTION._serialized_start=2197
+  _LINKMESSAGE_ACTION._serialized_end=2242
+  _REPUTATIONMESSAGE._serialized_start=2245
+  _REPUTATIONMESSAGE._serialized_end=2382
+  _REPUTATIONMESSAGE_ACTION._serialized_start=2363
+  _REPUTATIONMESSAGE_ACTION._serialized_end=2382
+  _RESPONSEMESSAGE._serialized_start=2384
+  _RESPONSEMESSAGE._serialized_end=2419
+  _TRUSTWORTHINESSMESSAGE._serialized_start=2422
+  _TRUSTWORTHINESSMESSAGE._serialized_end=2934
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2914
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2934
+  _TRUSTSCORESMESSAGE._serialized_start=2937
+  _TRUSTSCORESMESSAGE._serialized_end=3073
+  _TRUSTSCORESMESSAGE_ACTION._serialized_start=2363
+  _TRUSTSCORESMESSAGE_ACTION._serialized_end=2382
 # @@protoc_insertion_point(module_scope)
diff --git a/nebula/core/utils/locker.py b/nebula/core/utils/locker.py
index 160897bdc..2bd69d8f4 100755
--- a/nebula/core/utils/locker.py
+++ b/nebula/core/utils/locker.py
@@ -92,6 +92,7 @@ async def locked_async(self):
         result = self._lock.locked()
         if self._verbose:
             logging.debug(f"🔐  Async lock [{self._name}] is locked? {result}")
+        return result
 
     async def __aenter__(self):
         logging.debug(f"🔒  Acquiring async lock [{self._name}] using [async with] statement")

From 012619157f9774aed2ea0cfa0fe179c7f1c9154c Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 6 May 2026 10:36:10 +0200
Subject: [PATCH 39/66] SDFL timeouts fixed, nodes accept global update after
 learning cycle, leadership ACK updated and fixed, last round: leadership not
 transferred

---
 nebula/core/engine.py            | 12 +++++++++---
 nebula/core/network/forwarder.py |  7 ++++++-
 nebula/core/noderole.py          |  7 +++++++
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 75007c4df..536645680 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -266,18 +266,23 @@ async def wait_pending_leadership_ack(self):
         except TimeoutError:
             logging.warning(
                 f"SDFL leadership | ACK from {successor} not received before next round; "
-                "keeping aggregator role"
+                "keeping aggregator role until ACK arrives"
             )
 
         async with self._leadership_transfer_lock:
             if self._leadership_transfer_pending != successor:
                 return
 
+            if self._leadership_transfer_ack.is_set():
+                ack_received = True
+
+            if not ack_received:
+                return
+
             self._leadership_transfer_pending = None
             self._leadership_transfer_ack.clear()
 
-        if ack_received:
-            await self.rb.set_next_role(Role.TRAINER)
+        await self.rb.set_next_role(Role.TRAINER)
 
     def get_sdfl_expected_trainers(self) -> set[str]:
         nodes = self.config.participant.get("trust_args", {}).get("scenario", {}).get("nodes", {})
@@ -1023,6 +1028,7 @@ async def send_sdfl_global_model(self) -> None:
                         neighbor,
                         message,
                         "sdflmodel",
+                        allow_after_learning_finished=True,
                     )
                 )
             )
diff --git a/nebula/core/network/forwarder.py b/nebula/core/network/forwarder.py
index db98246fb..510f89fd2 100755
--- a/nebula/core/network/forwarder.py
+++ b/nebula/core/network/forwarder.py
@@ -139,7 +139,12 @@ def _allow_forward_after_learning_finished(self, msg: bytes) -> bool:
         try:
             message_wrapper = nebula_pb2.Wrapper()
             message_wrapper.ParseFromString(msg)
-            return message_wrapper.WhichOneof("message") == "trustscores_message"
+            message_type = message_wrapper.WhichOneof("message")
+            if message_type == "trustscores_message":
+                return True
+            if message_type == "sdflmodel_message":
+                return message_wrapper.sdflmodel_message.action == nebula_pb2.SdflmodelMessage.Action.GLOBAL_MODEL
+            return False
         except Exception as e:
             logging.warning(f"🔁  Could not inspect forwarded message type: {e!s}")
             return False
diff --git a/nebula/core/noderole.py b/nebula/core/noderole.py
index 7eaf9232d..7f7f7bb52 100644
--- a/nebula/core/noderole.py
+++ b/nebula/core/noderole.py
@@ -299,6 +299,13 @@ async def extended_learning_cycle(self):
         await self._transfer_leadership()
 
     async def _transfer_leadership(self):
+        if self._engine.round >= self._engine.total_rounds - 1:
+            logging.info(
+                f"Skipping leadership transfer in final round {self._engine.round} "
+                f"of {self._engine.total_rounds - 1}"
+            )
+            return
+
         neighbors = await self._engine.cm.get_addrs_current_connections(myself=False)
         if len(neighbors) and not self._transfer_send:
             random_neighbor = random.choice(list(neighbors))

From eef6fef4d369f31037c1c27c4f70e883e743718a Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 6 May 2026 13:08:04 +0200
Subject: [PATCH 40/66] Trustworthiness refactor: factsheets

---
 nebula/addons/trustworthiness/calculation.py  |  85 +++++-
 .../addons/trustworthiness/dfl_factsheet.py   | 145 ++++-----
 nebula/addons/trustworthiness/factsheet.py    | 278 ++++++++----------
 .../trustworthiness/factsheet_common.py       |  43 +++
 .../addons/trustworthiness/trustworthiness.py |  13 +-
 nebula/addons/trustworthiness/utils.py        |  18 +-
 6 files changed, 304 insertions(+), 278 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index e0bd71959..5c526dfbd 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -230,16 +230,89 @@ def check_properties(*args):
 # Local/global data distribution and participation metrics
 # ---------------------------------------------------------------------------
 
+def get_class_count_file(scenario_name, participant_id):
+    """
+    Returns the class-count file path for a participant.
+    """
+    return os.path.join(
+        os.environ.get("NEBULA_LOGS_DIR"),
+        scenario_name,
+        "trustworthiness",
+        f"{str(participant_id)}_class_count.json",
+    )
+
+
+def load_class_counts(scenario_name, participant_id):
+    """
+    Loads the saved class-count distribution for a participant.
+    """
+    with open(get_class_count_file(scenario_name, participant_id), "r") as file:
+        return json.load(file)
+
+
+def get_class_imbalance_from_counts(class_counts):
+    """
+    Calculates class imbalance as coefficient of variation over class counts.
+
+    Higher values mean a more imbalanced local dataset.
+    """
+    return get_cv(list=list(class_counts.values()))
+
+
+def get_class_imbalance_score(class_imbalance):
+    """
+    Converts class imbalance into a trust score.
+
+    A score of 1 means balanced classes; higher imbalance lowers the score.
+    """
+    return 1 / (1 + class_imbalance)
+
+
 def get_class_imbalance_local(participant_id, experiment_name):
-    data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
+    class_distribution = load_class_counts(experiment_name, participant_id)
+    return get_class_imbalance_from_counts(class_distribution)
+
+
+def get_local_class_imbalance_score(scenario_name, participant_id):
+    """
+    Calculates the class-imbalance trust score for a participant.
+    """
+    return get_class_imbalance_score(get_class_imbalance_local(participant_id, scenario_name))
+
+
+def get_entropy_from_class_counts(class_counts, normalize=False):
+    """
+    Calculates entropy from class counts.
+
+    When normalized, returns a value in [0, 1] independent of class count.
+    """
+    counts = np.array(list(class_counts.values()), dtype=float)
+    total = counts.sum()
+    if total <= 0:
+        return 0.0
+
+    probabilities = counts / total
+    entropy_value = entropy(probabilities, base=2)
 
-    with open(data_class_count_file, "r") as file:
-        class_distribution = json.load(file)
+    if not normalize:
+        return round(float(entropy_value), 6)
+
+    class_count = len(probabilities)
+    if class_count <= 1:
+        return 0.0
 
-    class_samples_sizes = [x for x in class_distribution.values()]
-    class_imbalance = get_cv(list=class_samples_sizes)
+    normalized_entropy = float(entropy_value / np.log2(class_count))
+    return max(0.0, min(1.0, normalized_entropy))
 
-    return class_imbalance
+
+def get_local_normalized_entropy(scenario_name, participant_id):
+    """
+    Calculates normalized entropy from a participant's saved class counts.
+    """
+    return get_entropy_from_class_counts(
+        load_class_counts(scenario_name, participant_id),
+        normalize=True,
+    )
 
 
 def get_cv(list=None, std=None, mean=None):
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index fb0bb93c8..92b04ce46 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -1,84 +1,79 @@
 import logging
-import json
 import os
-import shutil
-import numpy as np
 import pandas as pd
 
 from nebula.addons.trustworthiness.calculation import (
     get_bytes_model,
-    get_cv,
     get_dp_local,
     get_elapsed_time,
+    get_local_class_imbalance_score,
+    get_local_normalized_entropy,
     get_underfitting_score_local,
 )
 from nebula.addons.trustworthiness.factsheet_common import (
-    cap_score,
+    get_factsheet_path,
+    get_trustworthiness_dir,
+    load_or_create_factsheet,
     populate_common_pre_train_sections,
     populate_model_quality_metrics,
     populate_participation,
     populate_reliability,
     populate_reputation,
     set_dp_configuration,
+    write_factsheet,
 )
 from nebula.addons.trustworthiness.utils import read_csv, get_all_data_entropy
 
-dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
-
-def populate_factsheet(
-    experiment_name,
-    participant_idx,
-    data,
-    start_time,
-    end_time,
-    model,
-    train_loader,
-    test_loader,
-    reputation_summary=None,
-    participation_summary=None,
-    reliability_summary=None,
-):
-    trust_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
-    os.makedirs(trust_dir, exist_ok=True)
-
-    factsheet_name = f"factsheet_participant_{participant_idx}.json"
-    factsheet_path = os.path.join(trust_dir, factsheet_name)
-
-    template_path = os.path.join(dirname, "configs", "factsheet_template_dfl.json")
-    if not os.path.exists(factsheet_path):
-        shutil.copyfile(template_path, factsheet_path)
-
-    with open(factsheet_path, "r+", encoding="utf-8") as f:
-        factsheet = {}
-        factsheet = json.load(f)
+class DflFactsheet:
+    def __init__(self):
+        """
+        Manager class to populate the FactSheet
+        """
+        self.factsheet_template_file_nm = "factsheet_template_dfl.json"
+
+    def populate_factsheet_dfl(
+        self,
+        scenario_name,
+        participant_idx,
+        data,
+        start_time,
+        end_time,
+        model,
+        train_loader,
+        test_loader,
+        reputation_summary=None,
+        participation_summary=None,
+        reliability_summary=None,
+    ):
+
+        self.factsheet_file_nm = f"factsheet_participant_{participant_idx}.json"
+
+        factsheet_file = get_factsheet_path(scenario_name, self.factsheet_file_nm)
+
+        factsheet_file, factsheet = load_or_create_factsheet(
+            scenario_name,
+            self.factsheet_file_nm,
+            self.factsheet_template_file_nm,
+        )
 
         logging.info("DFL FactSheet: Populating factsheet")
 
         populate_common_pre_train_sections(factsheet, data, model)
 
-        dp_enabled, dp_epsilon = get_dp_local(experiment_name, participant_idx)
+        dp_enabled, dp_epsilon = get_dp_local(scenario_name, participant_idx)
         set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
 
-        files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
+        files_dir = get_trustworthiness_dir(scenario_name)
 
         emissions_file = os.path.join(files_dir, f"emissions_{participant_idx}.csv")
 
-        get_all_data_entropy(experiment_name)
-
-        data_class_count_file = os.path.join(
-            os.environ.get('NEBULA_LOGS_DIR'),
-            experiment_name,
-            "trustworthiness",
-            f"{str(participant_idx)}_class_count.json",
-        )
-
-        entropy_local = normalized_entropy_from_class_counts(data_class_count_file)
+        get_all_data_entropy(scenario_name)
 
-        factsheet["data"]["entropy_local"] = entropy_local
+        factsheet["data"]["entropy_local"] = get_local_normalized_entropy(scenario_name, participant_idx)
 
-        df = load_round_metrics(experiment_name, participant_idx)
+        df = load_round_metrics(scenario_name, participant_idx)
         acc = df["accuracy"].astype(float).to_numpy()
         loss = df["loss"].astype(float).to_numpy()
 
@@ -88,7 +83,7 @@ def populate_factsheet(
         factsheet["performance"]["test_loss"] = float(final_loss)
         factsheet["performance"]["test_acc"] = float(final_acc)
 
-        bytes_sent, bytes_recv = get_bytes(experiment_name, participant_idx)
+        bytes_sent, bytes_recv = get_bytes(scenario_name, participant_idx)
 
         factsheet["system"]["model_size"] = get_bytes_model(model)
 
@@ -100,14 +95,11 @@ def populate_factsheet(
         factsheet["system"]["time_minutes"] = get_elapsed_time(start_time, end_time)
 
         count_class_file = os.path.join(files_dir, f"{participant_idx}_class_count.json")
-        if os.path.exists(count_class_file):
-            with open(count_class_file, "r") as fs:
-                class_distribution = json.load(fs)
-            class_samples_sizes = list(class_distribution.values())
-            class_imbalance = get_cv(list=class_samples_sizes)
-            factsheet["fairness"]["class_imbalance"] = cap_score(class_imbalance)
-        else:
-            factsheet["fairness"]["class_imbalance"] = factsheet["fairness"].get("class_imbalance", 0.0)
+        factsheet["fairness"]["class_imbalance"] = (
+            get_local_class_imbalance_score(scenario_name, participant_idx)
+            if os.path.exists(count_class_file)
+            else factsheet["fairness"].get("class_imbalance", 0.0)
+        )
 
         populate_participation(factsheet, participation_summary)
 
@@ -128,7 +120,7 @@ def populate_factsheet(
             + (bytes_recv * 2.24e-10 * carbon_intensity_local)
         )
 
-        factsheet["fairness"]["underfitting"] = get_underfitting_score_local(experiment_name, participant_idx)
+        factsheet["fairness"]["underfitting"] = get_underfitting_score_local(scenario_name, participant_idx)
         populate_model_quality_metrics(
             factsheet,
             model,
@@ -137,12 +129,11 @@ def populate_factsheet(
             factsheet["performance"]["test_acc"],
         )
 
-        f.seek(0)
-        f.truncate()
-        json.dump(factsheet, f, indent=4)
+        write_factsheet(factsheet_file, factsheet)
 
-def load_round_metrics(experiment_name, participant_idx):
-    files_dir = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), experiment_name, "trustworthiness")
+
+def load_round_metrics(scenario_name, participant_idx):
+    files_dir = get_trustworthiness_dir(scenario_name)
     path = os.path.join(files_dir, f"round_metrics_participant_{participant_idx}.csv")
     df = pd.read_csv(path)
 
@@ -152,11 +143,10 @@ def load_round_metrics(experiment_name, participant_idx):
     df = df.dropna(subset=["loss", "accuracy"])
     return df
 
-def get_bytes(experiment_name, participant_idx):
+
+def get_bytes(scenario_name, participant_idx):
     data_file = os.path.join(
-        os.environ.get('NEBULA_LOGS_DIR'),
-        experiment_name,
-        "trustworthiness",
+        get_trustworthiness_dir(scenario_name),
         f"data_results_{participant_idx}.csv",
     )
 
@@ -169,6 +159,7 @@ def get_bytes(experiment_name, participant_idx):
 
     return bytes_sent, bytes_recv
 
+
 def get_emissions(emissions_file, participant_idx):
     data = read_csv(emissions_file)
 
@@ -180,25 +171,3 @@ def get_emissions(emissions_file, participant_idx):
     sample_size = row["sample_size"].iloc[0]
 
     return avg_carbon_intensity_clients, emissions_training, energy_consumed, sample_size
-
-def normalized_entropy_from_class_counts(count_class_file):
-    with open(count_class_file, "r") as f:
-        dist = json.load(f)
-
-    counts = np.array(list(dist.values()), dtype=float)
-    total = counts.sum()
-    if total <= 0:
-        return 0.0
-
-    p = counts / total
-
-    eps = 1e-12
-    H = -float(np.sum(p * np.log(p + eps)))
-
-    K = len(p)
-    if K <= 1:
-        return 0.0
-
-    H_norm = H / float(np.log(K))
-
-    return max(0.0, min(1.0, H_norm))
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 8c9415a93..d2647691c 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -1,7 +1,5 @@
-import json
 import logging
 import os
-import shutil
 from json import JSONDecodeError
 import numpy as np
 import pandas as pd
@@ -10,6 +8,7 @@
     get_avg_class_imbalance_model_size,
     get_avg_loss_accuracy,
     get_bytes_sent_recv,
+    get_class_imbalance_score,
     get_cv,
     get_dp_global,
     get_elapsed_time,
@@ -19,21 +18,23 @@
 )
 from nebula.addons.trustworthiness.factsheet_common import (
     cap_score,
-    inverse_score,
+    get_factsheet_path,
+    get_trustworthiness_dir,
+    load_or_create_factsheet,
     populate_common_pre_train_sections,
     populate_model_quality_metrics,
     populate_participation,
     populate_reliability,
     populate_reputation,
     set_dp_configuration,
+    write_factsheet,
 )
 from nebula.addons.trustworthiness.utils import read_csv, check_field_filled
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
-dirname = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
 
-class Factsheet:
+class CflFactsheet:
     def __init__(self):
         """
         Manager class to populate the FactSheet
@@ -41,49 +42,10 @@ def __init__(self):
         self.factsheet_file_nm = "factsheet.json"
         self.factsheet_template_file_nm = "factsheet_template.json"
 
-    def populate_factsheet_pre_train(self, data, scenario_name, model):
-        """
-        Populates the factsheet with values before the training.
-
-        Args:
-            data (dict): Contains the data from the scenario.
-            scenario_name (string): The name of the scenario.
-        """
-
-        factsheet_file = os.path.join(
-            os.environ.get('NEBULA_LOGS_DIR'),
-            scenario_name,
-            "trustworthiness",
-            self.factsheet_file_nm,
-        )
-
-        factsheet_template = os.path.join(dirname, "configs", self.factsheet_template_file_nm)
-
-        if not os.path.exists(factsheet_file):
-            shutil.copyfile(factsheet_template, factsheet_file)
-
-        with open(factsheet_file, "r+") as f:
-            factsheet = {}
-
-            try:
-                factsheet = json.load(f)
-
-                if data is not None:
-                    logging.info("FactSheet: Populating factsheet with pre training metrics")
-
-                    populate_common_pre_train_sections(factsheet, data, model)
-
-                    f.seek(0)
-                    f.truncate()
-                    json.dump(factsheet, f, indent=4)
-
-            except JSONDecodeError as e:
-                logging.warning(f"{factsheet_file} is invalid")
-                logging.error(e)
-
-    def populate_factsheet_post_train(
+    def populate_factsheet_cfl(
         self,
         scenario_name,
+        data,
         start_time,
         end_time,
         participant_idx,
@@ -94,120 +56,112 @@ def populate_factsheet_post_train(
         participation_summary=None,
         reliability_summary=None,
     ):
-        """
-        Populates the factsheet with values after the training.
 
-        Args:
-            scenario (object): The scenario object.
-        """
-        factsheet_file = os.path.join(
-            os.environ.get('NEBULA_LOGS_DIR'),
-            scenario_name,
-            "trustworthiness",
-            self.factsheet_file_nm,
-        )
-
-        logging.info("FactSheet: Populating factsheet with post training metrics")
-
-        with open(factsheet_file, "r+") as f:
-            factsheet = {}
-            try:
-                factsheet = json.load(f)
-
-                files_dir = f"{os.environ.get('NEBULA_LOGS_DIR')}/{scenario_name}/trustworthiness"
-
-                emissions_file = os.path.join(files_dir, "emissions.csv")
-
-                avg_class_imbalance, avg_model_size = get_avg_class_imbalance_model_size(scenario_name)
-                entropy_distribution = get_entropy_list (scenario_name)
-
-                values = np.array(entropy_distribution)
-
-                normalized_values = (values - np.min(values)) / (np.max(values) - np.min(values))
-
-                avg_entropy = np.mean(normalized_values)
-
-                factsheet["data"]["avg_entropy"] = avg_entropy
-
-                # Set performance data
-                result_avg_loss_accuracy = get_avg_loss_accuracy(scenario_name)
-                factsheet["performance"]["test_loss_avg"] = result_avg_loss_accuracy[0]
-                factsheet["performance"]["test_acc_avg"] = result_avg_loss_accuracy[1]
-                test_acc_cv = get_cv(std=result_avg_loss_accuracy[2], mean=result_avg_loss_accuracy[1])
-                factsheet["fairness"]["test_acc_cv"] = 1 if test_acc_cv > 1 else test_acc_cv
-                _, participant_test_acc = get_participant_loss_accuracy(scenario_name, participant_idx)
-
-                dp_enabled, dp_epsilon = get_dp_global(scenario_name)
-                set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
-
-                factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
-                factsheet["system"]["avg_model_size"] = avg_model_size
-
-                result_bytes_sent_recv = get_bytes_sent_recv(scenario_name)
-                factsheet["system"]["total_upload_bytes"] = result_bytes_sent_recv[0]
-                factsheet["system"]["total_download_bytes"] = result_bytes_sent_recv[1]
-                factsheet["system"]["avg_upload_bytes"] = result_bytes_sent_recv[2]
-                factsheet["system"]["avg_download_bytes"] = result_bytes_sent_recv[3]
-                populate_reliability(factsheet, reliability_summary)
-                populate_participation(factsheet, participation_summary)
-
-                class_imbalance_score = inverse_score(avg_class_imbalance)
-                factsheet["fairness"]["class_imbalance"] = cap_score(class_imbalance_score)
-                populate_reputation(factsheet, reputation_summary)
-
-                underfitting_score = get_underfitting_score(scenario_name, participant_idx)
-
-                factsheet["fairness"]["underfitting"] = underfitting_score
-                populate_model_quality_metrics(
-                    factsheet,
-                    model,
-                    train_loader,
-                    test_loader,
-                    participant_test_acc,
-                )
-
-                # Set emissions metrics
-                emissions = None if emissions_file is None else read_csv(emissions_file)
-                if emissions is not None:
-                    logging.info("FactSheet: Populating emissions")
-                    cpu_spez_df = pd.read_csv(os.path.join(os.path.dirname(__file__), "benchmarks", "CPU_benchmarks_v4.csv"), header=0)
-                    emissions["CPU_model"] = emissions["CPU_model"].astype(str).str.replace(r"\([^)]*\)", "", regex=True)
-                    emissions["CPU_model"] = emissions["CPU_model"].astype(str).str.replace(r" CPU", "", regex=True)
-                    emissions["GPU_model"] = emissions["GPU_model"].astype(str).str.replace(r"[0-9] x ", "", regex=True)
-                    emissions = pd.merge(emissions, cpu_spez_df[["cpuName", "powerPerf"]], left_on="CPU_model", right_on="cpuName", how="left")
-                    gpu_spez_df = pd.read_csv(os.path.join(os.path.dirname(__file__), "benchmarks", "GPU_benchmarks_v7.csv"), header=0)
-                    emissions = pd.merge(emissions, gpu_spez_df[["gpuName", "powerPerformance"]], left_on="GPU_model", right_on="gpuName", how="left")
-
-                    emissions.drop("cpuName", axis=1, inplace=True)
-                    emissions.drop("gpuName", axis=1, inplace=True)
-                    emissions["powerPerf"] = emissions["powerPerf"].astype(float)
-                    emissions["powerPerformance"] = emissions["powerPerformance"].astype(float)
-                    client_emissions = emissions.loc[emissions["role"] == "trainer"]
-                    client_avg_carbon_intensity = round(client_emissions["energy_grid"].mean(), 2)
-                    factsheet["sustainability"]["avg_carbon_intensity_clients"] = check_field_filled(factsheet, ["sustainability", "avg_carbon_intensity_clients"], client_avg_carbon_intensity, "")
-                    factsheet["sustainability"]["emissions_training"] = check_field_filled(factsheet, ["sustainability", "emissions_training"], client_emissions["emissions"].sum(), "")
-                    factsheet["participants"]["avg_dataset_size"] = check_field_filled(factsheet, ["participants", "avg_dataset_size"], client_emissions["sample_size"].mean(), "")
-                    GPU_powerperf = (client_emissions.loc[client_emissions["GPU_used"] == True])["powerPerformance"]
-                    CPU_powerperf = (client_emissions.loc[client_emissions["CPU_used"] == True])["powerPerf"]
-                    clients_power_performance = round(pd.concat([GPU_powerperf, CPU_powerperf]).mean(), 2)
-                    factsheet["sustainability"]["avg_power_performance_clients"] = check_field_filled(factsheet, ["sustainability", "avg_power_performance_clients"], clients_power_performance, "")
-
-                    server_emissions = emissions.loc[emissions["role"] == "server"]
-                    server_avg_carbon_intensity = round(server_emissions["energy_grid"].mean(), 2)
-                    factsheet["sustainability"]["avg_carbon_intensity_server"] = check_field_filled(factsheet, ["sustainability", "avg_carbon_intensity_server"], server_avg_carbon_intensity, "")
-                    factsheet["sustainability"]["emissions_aggregation"] = check_field_filled(factsheet, ["sustainability", "emissions_aggregation"], server_emissions["emissions"].sum(), "")
-                    GPU_powerperf = (server_emissions.loc[server_emissions["GPU_used"] == True])["powerPerformance"]
-                    CPU_powerperf = (server_emissions.loc[server_emissions["CPU_used"] == True])["powerPerf"]
-                    server_power_performance = round(pd.concat([GPU_powerperf, CPU_powerperf]).mean(), 2)
-                    factsheet["sustainability"]["avg_power_performance_server"] = check_field_filled(factsheet, ["sustainability", "avg_power_performance_server"], server_power_performance, "")
-
-                    factsheet["sustainability"]["emissions_communication_uplink"] = check_field_filled(factsheet, ["sustainability", "emissions_communication_uplink"], factsheet["system"]["total_upload_bytes"] * 2.24e-10 * factsheet["sustainability"]["avg_carbon_intensity_clients"], "")
-                    factsheet["sustainability"]["emissions_communication_downlink"] = check_field_filled(factsheet, ["sustainability", "emissions_communication_downlink"], factsheet["system"]["total_download_bytes"] * 2.24e-10 * factsheet["sustainability"]["avg_carbon_intensity_server"], "")
-
-                f.seek(0)
-                f.truncate()
-                json.dump(factsheet, f, indent=4)
-
-            except JSONDecodeError as e:
-                logging.info(f"{factsheet_file} is invalid")
-                logging.error(e)
+        factsheet_file = get_factsheet_path(scenario_name, self.factsheet_file_nm)
+
+        try:
+            factsheet_file, factsheet = load_or_create_factsheet(
+                scenario_name,
+                self.factsheet_file_nm,
+                self.factsheet_template_file_nm,
+            )
+
+            logging.info("FactSheet: Populating factsheet with pre training metrics")
+
+            populate_common_pre_train_sections(factsheet, data, model)
+
+            files_dir = get_trustworthiness_dir(scenario_name)
+
+            emissions_file = os.path.join(files_dir, "emissions.csv")
+
+            avg_class_imbalance, avg_model_size = get_avg_class_imbalance_model_size(scenario_name)
+            entropy_distribution = get_entropy_list (scenario_name)
+
+            values = np.array(entropy_distribution)
+
+            normalized_values = (values - np.min(values)) / (np.max(values) - np.min(values))
+
+            avg_entropy = np.mean(normalized_values)
+
+            factsheet["data"]["avg_entropy"] = avg_entropy
+
+            # Set performance data
+            result_avg_loss_accuracy = get_avg_loss_accuracy(scenario_name)
+            factsheet["performance"]["test_loss_avg"] = result_avg_loss_accuracy[0]
+            factsheet["performance"]["test_acc_avg"] = result_avg_loss_accuracy[1]
+            test_acc_cv = get_cv(std=result_avg_loss_accuracy[2], mean=result_avg_loss_accuracy[1])
+            factsheet["fairness"]["test_acc_cv"] = 1 if test_acc_cv > 1 else test_acc_cv
+            _, participant_test_acc = get_participant_loss_accuracy(scenario_name, participant_idx)
+
+            dp_enabled, dp_epsilon = get_dp_global(scenario_name)
+            set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
+
+            factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
+            factsheet["system"]["avg_model_size"] = avg_model_size
+
+            result_bytes_sent_recv = get_bytes_sent_recv(scenario_name)
+            factsheet["system"]["total_upload_bytes"] = result_bytes_sent_recv[0]
+            factsheet["system"]["total_download_bytes"] = result_bytes_sent_recv[1]
+            factsheet["system"]["avg_upload_bytes"] = result_bytes_sent_recv[2]
+            factsheet["system"]["avg_download_bytes"] = result_bytes_sent_recv[3]
+            populate_reliability(factsheet, reliability_summary)
+            populate_participation(factsheet, participation_summary)
+
+            class_imbalance_score = get_class_imbalance_score(avg_class_imbalance)
+            factsheet["fairness"]["class_imbalance"] = cap_score(class_imbalance_score)
+            populate_reputation(factsheet, reputation_summary)
+
+            underfitting_score = get_underfitting_score(scenario_name, participant_idx)
+
+            factsheet["fairness"]["underfitting"] = underfitting_score
+            populate_model_quality_metrics(
+                factsheet,
+                model,
+                train_loader,
+                test_loader,
+                participant_test_acc,
+            )
+
+            # Set emissions metrics
+            emissions = None if emissions_file is None else read_csv(emissions_file)
+            if emissions is not None:
+                logging.info("FactSheet: Populating emissions")
+                cpu_spez_df = pd.read_csv(os.path.join(os.path.dirname(__file__), "benchmarks", "CPU_benchmarks_v4.csv"), header=0)
+                emissions["CPU_model"] = emissions["CPU_model"].astype(str).str.replace(r"\([^)]*\)", "", regex=True)
+                emissions["CPU_model"] = emissions["CPU_model"].astype(str).str.replace(r" CPU", "", regex=True)
+                emissions["GPU_model"] = emissions["GPU_model"].astype(str).str.replace(r"[0-9] x ", "", regex=True)
+                emissions = pd.merge(emissions, cpu_spez_df[["cpuName", "powerPerf"]], left_on="CPU_model", right_on="cpuName", how="left")
+                gpu_spez_df = pd.read_csv(os.path.join(os.path.dirname(__file__), "benchmarks", "GPU_benchmarks_v7.csv"), header=0)
+                emissions = pd.merge(emissions, gpu_spez_df[["gpuName", "powerPerformance"]], left_on="GPU_model", right_on="gpuName", how="left")
+
+                emissions.drop("cpuName", axis=1, inplace=True)
+                emissions.drop("gpuName", axis=1, inplace=True)
+                emissions["powerPerf"] = emissions["powerPerf"].astype(float)
+                emissions["powerPerformance"] = emissions["powerPerformance"].astype(float)
+                client_emissions = emissions.loc[emissions["role"] == "trainer"]
+                client_avg_carbon_intensity = round(client_emissions["energy_grid"].mean(), 2)
+                factsheet["sustainability"]["avg_carbon_intensity_clients"] = check_field_filled(factsheet, ["sustainability", "avg_carbon_intensity_clients"], client_avg_carbon_intensity, "")
+                factsheet["sustainability"]["emissions_training"] = check_field_filled(factsheet, ["sustainability", "emissions_training"], client_emissions["emissions"].sum(), "")
+                factsheet["participants"]["avg_dataset_size"] = check_field_filled(factsheet, ["participants", "avg_dataset_size"], client_emissions["sample_size"].mean(), "")
+                GPU_powerperf = (client_emissions.loc[client_emissions["GPU_used"] == True])["powerPerformance"]
+                CPU_powerperf = (client_emissions.loc[client_emissions["CPU_used"] == True])["powerPerf"]
+                clients_power_performance = round(pd.concat([GPU_powerperf, CPU_powerperf]).mean(), 2)
+                factsheet["sustainability"]["avg_power_performance_clients"] = check_field_filled(factsheet, ["sustainability", "avg_power_performance_clients"], clients_power_performance, "")
+
+                server_emissions = emissions.loc[emissions["role"] == "server"]
+                server_avg_carbon_intensity = round(server_emissions["energy_grid"].mean(), 2)
+                factsheet["sustainability"]["avg_carbon_intensity_server"] = check_field_filled(factsheet, ["sustainability", "avg_carbon_intensity_server"], server_avg_carbon_intensity, "")
+                factsheet["sustainability"]["emissions_aggregation"] = check_field_filled(factsheet, ["sustainability", "emissions_aggregation"], server_emissions["emissions"].sum(), "")
+                GPU_powerperf = (server_emissions.loc[server_emissions["GPU_used"] == True])["powerPerformance"]
+                CPU_powerperf = (server_emissions.loc[server_emissions["CPU_used"] == True])["powerPerf"]
+                server_power_performance = round(pd.concat([GPU_powerperf, CPU_powerperf]).mean(), 2)
+                factsheet["sustainability"]["avg_power_performance_server"] = check_field_filled(factsheet, ["sustainability", "avg_power_performance_server"], server_power_performance, "")
+
+                factsheet["sustainability"]["emissions_communication_uplink"] = check_field_filled(factsheet, ["sustainability", "emissions_communication_uplink"], factsheet["system"]["total_upload_bytes"] * 2.24e-10 * factsheet["sustainability"]["avg_carbon_intensity_clients"], "")
+                factsheet["sustainability"]["emissions_communication_downlink"] = check_field_filled(factsheet, ["sustainability", "emissions_communication_downlink"], factsheet["system"]["total_download_bytes"] * 2.24e-10 * factsheet["sustainability"]["avg_carbon_intensity_server"], "")
+
+            write_factsheet(factsheet_file, factsheet)
+
+        except JSONDecodeError as e:
+            logging.info(f"{factsheet_file} is invalid")
+            logging.error(e)
diff --git a/nebula/addons/trustworthiness/factsheet_common.py b/nebula/addons/trustworthiness/factsheet_common.py
index dc9dadac6..b11cc7b49 100644
--- a/nebula/addons/trustworthiness/factsheet_common.py
+++ b/nebula/addons/trustworthiness/factsheet_common.py
@@ -1,5 +1,9 @@
 """Shared helpers for trustworthiness factsheet generation."""
 
+import json
+import os
+import shutil
+
 from nebula.addons.trustworthiness.calculation import (
     attack_success_rate,
     compute_adversarial_accuracy_art,
@@ -19,6 +23,45 @@
 )
 
 
+dirname = os.path.dirname(__file__)
+
+
+def get_trustworthiness_dir(scenario_name):
+    """Returns $NEBULA_LOGS_DIR/<scenario_name>/trustworthiness; the directory is not created here."""
+    return os.path.join(os.environ.get("NEBULA_LOGS_DIR"), scenario_name, "trustworthiness")
+
+
+def get_factsheet_path(scenario_name, factsheet_name):
+    """Joins factsheet_name onto get_trustworthiness_dir(scenario_name); existence is not checked."""
+    return os.path.join(get_trustworthiness_dir(scenario_name), factsheet_name)
+
+
+def get_factsheet_template_path(template_name):
+    """Returns <directory of this module>/configs/<template_name>; existence is not checked."""
+    return os.path.join(dirname, "configs", template_name)
+
+
+def load_or_create_factsheet(scenario_name, factsheet_name, template_name):
+    """Returns (path, parsed JSON) of a factsheet, first copying its template if the file is absent."""
+    os.makedirs(get_trustworthiness_dir(scenario_name), exist_ok=True)
+    # Resolved through get_factsheet_path so callers using that helper address the same file.
+    target = get_factsheet_path(scenario_name, factsheet_name)
+    template = get_factsheet_template_path(template_name)
+    # An existing factsheet is left untouched; the template only seeds a missing one.
+    if not os.path.exists(target):
+        shutil.copyfile(template, target)
+
+    with open(target, encoding="utf-8") as stream:
+        contents = json.load(stream)
+    return target, contents
+
+
+def write_factsheet(factsheet_path, factsheet):
+    """Overwrites factsheet_path with the factsheet dumped as UTF-8 JSON, indented by 4 spaces."""
+    with open(factsheet_path, "w", encoding="utf-8") as factsheet_file:
+        json.dump(factsheet, factsheet_file, indent=4)
+
+
 def cap_score(value, maximum=1):
     """Caps a score to the maximum value expected by the factsheet."""
     return maximum if value > maximum else value
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 121496690..fb337da9b 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -12,9 +12,9 @@
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
 from datetime import datetime
-from nebula.addons.trustworthiness.factsheet import Factsheet
+from nebula.addons.trustworthiness.factsheet import CflFactsheet
 from nebula.addons.trustworthiness.metric import TrustMetricManager
-from nebula.addons.trustworthiness.dfl_factsheet import populate_factsheet
+from nebula.addons.trustworthiness.dfl_factsheet import DflFactsheet
 from nebula.addons.trustworthiness.graphics import Graphics
 from nebula.addons.trustworthiness.weights import load_trust_weights
 import json
@@ -337,11 +337,12 @@ async def _finish_trustscores_exchange(self, federation, trust_config, experimen
             self._finalize_sdfl_global_trustscores_aggregation()
 
     def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
+        factsheet = DflFactsheet()
         self._engine.trainer.datamodule.setup(stage="fit")
         train_loader = self._engine.trainer.datamodule.train_dataloader()
         self._engine.trainer.datamodule.setup(stage="test")
         test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
-        populate_factsheet(
+        factsheet.populate_factsheet_dfl(
             experiment_name,
             self._idx,
             trust_config,
@@ -807,14 +808,14 @@ async def register_trustworthiness_report(self, source, message):
             logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
 
     async def _generate_factsheet(self, trust_config, experiment_name):
-        factsheet = Factsheet()
+        factsheet = CflFactsheet()
         self._engine.trainer.datamodule.setup(stage="fit")
         train_loader = self._engine.trainer.datamodule.train_dataloader()
         self._engine.trainer.datamodule.setup(stage="test")
         test_loader = self._engine.trainer.datamodule.test_dataloader()[0]
-        factsheet.populate_factsheet_pre_train(trust_config, experiment_name, self._engine.trainer.model)
-        factsheet.populate_factsheet_post_train(
+        factsheet.populate_factsheet_cfl(
             experiment_name,
+            trust_config,
             self._start_time,
             self._end_time,
             self._idx,
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
index 0c99255ec..62dfe5f08 100755
--- a/nebula/addons/trustworthiness/utils.py
+++ b/nebula/addons/trustworthiness/utils.py
@@ -102,12 +102,7 @@ def get_all_data_entropy(experiment_name):
         with open(data_class_count_file, "r") as f:
             class_count = json.load(f)
 
-        total = sum(class_count.values())
-        if total == 0:
-            entropy_value = 0.0
-        else:
-            probabilities = [count / total for count in class_count.values()]
-            entropy_value = entropy(probabilities, base=2)
+        entropy_value = calculation.get_entropy_from_class_counts(class_count)
 
         entropy_per_participant[str(participant_id)] = round(entropy_value, 6)
         participant_id += 1
@@ -123,16 +118,7 @@ def get_local_entropy(id, experiment_name):
     with open(data_class_count_file, "r") as f:
         class_count = json.load(f)
 
-    total = sum(class_count.values())
-    if total == 0:
-        entropy_value = 0.0
-    else:
-        probabilities = [count / total for count in class_count.values()]
-        entropy_value = entropy(probabilities, base=2)
-
-    entropy_local = round(entropy_value, 6)
-
-    return entropy_local
+    return calculation.get_entropy_from_class_counts(class_count)
 
 def get_entropy(client_id, scenario_name, dataloader):
     """

From fee69909b218bcc0074248e38810383cd698daa8 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 11 May 2026 11:31:38 +0200
Subject: [PATCH 41/66] Reputation implemented for SDFL. Leadership updated.

---
 nebula/addons/reputation/reputation.py        | 492 +++++++++++++++++-
 nebula/core/aggregation/aggregator.py         |  24 +
 .../updatehandlers/cflupdatehandler.py        |   8 +-
 .../updatehandlers/dflupdatehandler.py        |   8 +-
 .../updatehandlers/sdflupdatehandler.py       |   4 +
 nebula/core/engine.py                         |  85 ++-
 nebula/core/nebulaevents.py                   |  14 +-
 nebula/core/network/actions.py                |   8 +
 nebula/core/network/forwarder.py              |   2 +
 nebula/core/network/messages.py               |  22 +-
 nebula/core/noderole.py                       |  36 +-
 nebula/core/pb/nebula.proto                   |  11 +
 nebula/core/pb/nebula_pb2.py                  | 104 ++--
 .../frontend/config/participant.json.example  |   3 +-
 14 files changed, 736 insertions(+), 85 deletions(-)

diff --git a/nebula/addons/reputation/reputation.py b/nebula/addons/reputation/reputation.py
index dfa630296..19b4e9159 100644
--- a/nebula/addons/reputation/reputation.py
+++ b/nebula/addons/reputation/reputation.py
@@ -1,3 +1,5 @@
+import asyncio
+import json
 import logging
 import random
 import time
@@ -8,7 +10,13 @@
 from typing import TYPE_CHECKING
 from nebula.addons.functions import print_msg_box
 from nebula.core.eventmanager import EventManager
-from nebula.core.nebulaevents import AggregationEvent, RoundStartEvent, UpdateReceivedEvent, DuplicatedMessageEvent
+from nebula.core.nebulaevents import (
+    AggregationEvent,
+    RoundEndEvent,
+    RoundStartEvent,
+    UpdateReceivedEvent,
+    DuplicatedMessageEvent,
+)
 from nebula.core.utils.helper import (
     cosine_metric,
     euclidean_metric,
@@ -165,6 +173,16 @@ def _initialize_data_structures(self):
         self.previous_std_dev_number_message = {}
         self.previous_percentile_25_number_message = {}
         self.previous_percentile_85_number_message = {}
+        self._last_reputation_calculation_round = None
+        self._pending_sdfl_reputation_updates = {}
+        self._sdfl_training_finished_rounds = set()
+        self._sdfl_reputation_updates_expected = {}
+        self._sdfl_reputation_updates_received = {}
+        self._sdfl_reputation_updates_events = {}
+        self.reputation_tables = {}
+        self._reputation_tables_expected = {}
+        self._reputation_tables_events = {}
+        self._reputation_tables_wait_tasks = {}
 
     def _load_configuration(self):
         """Load and validate reputation configuration."""
@@ -285,8 +303,7 @@ def save_data(
             return
 
         if nei not in self.connection_metrics:
-            logging.warning(f"Neighbor {nei} not found in connection_metrics")
-            return
+            self.connection_metrics[nei] = Metrics()
 
         try:
             metrics_instance = self.connection_metrics[nei]
@@ -320,17 +337,35 @@ async def setup(self):
         """Set up the reputation system by subscribing to relevant events."""
         if self._enabled:
             await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self.on_round_start)
-            await EventManager.get_instance().subscribe_node_event(AggregationEvent, self.calculate_reputation)
-            if self._is_metric_enabled("model_similarity"):
-                await EventManager.get_instance().subscribe_node_event(UpdateReceivedEvent, self.recollect_similarity)
-            if self._is_metric_enabled("fraction_parameters_changed"):
-                await EventManager.get_instance().subscribe_node_event(
-                    UpdateReceivedEvent, self.recollect_fraction_of_parameters_changed
-                )
+            federation = self._engine.config.participant["scenario_args"].get("federation")
+            if federation == "SDFL":
+                await EventManager.get_instance().subscribe_node_event(AggregationEvent, self.calculate_reputation)
+                await EventManager.get_instance().subscribe_node_event(RoundEndEvent, self.calculate_sdfl_reputation)
+            else:
+                await EventManager.get_instance().subscribe_node_event(AggregationEvent, self.calculate_reputation)
+            if federation == "SDFL":
+                if (
+                    self._is_metric_enabled("model_similarity")
+                    or self._is_metric_enabled("fraction_parameters_changed")
+                ):
+                    await EventManager.get_instance().subscribe_node_event(
+                        UpdateReceivedEvent, self.recollect_or_buffer_sdfl_model_metrics
+                    )
+            else:
+                if self._is_metric_enabled("model_similarity"):
+                    await EventManager.get_instance().subscribe_node_event(UpdateReceivedEvent, self.recollect_similarity)
+                if self._is_metric_enabled("fraction_parameters_changed"):
+                    await EventManager.get_instance().subscribe_node_event(
+                        UpdateReceivedEvent, self.recollect_fraction_of_parameters_changed
+                    )
             if self._is_metric_enabled("model_arrival_latency"):
                 await EventManager.get_instance().subscribe_node_event(
                     UpdateReceivedEvent, self.recollect_model_arrival_latency
                 )
+            if federation == "SDFL":
+                await EventManager.get_instance().subscribe_node_event(
+                    UpdateReceivedEvent, self.mark_sdfl_reputation_update_received
+                )
             if self._is_metric_enabled("num_messages"):
                 await EventManager.get_instance().subscribe(("model", "update"), self.recollect_number_message)
                 await EventManager.get_instance().subscribe(("model", "initialization"), self.recollect_number_message)
@@ -338,7 +373,130 @@ async def setup(self):
                 await EventManager.get_instance().subscribe(
                     ("federation", "federation_models_included"), self.recollect_number_message
                 )
-                await EventManager.get_instance().subscribe_node_event(DuplicatedMessageEvent, self.recollect_duplicated_number_message)
+                if federation != "SDFL":
+                    await EventManager.get_instance().subscribe_node_event(
+                        DuplicatedMessageEvent, self.recollect_duplicated_number_message
+                    )
+
+    async def _should_recollect_update_event(self, ure: UpdateReceivedEvent) -> bool:
+        """Decide whether `ure` may feed this node's reputation metrics."""
+        _model, _weight, sender, _round, _local = await ure.get_event_data()
+        if sender == self._addr:  # the node's own updates are never observations
+            return False
+
+        scenario_args = self._engine.config.participant["scenario_args"]
+        is_reputation_update = ure.is_reputation_update()
+        if scenario_args.get("federation") != "SDFL":
+            # Outside SDFL only regular (non-reputation) updates are observed.
+            return not is_reputation_update
+        if not is_reputation_update:
+            return False
+        # SDFL: a reputation update counts only when a direct neighbor sent it.
+        peers = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
+        return sender in peers
+
+    async def recollect_or_buffer_sdfl_model_metrics(self, ure: UpdateReceivedEvent):
+        """Collect SDFL model metrics from `ure` now, or buffer it until local training ends."""
+        if not await self._should_recollect_update_event(ure):
+            return
+
+        _model, _weight, sender, update_round, _local = await ure.get_event_data()
+        is_trainer = self._engine.rb.get_role_name(True) == "trainer"
+        still_training = is_trainer and update_round not in self._sdfl_training_finished_rounds
+        # The training lock is only queried when the round bookkeeping does not already say "busy".
+        if not still_training and not await self._engine.trainning_in_progress_lock.locked_async():
+            await self._process_sdfl_model_metrics(ure)
+            return
+
+        self._pending_sdfl_reputation_updates.setdefault(update_round, {})[sender] = ure
+        logging.info(
+            f"SDFL reputation | Buffered model metrics from {sender} for round {update_round}; "
+            "local training has not finished yet"
+        )
+
+    async def process_pending_sdfl_reputation_updates(self, round_num: int = None):
+        """Mark a round's local training as finished and replay the updates buffered for it."""
+        # Without an explicit round, the engine's current round is used.
+        finished_round = await self._engine.get_round() if round_num is None else round_num
+        self._sdfl_training_finished_rounds.add(finished_round)
+
+        # Buffered events are keyed by source; the round's buffer is removed either way.
+        buffered = self._pending_sdfl_reputation_updates.pop(finished_round, {})
+        if buffered:
+            logging.info(
+                f"SDFL reputation | Processing {len(buffered)} buffered model metrics for round {finished_round}"
+            )
+
+            for pending_ure in buffered.values():
+                await self._process_sdfl_model_metrics(pending_ure)
+
+    async def _process_sdfl_model_metrics(self, ure: UpdateReceivedEvent):  # runs the enabled collectors on `ure`
+        if self._is_metric_enabled("model_similarity"):  # similarity first, when enabled
+            await self.recollect_similarity(ure)
+        if self._is_metric_enabled("fraction_parameters_changed"):  # independent of the check above
+            await self.recollect_fraction_of_parameters_changed(ure)
+
+    async def mark_sdfl_reputation_update_received(self, ure: UpdateReceivedEvent):
+        """Record an accepted SDFL reputation update and wake a waiter once all expected ones arrived."""
+        if not await self._should_recollect_update_event(ure):
+            return
+
+        _model, _weight, sender, update_round, _local = await ure.get_event_data()
+        self._sdfl_reputation_updates_received.setdefault(update_round, set()).add(sender)
+        arrived = self._sdfl_reputation_updates_received[update_round]
+
+        # Expectation and event for a round are registered by wait_sdfl_reputation_updates().
+        awaited = self._sdfl_reputation_updates_expected.get(update_round)
+        waiter = self._sdfl_reputation_updates_events.get(update_round)
+        if awaited and waiter and awaited.issubset(arrived):
+            waiter.set()
+        logging.info(
+            f"SDFL reputation | Reputation model/update processed for round {update_round} from {sender}; "
+            f"received={len(arrived)}"
+        )
+
+    async def wait_sdfl_reputation_updates(self, expected_nodes, round_num: int = None, timeout: float = None):
+        """Wait for the round's reputation updates from `expected_nodes`; return (received, missing)."""
+        if round_num is None:
+            round_num = await self._engine.get_round()
+        if timeout is None:
+            reputation_cfg = self._config.participant["defense_args"].get("reputation", {})
+            fallback_timeout = reputation_cfg.get("table_aggregation_timeout", 30)
+            timeout = float(reputation_cfg.get("model_update_timeout", fallback_timeout))
+
+        expected_nodes = set(expected_nodes) - {self._addr}
+        self._sdfl_reputation_updates_expected[round_num] = expected_nodes
+        event = self._sdfl_reputation_updates_events.setdefault(round_num, asyncio.Event())
+
+        received = self._sdfl_reputation_updates_received.setdefault(round_num, set())
+        # The per-round event is reused: reset a "set" left over from an earlier, narrower expectation.
+        if expected_nodes.issubset(received):
+            event.set()
+        else:
+            event.clear()
+
+        if expected_nodes:
+            logging.info(
+                f"SDFL reputation | Waiting reputation model/update messages for round {round_num}; "
+                f"expected={sorted(expected_nodes)} already_received={sorted(received & expected_nodes)} "
+                f"timeout={timeout}"
+            )
+
+        try:
+            await asyncio.wait_for(event.wait(), timeout=timeout)
+        except asyncio.TimeoutError:
+            logging.info(
+                f"SDFL reputation | Timeout waiting reputation model/update messages for round {round_num}; "
+                f"missing={sorted(expected_nodes - received)}"
+            )
+
+        received = self._sdfl_reputation_updates_received.get(round_num, set())
+        missing = expected_nodes - received
+        logging.info(
+            f"SDFL reputation | Reputation model/update wait finished for round {round_num}; "
+            f"received={sorted(received & expected_nodes)} missing={sorted(missing)}"
+        )
+        return received & expected_nodes, missing
 
     async def init_reputation(
         self, federation_nodes=None, round_num=None, last_feedback_round=None, init_reputation=None
@@ -444,14 +602,24 @@ async def _calculate_static_reputation(
             for metric_name in static_weights
         )
 
-        logging.info(f"Static reputation for node {nei} at round {await self.engine.get_round()}: {reputation_static}")
+        current_round = await self.engine.get_round()
+        logging.info(
+            f"Reputation debug | static raw calculation | round={current_round} node={nei} "
+            f"metrics={json.dumps(metric_values, sort_keys=True, default=str)} "
+            f"weights={json.dumps(static_weights, sort_keys=True, default=str)} "
+            f"raw_reputation={reputation_static}"
+        )
 
         avg_reputation = await self.save_reputation_history_in_memory(self.engine.addr, nei, reputation_static)
+        logging.info(
+            f"Reputation debug | static smoothed result | round={current_round} node={nei} "
+            f"raw_reputation={reputation_static} smoothed_reputation={avg_reputation}"
+        )
 
         metrics_data = {
             "addr": addr,
             "nei": nei,
-            "round": await self.engine.get_round(),
+            "round": current_round,
             "reputation_without_feedback": avg_reputation,
             **{f"average_{name}": weight for name, weight in static_weights.items()}
         }
@@ -536,16 +704,24 @@ async def _update_neighbor_reputation(self, addr, nei, metric_values, average_we
             for metric_name in average_weights
         )
 
+        current_round = await self._engine.get_round()
         logging.info(
-            f"Dynamic reputation with weights for {nei} at round {await self._engine.get_round()}: {reputation_with_weights}"
+            f"Reputation debug | dynamic raw calculation | round={current_round} node={nei} "
+            f"metrics={json.dumps(metric_values, sort_keys=True, default=str)} "
+            f"average_weights={json.dumps(average_weights, sort_keys=True, default=str)} "
+            f"raw_reputation={reputation_with_weights}"
         )
 
         avg_reputation = await self.save_reputation_history_in_memory(self._engine.addr, nei, reputation_with_weights)
+        logging.info(
+            f"Reputation debug | dynamic smoothed result | round={current_round} node={nei} "
+            f"raw_reputation={reputation_with_weights} smoothed_reputation={avg_reputation}"
+        )
 
         metrics_data = {
             "addr": addr,
             "nei": nei,
-            "round": await self._engine.get_round(),
+            "round": current_round,
             "reputation_without_feedback": avg_reputation,
         }
 
@@ -615,6 +791,11 @@ def calculate_weighted_values(
             adjusted_weights = self._calculate_uniform_weights(active_metrics)
 
         self._update_history_with_weights(active_metrics, history_data, adjusted_weights, current_round, nei)
+        logging.info(
+            f"Reputation | metric values and weights | round={current_round} node={nei} "
+            f"active_metrics={json.dumps(active_metrics, sort_keys=True, default=str)} "
+            f"weights={json.dumps(adjusted_weights, sort_keys=True, default=str)}"
+        )
 
     def _ensure_history_data_structure(self, history_data: dict):
         """Ensure all required keys exist in history data structure."""
@@ -664,10 +845,18 @@ def _calculate_dynamic_weights(self, active_metrics: dict, history_data: dict) -
         deviations = self._calculate_metric_deviations(active_metrics, history_data)
 
         if all(deviation == 0.0 for deviation in deviations.values()):
-            return self._generate_random_weights(active_metrics)
+            weights = self._generate_random_weights(active_metrics)
         else:
             normalized_weights = self._normalize_deviation_weights(deviations)
-            return self._adjust_weights_with_minimum(normalized_weights, deviations)
+            weights = self._adjust_weights_with_minimum(normalized_weights, deviations)
+
+        logging.info(
+            "Reputation debug | dynamic weight calculation | "
+            f"active_metrics={json.dumps(active_metrics, sort_keys=True, default=str)} "
+            f"deviations={json.dumps(deviations, sort_keys=True, default=str)} "
+            f"weights={json.dumps(weights, sort_keys=True, default=str)}"
+        )
+        return weights
 
     def _calculate_metric_deviations(self, active_metrics: dict, history_data: dict) -> dict:
         """Calculate deviations of current metrics from historical means."""
@@ -777,6 +966,14 @@ async def calculate_value_metrics(self, addr, nei, metrics_active=None):
                 "similarity": self._process_model_similarity_metric(nei, current_round, metrics_active)
             }
 
+            logging.info(
+                f"Reputation debug | calculated metric results | round={current_round} node={nei} "
+                f"messages={json.dumps(metric_results['messages'], sort_keys=True, default=str)} "
+                f"similarity={metric_results['similarity']} "
+                f"fraction={metric_results['fraction']} "
+                f"latency={metric_results['latency']}"
+            )
+
             self._log_metrics_graphics(metric_results, addr, nei, current_round)
 
             return (
@@ -819,6 +1016,10 @@ def _process_num_messages_metric(self, metrics_instance, addr: str, nei: str, cu
         if avg is None and current_round > self.HISTORY_ROUNDS_LOOKBACK:
             avg = self.number_message_history[(addr, nei)][current_round - 1]["avg_number_message"]
 
+        logging.info(
+            f"Reputation debug | num_messages metric | round={current_round} node={nei} "
+            f"filtered_messages={len(filtered_messages)} normalized={normalized} count={count} avg={avg or 0}"
+        )
         return {"normalized": normalized, "count": count, "avg": avg or 0}
 
     def _process_fraction_parameters_metric(self, metrics_instance, addr: str, nei: str, current_round: int, metrics_active) -> float:
@@ -833,9 +1034,16 @@ def _process_fraction_parameters_metric(self, metrics_instance, addr: str, nei:
             score_fraction = self.analyze_anomalies(addr, nei, current_round, fraction_changed, threshold)
 
         if current_round >= self.INITIAL_ROUND_FOR_FRACTION:
-            return self._calculate_fraction_score_assignment(addr, nei, current_round, score_fraction)
+            final_fraction = self._calculate_fraction_score_assignment(addr, nei, current_round, score_fraction)
         else:
-            return 0
+            final_fraction = 0
+
+        logging.info(
+            f"Reputation debug | fraction_parameters_changed metric | round={current_round} node={nei} "
+            f"raw_score={score_fraction} final_score={final_fraction} "
+            f"has_current_data={metrics_instance.fraction_of_params_changed.get('current_round') == current_round}"
+        )
+        return final_fraction
 
     def _calculate_fraction_score_assignment(self, addr: str, nei: str, current_round: int, score_fraction: float) -> float:
         """Calculate the final fraction score assignment."""
@@ -900,6 +1108,11 @@ def _process_model_arrival_latency_metric(self, metrics_instance, addr: str, nei
             avg_latency = self.save_model_arrival_latency_history(nei, latency_normalized, current_round)
             if avg_latency is None and current_round > 1:
                 avg_latency = self.model_arrival_latency_history[(addr, nei)][current_round - 1]["score"]
+            logging.info(
+                f"Reputation debug | model_arrival_latency metric | round={current_round} node={nei} "
+                f"latency_normalized={latency_normalized} avg_latency={avg_latency or 0} "
+                f"has_current_data={metrics_instance.model_arrival_latency.get('round_received') == current_round}"
+            )
             return avg_latency or 0
 
         return 0
@@ -907,7 +1120,12 @@ def _process_model_arrival_latency_metric(self, metrics_instance, addr: str, nei
     def _process_model_similarity_metric(self, nei: str, current_round: int, metrics_active) -> float:
         """Process the model similarity metric."""
         if current_round >= 1 and self._is_metric_enabled("model_similarity", metrics_active):
-            return self.calculate_similarity_from_metrics(nei, current_round)
+            similarity = self.calculate_similarity_from_metrics(nei, current_round)
+            logging.info(
+                f"Reputation debug | model_similarity metric | round={current_round} node={nei} "
+                f"similarity={similarity}"
+            )
+            return similarity
         return 0
 
     def _log_metrics_graphics(self, metric_results: dict, addr: str, nei: str, current_round: int):
@@ -1549,8 +1767,12 @@ async def save_reputation_history_in_memory(self, addr: str, nei: str, reputatio
                 previous_weight = self.REPUTATION_FEEDBACK_WEIGHT
                 avg_reputation = (current_rep * current_weight) + (previous_rep * previous_weight)
 
-                logging.info(f"Current reputation: {current_rep}, Previous reputation: {previous_rep}")
-                logging.info(f"Reputation ponderated: {avg_reputation}")
+                logging.info(
+                    f"Reputation debug | reputation smoothing | round={current_round} node={nei} "
+                    f"current_raw={current_rep} previous_raw={previous_rep} "
+                    f"current_weight={current_weight} previous_weight={previous_weight} "
+                    f"smoothed={avg_reputation}"
+                )
             else:
                 avg_reputation = reputation
 
@@ -1618,6 +1840,11 @@ async def calculate_reputation(self, ae: AggregationEvent):
         if not self._enabled:
             return
 
+        current_round = await self._engine.get_round()
+        if self._last_reputation_calculation_round == current_round:
+            logging.info(f"Reputation already calculated for round {current_round}; skipping")
+            return
+
         (updates, _, _) = await ae.get_event_data()
         await self._log_reputation_calculation_start()
 
@@ -1630,6 +1857,31 @@ async def calculate_reputation(self, ae: AggregationEvent):
         if federation != "CFL":
             await self._process_feedback()
         await self._finalize_reputation_calculation(updates, neighbors)
+        self._last_reputation_calculation_round = current_round
+
+    async def calculate_sdfl_reputation(self, _ree: RoundEndEvent):
+        """Handle RoundEndEvent by running calculate_and_send_sdfl_reputation_table()."""
+        await self.calculate_and_send_sdfl_reputation_table()  # the event payload itself is not used
+
+    async def calculate_and_send_sdfl_reputation_table(self):
+        """Calculate local SDFL reputation for the current round, skipping rounds already calculated."""
+        if not self._enabled:
+            return
+
+        current_round = await self._engine.get_round()
+        if self._last_reputation_calculation_round == current_round:
+            logging.info(f"Reputation already calculated for round {current_round}; skipping")
+            return
+
+        await self._log_reputation_calculation_start()
+        # NOTE(review): the round is marked only after the awaits below; an overlapping call may pass the guard.
+        neighbors = set(await self._engine._cm.get_addrs_current_connections(only_direct=True))
+        await self._process_neighbor_metrics(neighbors)
+        await self._calculate_reputation_by_factor(neighbors)
+        await self._handle_initial_reputation()
+        await self._process_feedback()
+        await self._finalize_reputation_calculation({}, neighbors)  # empty dict: no model updates passed along
+        self._last_reputation_calculation_round = current_round
 
     async def _log_reputation_calculation_start(self):
         """Log the start of reputation calculation with relevant information."""
@@ -1712,9 +1964,179 @@ async def _finalize_reputation_calculation(self, updates, neighbors):
             self.create_graphic_reputation(self._addr, await self._engine.get_round())
             await self.update_process_aggregation(updates)
             federation = self._engine.config.participant["scenario_args"].get("federation")
-            if federation != "CFL":
+            if federation == "SDFL":
+                await self.send_reputation_table_to_neighbors(neighbors)
+            elif federation != "CFL":
                 await self.send_reputation_to_neighbors(neighbors)
 
+    async def get_local_reputation_table(self, round_num: int = None):
+        """Build {neighbor: score} from entries stored for `round_num` (default: current round)."""
+        target_round = await self._engine.get_round() if round_num is None else round_num
+        peers = set(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False))
+
+        table = {}
+        for node_id, entry in self.reputation.items():
+            # Keep only direct neighbors whose entry belongs to the requested round.
+            if node_id not in peers or entry.get("round") != target_round:
+                continue
+            if entry.get("reputation") is not None:
+                table[node_id] = float(entry["reputation"])
+        return table
+
+    async def register_reputation_table(self, node_id: str, round_num: int, reputation_table: dict, received_from: str = None):
+        """Store a node's reputation table for a round, dropping non-numeric or non-finite scores."""
+        normalized_table = {}
+        for neighbor, score in reputation_table.items():
+            try:
+                value = float(score)
+                if not float("-inf") < value < float("inf"):  # float() also parses "nan" and "inf"
+                    raise ValueError("non-finite reputation score")
+                normalized_table[str(neighbor)] = value
+            except (TypeError, ValueError):
+                logging.warning(
+                    f"SDFL reputation | Ignoring invalid reputation score from table {node_id}: {neighbor}={score}"
+                )
+
+        round_tables = self.reputation_tables.setdefault(round_num, {})
+        round_tables[node_id] = normalized_table
+        logging.info(
+            f"SDFL reputation | Stored reputation table from {node_id} for round {round_num} "
+            f"via {received_from}; tables={len(round_tables)}"
+        )
+        expected = self._reputation_tables_expected.get(round_num)
+        event = self._reputation_tables_events.get(round_num)
+        if expected and event and expected.issubset(round_tables):
+            event.set()
+
+    async def wait_reputation_tables(self, expected_nodes, round_num: int, timeout: float):
+        """Wait for the round's tables from `expected_nodes` or the timeout; return (tables, missing)."""
+        expected_nodes = set(expected_nodes)
+        self._reputation_tables_expected[round_num] = expected_nodes
+        event = self._reputation_tables_events.setdefault(round_num, asyncio.Event())
+
+        # The per-round event is reused: reset a "set" left over from an earlier, narrower expectation.
+        if expected_nodes.issubset(self.reputation_tables.get(round_num, {})):
+            event.set()
+        else:
+            event.clear()
+
+        try:
+            await asyncio.wait_for(event.wait(), timeout=timeout)
+        except asyncio.TimeoutError:
+            overdue = sorted(expected_nodes - set(self.reputation_tables.get(round_num, {})))
+            logging.info(f"SDFL reputation | Timeout waiting reputation tables for round {round_num}; missing={overdue}")
+
+        tables = self.reputation_tables.get(round_num, {})
+        missing = expected_nodes - set(tables)
+        return tables, missing
+
+    def start_reputation_tables_collection(self, expected_nodes, round_num: int, timeout: float):
+        """Start a task that waits for one round's reputation tables and logs the outcome."""
+        if round_num in self._reputation_tables_wait_tasks:
+            return  # a task is already registered for this round
+
+        async def _wait_and_log():
+            tables, missing = await self.wait_reputation_tables(expected_nodes, round_num, timeout)
+            logging.info(
+                f"SDFL reputation | Reputation table collection snapshot for round {round_num}; "
+                f"received={len(tables)} missing={len(missing)} missing_nodes={sorted(missing)}"
+            )
+        # Storing the task keeps a reference to it and makes the guard above skip repeated calls.
+        self._reputation_tables_wait_tasks[round_num] = asyncio.create_task(
+            _wait_and_log(),
+            name=f"SDFL_reputation_tables_round_{round_num}",
+        )
+
+    async def calculate_indirect_reputation_for_non_neighbors(
+        self,
+        target_nodes,
+        expected_table_nodes,
+        round_num: int,
+        timeout: float,
+    ):
+        """Derive reputations for non-neighbor nodes by averaging received tables; return {node: score}."""
+        direct_neighbors = set(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False))
+        target_nodes = set(target_nodes) - direct_neighbors - {self._addr}  # neither direct neighbors nor this node
+        expected_table_nodes = set(expected_table_nodes)
+
+        if not target_nodes:
+            logging.info(f"SDFL reputation | No non-neighbor nodes require indirect reputation in round {round_num}")
+            return {}  # nothing to wait for
+
+        logging.info(
+            f"SDFL reputation | Waiting reputation tables before aggregation for round {round_num}; "
+            f"expected_tables={len(expected_table_nodes)} target_non_neighbors={sorted(target_nodes)}"
+        )
+        tables, missing = await self.wait_reputation_tables(expected_table_nodes, round_num, timeout)
+        logging.info(
+            f"SDFL reputation | Reputation tables used before aggregation for round {round_num}; "
+            f"received={len(tables)} missing={len(missing)} missing_nodes={sorted(missing)}:\n"
+            f"{json.dumps(tables, sort_keys=True, indent=2)}"
+        )
+        # Per target: plain mean of the scores found in the tables; table senders are not inspected.
+        indirect_reputations = {}
+        for node_id in target_nodes:
+            scores = [
+                float(table[node_id])
+                for table in tables.values()
+                if isinstance(table, dict) and node_id in table
+            ]
+            if not scores:
+                logging.info(
+                    f"SDFL reputation | No received reputation table contains non-neighbor {node_id} "
+                    f"for round {round_num}"
+                )
+                continue
+            # Overwrite the node's entry; last_feedback_round is carried over (default -1).
+            reputation = float(sum(scores) / len(scores))
+            self.reputation[node_id] = {
+                "reputation": reputation,
+                "round": round_num,
+                "last_feedback_round": self.reputation.get(node_id, {}).get("last_feedback_round", -1),
+            }
+            indirect_reputations[node_id] = reputation
+
+            if reputation < self.REPUTATION_THRESHOLD and round_num > 0:  # no rejection in round 0
+                self.rejected_nodes.add(node_id)
+                logging.info(f"SDFL reputation | Indirect reputation rejected node {node_id} at round {round_num}")
+
+        logging.info(
+            f"SDFL reputation | Indirect reputations for non-neighbors before aggregation round {round_num}: "
+            f"{json.dumps(indirect_reputations, sort_keys=True)}; missing_tables={sorted(missing)}"
+        )
+        return indirect_reputations
+
+    async def send_reputation_table_to_neighbors(self, neighbors):
+        """Register this node's reputation table for the current round and send it to `neighbors`."""
+        current_round = await self._engine.get_round()
+        local_table = await self.get_local_reputation_table(current_round)
+        # The node's own table is stored the same way as tables received from others.
+        await self.register_reputation_table(self._addr, current_round, local_table, received_from=self._addr)
+
+        if self._engine.rb.get_role_name(True) == "aggregator":
+            # Aggregators additionally start collecting the expected trainers' tables.
+            trainers = self._engine.get_sdfl_expected_trainers()
+            reputation_cfg = self._config.participant["defense_args"].get("reputation", {})
+            collection_timeout = float(reputation_cfg.get("table_aggregation_timeout", 10))
+            self.start_reputation_tables_collection(trainers, current_round, collection_timeout)
+
+        table_json = json.dumps(local_table, sort_keys=True)
+        message = self._engine.cm.create_message(
+            "reputationtable",
+            "table",
+            node_id=self._addr,
+            round=current_round,
+            reputation_table_json=table_json,
+        )
+        # One message object is built and sent to every neighbor in turn.
+        for peer in neighbors:
+            await self._engine.cm.send_message(peer, message)
+
+        logging.info(
+            f"SDFL reputation | Sent reputation table for round {current_round} "
+            f"to {len(neighbors)} neighbors"
+        )
+
     async def send_reputation_to_neighbors(self, neighbors):
         """
         Send the calculated reputation to the neighbors.
@@ -1782,6 +2204,8 @@ async def update_process_aggregation(self, updates):
                         logging.info(f"✅ Nei {nei} with reputation {rep:.4f}, scaled model with weight {weight:.4f}")
                     else:
                         logging.info(f"⛔ Nei {nei} with reputation {rep:.4f}, model rejected")
+                        updates.pop(nei, None)
+                        self.rejected_nodes.add(nei)
 
         logging.info(f"Updates after rejected nodes: {list(updates.keys())}")
         logging.info(f"Nodes rejected: {self.rejected_nodes}")
@@ -1846,11 +2270,15 @@ async def on_round_start(self, rse: RoundStartEvent):
         if round_id not in self.round_timing_info:
             self.round_timing_info[round_id] = {}
         self.round_timing_info[round_id]["start_time"] = start_time
+        self._sdfl_training_finished_rounds.discard(round_id)
         expected_nodes.difference_update(self.rejected_nodes)
         expected_nodes = list(expected_nodes)
         self._recalculate_pending_latencies(round_id)
 
     async def recollect_model_arrival_latency(self, ure: UpdateReceivedEvent):
+        if not await self._should_recollect_update_event(ure):
+            return
+
         (decoded_model, weight, source, round_num, local) = await ure.get_event_data()
         current_round = await self._engine.get_round()
 
@@ -1962,11 +2390,14 @@ async def recollect_similarity(self, ure: UpdateReceivedEvent):
         Args:
             ure: UpdateReceivedEvent containing model and metadata
         """
-        (decoded_model, weight, nei, round_num, local) = await ure.get_event_data()
-
         if not (self._enabled and self._is_metric_enabled("model_similarity")):
             return
 
+        if not await self._should_recollect_update_event(ure):
+            return
+
+        (decoded_model, weight, nei, round_num, local) = await ure.get_event_data()
+
         if not self._engine.config.participant["adaptive_args"]["model_similarity"]:
             return
 
@@ -2045,6 +2476,9 @@ async def recollect_number_message(self, source, message):
 
     async def recollect_duplicated_number_message(self, dme: DuplicatedMessageEvent):
         """Record a duplicated message event."""
+        if self._engine.config.participant["scenario_args"].get("federation") == "SDFL":
+            return
+
         event_data = await dme.get_event_data()
         if isinstance(event_data, tuple):
             source = event_data[0]
@@ -2055,6 +2489,11 @@ async def recollect_duplicated_number_message(self, dme: DuplicatedMessageEvent)
     async def _record_message_data(self, source: str):
         """Record message data for the given source if it's not the current address."""
         if source != self._addr:
+            if self._engine.config.participant["scenario_args"].get("federation") == "SDFL":
+                direct_neighbors = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
+                if source not in direct_neighbors:
+                    return
+
             current_time = time.time()
             if current_time:
                 self.save_data(
@@ -2072,6 +2511,9 @@ async def recollect_fraction_of_parameters_changed(self, ure: UpdateReceivedEven
         Args:
             ure: UpdateReceivedEvent containing model and metadata
         """
+        if not await self._should_recollect_update_event(ure):
+            return
+
         (decoded_model, weight, source, round_num, local) = await ure.get_event_data()
 
         current_round = await self._engine.get_round()
diff --git a/nebula/core/aggregation/aggregator.py b/nebula/core/aggregation/aggregator.py
index 1da611ed9..4338a7647 100755
--- a/nebula/core/aggregation/aggregator.py
+++ b/nebula/core/aggregation/aggregator.py
@@ -169,11 +169,35 @@ async def get_aggregation(self):
         else:
             logging.info("🔄  get_aggregation | All models accounted for, proceeding with aggregation.")
 
+        await self._calculate_sdfl_indirect_reputation_before_aggregation(updates)
+
         agg_event = AggregationEvent(updates, self._federation_nodes, missing_nodes)
         await EventManager.get_instance().publish_node_event(agg_event)
         aggregated_result = self.run_aggregation(updates)
         return aggregated_result
 
+    async def _calculate_sdfl_indirect_reputation_before_aggregation(self, updates):
+        """Compute indirect reputations right before aggregation; no-op outside SDFL or without reputation."""
+        participant = self.config.participant
+        if participant["scenario_args"].get("federation") != "SDFL":
+            return
+        reputation = getattr(self.engine, "_reputation", None)
+        if reputation is None:
+            return
+
+        round_num = await self.engine.get_round()
+        expected_table_nodes = self.engine.get_sdfl_expected_trainers()
+        target_nodes = set(self._federation_nodes).union(updates.keys())
+        reputation_args = participant["defense_args"].get("reputation", {})
+        timeout = float(reputation_args.get("table_aggregation_timeout", 10))
+
+        await reputation.calculate_indirect_reputation_for_non_neighbors(
+            target_nodes=target_nodes,
+            expected_table_nodes=expected_table_nodes,
+            round_num=round_num,
+            timeout=timeout,
+        )
+
     def print_model_size(self, model):
         total_memory = 0
 
diff --git a/nebula/core/aggregation/updatehandlers/cflupdatehandler.py b/nebula/core/aggregation/updatehandlers/cflupdatehandler.py
index 6e66203cb..d3ccace29 100644
--- a/nebula/core/aggregation/updatehandlers/cflupdatehandler.py
+++ b/nebula/core/aggregation/updatehandlers/cflupdatehandler.py
@@ -15,7 +15,7 @@
 class Update:
     """
     Represents a model update received from a node in a specific training round.
-    
+
     Attributes:
         model (object): The model object or weights received.
         weight (float): The weight or importance of the update.
@@ -55,7 +55,7 @@ class CFLUpdateHandler(UpdateHandler):
         _missing_ones (set): Tracks nodes whose updates are missing.
         _role (str): Role of this node (e.g., trainer or server).
     """
-    
+
     def __init__(self, aggregator, addr, buffersize=MAX_UPDATE_BUFFER_SIZE):
         self._addr = addr
         self._aggregator: Aggregator = aggregator
@@ -130,6 +130,10 @@ async def storage_update(self, updt_received_event: UpdateReceivedEvent):
         Args:
             updt_received_event (UpdateReceivedEvent): The event containing the update.
         """
+        if updt_received_event.is_reputation_update():
+            logging.debug("Discard reputation-only update in aggregation storage")
+            return
+
         time_received = time.time()
         (model, weight, source, round, _) = await updt_received_event.get_event_data()
 
diff --git a/nebula/core/aggregation/updatehandlers/dflupdatehandler.py b/nebula/core/aggregation/updatehandlers/dflupdatehandler.py
index b98cbaf98..c8b5a16d8 100644
--- a/nebula/core/aggregation/updatehandlers/dflupdatehandler.py
+++ b/nebula/core/aggregation/updatehandlers/dflupdatehandler.py
@@ -15,7 +15,7 @@
 class Update:
     """
     Represents a model update received from a node in a specific training round.
-    
+
     Attributes:
         model (object): The model object or weights received.
         weight (float): The weight or importance of the update.
@@ -47,7 +47,7 @@ class DFLUpdateHandler(UpdateHandler):
     This handler manages the reception, storage, and tracking of model updates from federation nodes
     during asynchronous rounds. It supports partial updates, late arrivals, and maintains update history.
     """
-    
+
     def __init__(self, aggregator, addr, buffersize=MAX_UPDATE_BUFFER_SIZE):
         """
         Initialize the update handler with required locks and storage.
@@ -149,6 +149,10 @@ async def storage_update(self, updt_received_event: UpdateReceivedEvent):
         Args:
             updt_received_event (UpdateReceivedEvent): Event with model update data.
         """
+        if updt_received_event.is_reputation_update():
+            logging.debug("Discard reputation-only update in aggregation storage")
+            return
+
         time_received = time.time()
         (model, weight, source, round, _) = await updt_received_event.get_event_data()
         if source in self._sources_expected:
diff --git a/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py b/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
index ec214f4cb..956abb011 100644
--- a/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
+++ b/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
@@ -143,6 +143,10 @@ async def storage_update(self, updt_received_event: UpdateReceivedEvent):
         Args:
             updt_received_event (UpdateReceivedEvent): Event with model update data.
         """
+        if updt_received_event.is_reputation_update():
+            logging.debug("Discard reputation-only update in SDFL aggregation storage")
+            return
+
         time_received = time.time()
         (model, weight, source, round, _) = await updt_received_event.get_event_data()
         if source in self._sources_expected:
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 536645680..b4718f608 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -1,4 +1,5 @@
 import asyncio
+import json
 import logging
 import os
 import random
@@ -256,7 +257,7 @@ async def wait_pending_leadership_ack(self):
         if successor is None:
             return
 
-        timeout = float(self.config.participant.get("misc_args", {}).get("leadership_ack_timeout", 5))
+        timeout = float(self.config.participant.get("misc_args", {}).get("leadership_ack_timeout", 20))
         logging.info(f"SDFL leadership | Waiting up to {timeout}s for ACK from {successor}")
 
         ack_received = False
@@ -360,12 +361,57 @@ async def model_update_callback(self, source, message):
             logging.info("🤖  handle_model_message | There are no defined federation nodes")
             return
         if self.config.participant["scenario_args"].get("federation") == "SDFL":
-            logging.info("SDFL | Ignoring legacy model/update; use sdflmodel messages")
+            direct_neighbors = await self.cm.get_addrs_current_connections(only_direct=True, myself=False)
+            if source not in direct_neighbors:
+                logging.info(f"SDFL reputation | Ignoring model/update from non-neighbor source={source}")
+                return
+
+            decoded_model = self.trainer.deserialize_model(message.parameters)
+            updt_received_event = UpdateReceivedEvent(
+                decoded_model,
+                message.weight,
+                source,
+                message.round,
+                update_type=UpdateReceivedEvent.REPUTATION_UPDATE,
+            )
+            await EventManager.get_instance().publish_node_event(updt_received_event)
+            logging.info(f"SDFL reputation | Published reputation UpdateReceivedEvent from {source}")
             return
         decoded_model = self.trainer.deserialize_model(message.parameters)
         updt_received_event = UpdateReceivedEvent(decoded_model, message.weight, source, message.round)
         await EventManager.get_instance().publish_node_event(updt_received_event)
 
+    async def send_sdfl_reputation_model_update(self):
+        """
+        Send the local model as a "model" message to every direct neighbor (SDFL only).
+
+        Receivers publish it as a reputation-only update, so it feeds reputation
+        metrics and is not stored for aggregation.
+        """
+        if self.config.participant["scenario_args"].get("federation") != "SDFL":
+            return
+
+        # Look up recipients first so an isolated node never pays for model serialization.
+        neighbors = await self.cm.get_addrs_current_connections(only_direct=True, myself=False)
+        if not neighbors:
+            logging.info("SDFL reputation | No direct neighbors to send model/update")
+            return
+
+        model_params = self.trainer.get_model_parameters()
+        if not isinstance(model_params, bytes):
+            model_params = self.trainer.serialize_model(model_params)
+
+        message = self.cm.create_message(
+            "model",
+            round=self.round,
+            parameters=model_params,
+            weight=self.trainer.get_model_weight(),
+        )
+
+        logging.info(f"SDFL reputation | Broadcasting model/update to direct neighbors: {neighbors}")
+        # gather() wraps each coroutine in a task itself, so the sends still run concurrently.
+        await asyncio.gather(*(self.cm.send_message(neighbor, message, "model") for neighbor in neighbors))
+
     """                                                     ##############################
                                                             #      General callbacks     #
                                                             ##############################
@@ -508,6 +554,41 @@ async def _reputation_share_callback(self, source, message):
         except Exception as e:
             logging.exception(f"Error handling reputation message: {e}")
 
+    async def _reputationtable_table_callback(self, source, message):
+        """Register a received SDFL reputation table and start table collection (aggregator only)."""
+        try:
+            # Tables are only consumed by an SDFL aggregator that has a reputation component.
+            participant = self.config.participant
+            if participant["scenario_args"].get("federation") != "SDFL":
+                return
+            if self.rb.get_role_name(True) != "aggregator":
+                return
+            reputation = getattr(self, "_reputation", None)
+            if reputation is None:
+                return
+
+            # A missing/empty payload is decoded as an empty table.
+            reputation_table = json.loads(message.reputation_table_json or "{}")
+            if not isinstance(reputation_table, dict):
+                logging.warning(
+                    f"SDFL reputation | Ignoring reputation table from {message.node_id}; "
+                    f"invalid payload type: {type(reputation_table)}"
+                )
+                return
+
+            table_round = message.round
+            await reputation.register_reputation_table(
+                message.node_id, table_round, reputation_table, received_from=source
+            )
+            expected_nodes = self.get_sdfl_expected_trainers()
+            reputation_args = participant["defense_args"].get("reputation", {})
+            timeout = float(reputation_args.get("table_aggregation_timeout", 10))
+            reputation.start_reputation_tables_collection(expected_nodes, table_round, timeout)
+        except json.JSONDecodeError as e:
+            logging.warning(f"SDFL reputation | Could not decode reputation table from {source}: {e}")
+        except Exception as e:
+            logging.exception(f"Error handling reputation table message: {e}")
+
     async def _trustworthiness_report_callback(self, source, message):
         try:
             report = {
diff --git a/nebula/core/nebulaevents.py b/nebula/core/nebulaevents.py
index 0e3e77977..ecdd482da 100644
--- a/nebula/core/nebulaevents.py
+++ b/nebula/core/nebulaevents.py
@@ -296,7 +296,10 @@ async def is_concurrent(self) -> bool:
 
 
 class UpdateReceivedEvent(NodeEvent):
-    def __init__(self, decoded_model, weight, source, round, local=False):
+    FEDERATION_UPDATE = "federation"
+    REPUTATION_UPDATE = "reputation"
+
+    def __init__(self, decoded_model, weight, source, round, local=False, update_type=FEDERATION_UPDATE):
         """
         Initializes an UpdateReceivedEvent.
 
@@ -306,12 +309,15 @@ def __init__(self, decoded_model, weight, source, round, local=False):
             source (str): The identifier or address of the node that sent the update.
             round (int): The round number in which the update was received.
             local (bool): Local update
+            update_type (str): Semantic channel for this update. Federation updates feed aggregation;
+                reputation updates only feed reputation metrics.
         """
         self._source = source
         self._round = round
         self._model = decoded_model
         self._weight = weight
         self._local = local
+        self._update_type = update_type
 
     def __str__(self):
         return f"Update received from source: {self._source}, round: {self._round}"
@@ -330,6 +336,12 @@ async def get_event_data(self) -> tuple[object, int, str, int, bool]:
         """
         return (self._model, self._weight, self._source, self._round, self._local)
 
+    async def get_update_type(self) -> str:  # async getter for the update_type given at construction
+        return self._update_type
+
+    def is_reputation_update(self) -> bool:  # sync check; True only when update_type == REPUTATION_UPDATE
+        return self._update_type == self.REPUTATION_UPDATE
+
     async def is_concurrent(self) -> bool:
         return False
 
diff --git a/nebula/core/network/actions.py b/nebula/core/network/actions.py
index 294301399..98d8c93f8 100644
--- a/nebula/core/network/actions.py
+++ b/nebula/core/network/actions.py
@@ -83,6 +83,13 @@ class ReputationAction(Enum):
 
     SHARE = nebula_pb2.ReputationMessage.Action.SHARE
 
+class ReputationtableAction(Enum):
+    """
+    Actions of the SDFL "reputationtable" message; TABLE is the full reputation table exchange.
+    """
+
+    TABLE = nebula_pb2.ReputationtableMessage.Action.TABLE
+
 class TrustworthinessAction(Enum):
     """
     Enum for trustworthiness exchange messages in the federation.
@@ -116,6 +123,7 @@ class SdflmodelAction(Enum):
     "offer": OfferAction,
     "link": LinkAction,
     "reputation": ReputationAction,
+    "reputationtable": ReputationtableAction,
     "trustworthiness": TrustworthinessAction,
     "trustscores": TrustscoresAction,
     "sdflmodel": SdflmodelAction,
diff --git a/nebula/core/network/forwarder.py b/nebula/core/network/forwarder.py
index 510f89fd2..e6831eec7 100755
--- a/nebula/core/network/forwarder.py
+++ b/nebula/core/network/forwarder.py
@@ -144,6 +144,8 @@ def _allow_forward_after_learning_finished(self, msg: bytes) -> bool:
                 return True
             if message_type == "sdflmodel_message":
                 return message_wrapper.sdflmodel_message.action == nebula_pb2.SdflmodelMessage.Action.GLOBAL_MODEL
+            if message_type == "reputationtable_message":
+                return True
             return False
         except Exception as e:
             logging.warning(f"🔁  Could not inspect forwarded message type: {e!s}")
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index 25e41d66a..0d8a036ed 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -99,6 +99,14 @@ def _define_message_templates(self):
                     "round": None,
                 },
             },
+            "reputationtable": {
+                "parameters": ["action", "node_id", "round", "reputation_table_json"],
+                "defaults": {
+                    "node_id": self.addr,
+                    "round": None,
+                    "reputation_table_json": "{}",
+                },
+            },
             "discover": {"parameters": ["action"], "defaults": {}},
             "link": {"parameters": ["action", "addrs"], "defaults": {}},
             "trustworthiness": {
@@ -164,7 +172,14 @@ async def process_message(self, data, addr_from):
             addr_from (str): Address from which the message was received.
         """
         not_processing_messages = {"control_message", "connection_message"}
-        special_processing_messages = {"discovery_message", "federation_message", "model_message", "trustscores_message", "sdflmodel_message"}
+        special_processing_messages = {
+            "discovery_message",
+            "federation_message",
+            "model_message",
+            "trustscores_message",
+            "sdflmodel_message",
+            "reputationtable_message",
+        }
 
         try:
             message_wrapper = nebula_pb2.Wrapper()
@@ -248,6 +263,11 @@ def _should_forward_message(self, message_type, message_wrapper):
 
         if  self.cm.config.participant["scenario_args"]["federation"] == "SDFL" and message_type == "sdflmodel_message":
             return True
+        if (
+            self.cm.config.participant["scenario_args"]["federation"] == "SDFL"
+            and message_type == "reputationtable_message"
+        ):
+            return True
 
     def create_message(self, message_type: str, action: str = "", *args, **kwargs):
         """
diff --git a/nebula/core/noderole.py b/nebula/core/noderole.py
index 7f7f7bb52..d097f585e 100644
--- a/nebula/core/noderole.py
+++ b/nebula/core/noderole.py
@@ -281,6 +281,9 @@ def get_role_name(self, effective=False):
     async def extended_learning_cycle(self):
         await self._engine.trainer.test()
 
+        if self._config.participant["scenario_args"].get("federation") == "SDFL":
+            await self._engine.send_sdfl_reputation_model_update()
+
         await self._engine._waiting_model_updates()
 
         federation = self._config.participant["scenario_args"].get("federation")
@@ -386,13 +389,44 @@ async def extended_learning_cycle(self):
         logging.info("Waiting global update | Assign _waiting_global_update = True")
 
         await self._engine.trainer.test()
-        await self._engine.trainer.train()
+        await self._engine.trainning_in_progress_lock.acquire_async()
+        try:
+            await self._engine.trainer.train()
+        finally:
+            await self._engine.trainning_in_progress_lock.release_async()
 
         federation = self._config.participant["scenario_args"].get("federation")
 
         if federation == "SDFL":
             self._engine.prepare_waiting_global_model()
 
+            if self._engine._reputation is not None:
+                await self._engine._reputation.process_pending_sdfl_reputation_updates(self._engine.round)
+
+            await self._engine.send_sdfl_reputation_model_update()
+
+            if self._engine._reputation is not None:
+                expected_reputation_neighbors = await self._engine.cm.get_addrs_current_connections(
+                    only_direct=True,
+                    myself=False,
+                )
+                reputation_timeout = float(
+                    self._config.participant["defense_args"]
+                    .get("reputation", {})
+                    .get(
+                        "model_update_timeout",
+                        self._config.participant["defense_args"]
+                        .get("reputation", {})
+                        .get("table_aggregation_timeout", 30),
+                    )
+                )
+                await self._engine._reputation.wait_sdfl_reputation_updates(
+                    expected_reputation_neighbors,
+                    self._engine.round,
+                    reputation_timeout,
+                )
+                await self._engine._reputation.calculate_and_send_sdfl_reputation_table()
+
             model_params = self._engine.trainer.get_model_parameters()
             serialized_model = (
                 model_params
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index acb0969f3..c05d6131f 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -29,6 +29,7 @@ message Wrapper {
     TrustworthinessMessage trustworthiness_message = 12;
     TrustscoresMessage trustscores_message = 13;
     SdflmodelMessage sdflmodel_message = 14;
+    ReputationtableMessage reputationtable_message = 15;
   }
 }
 
@@ -142,6 +143,16 @@ message ReputationMessage {
   Action action = 4; // Action type (default: SHARE)
 }
 
+message ReputationtableMessage { // Full reputation table exchange message used in SDFL.
+  enum Action {
+    TABLE = 0; // Only action defined: the message carries a reputation table.
+  }
+  string node_id = 1; // Logical source node of the reputation table.
+  int32 round = 2; // Round to which the reputation table belongs.
+  string reputation_table_json = 3; // JSON encoded reputation table.
+  Action action = 4; // Action type (default: TABLE)
+}
+
 // Response transmits the outcome of a requested operation, including any errors.
 message ResponseMessage {
   string response = 1;      // Outcome of the requested operation.
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index 3d8003607..bb470f06b 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xe3\x05\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x12\x35\n\x11sdflmodel_message\x18\x0e \x01(\x0b\x32\x18.nebula.SdflmodelMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 
\x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\xc7\x01\n\x10SdflmodelMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.SdflmodelMessage.Action\x12\x0e\n\x06target\x18\x02 \x01(\t\x12\x12\n\nparameters\x18\x03 \x01(\x0c\x12\x0e\n\x06weight\x18\x04 \x01(\x03\x12\r\n\x05round\x18\x05 \x01(\x05\x12\x0f\n\x07node_id\x18\x06 \x01(\t\".\n\x06\x41\x63tion\x12\x12\n\x0eTRAINER_UPDATE\x10\x00\x12\x10\n\x0cGLOBAL_MODEL\x10\x01\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 
\x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\x80\x04\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\x12\x12\n\ndp_enabled\x18\x15 \x01(\x08\x12\x12\n\ndp_epsilon\x18\x16 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 
\x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xa6\x06\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x12\x35\n\x11sdflmodel_message\x18\x0e \x01(\x0b\x32\x18.nebula.SdflmodelMessageH\x00\x12\x41\n\x17reputationtable_message\x18\x0f \x01(\x0b\x32\x1e.nebula.ReputationtableMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 
\x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\xc7\x01\n\x10SdflmodelMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.SdflmodelMessage.Action\x12\x0e\n\x06target\x18\x02 \x01(\t\x12\x12\n\nparameters\x18\x03 \x01(\x0c\x12\x0e\n\x06weight\x18\x04 \x01(\x03\x12\r\n\x05round\x18\x05 \x01(\x05\x12\x0f\n\x07node_id\x18\x06 \x01(\t\".\n\x06\x41\x63tion\x12\x12\n\x0eTRAINER_UPDATE\x10\x00\x12\x10\n\x0cGLOBAL_MODEL\x10\x01\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 
\x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"\xa3\x01\n\x16ReputationtableMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05round\x18\x02 \x01(\x05\x12\x1d\n\x15reputation_table_json\x18\x03 \x01(\t\x12\x35\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32%.nebula.ReputationtableMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05TABLE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\x80\x04\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\x12\x12\n\ndp_enabled\x18\x15 
\x01(\x08\x12\x12\n\ndp_epsilon\x18\x16 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -21,53 +21,57 @@
 
   DESCRIPTOR._options = None
   _WRAPPER._serialized_start=25
-  _WRAPPER._serialized_end=764
-  _DISCOVERYMESSAGE._serialized_start=767
-  _DISCOVERYMESSAGE._serialized_end=925
-  _DISCOVERYMESSAGE_ACTION._serialized_start=873
-  _DISCOVERYMESSAGE_ACTION._serialized_end=925
-  _CONTROLMESSAGE._serialized_start=928
-  _CONTROLMESSAGE._serialized_end=1137
-  _CONTROLMESSAGE_ACTION._serialized_start=1007
-  _CONTROLMESSAGE_ACTION._serialized_end=1137
-  _FEDERATIONMESSAGE._serialized_start=1140
-  _FEDERATIONMESSAGE._serialized_end=1345
-  _FEDERATIONMESSAGE_ACTION._serialized_start=1245
-  _FEDERATIONMESSAGE_ACTION._serialized_end=1345
-  _MODELMESSAGE._serialized_start=1347
-  _MODELMESSAGE._serialized_end=1412
-  _SDFLMODELMESSAGE._serialized_start=1415
-  _SDFLMODELMESSAGE._serialized_end=1614
-  _SDFLMODELMESSAGE_ACTION._serialized_start=1568
-  _SDFLMODELMESSAGE_ACTION._serialized_end=1614
-  _CONNECTIONMESSAGE._serialized_start=1617
-  _CONNECTIONMESSAGE._serialized_end=1760
-  _CONNECTIONMESSAGE_ACTION._serialized_start=1688
-  _CONNECTIONMESSAGE_ACTION._serialized_end=1760
-  _DISCOVERMESSAGE._serialized_start=1763
-  _DISCOVERMESSAGE._serialized_end=1912
-  _DISCOVERMESSAGE_ACTION._serialized_start=1830
-  _DISCOVERMESSAGE_ACTION._serialized_end=1912
-  _OFFERMESSAGE._serialized_start=1915
-  _OFFERMESSAGE._serialized_end=2121
-  _OFFERMESSAGE_ACTION._serialized_start=2078
-  _OFFERMESSAGE_ACTION._serialized_end=2121
-  _LINKMESSAGE._serialized_start=2123
-  _LINKMESSAGE._serialized_end=2242
-  _LINKMESSAGE_ACTION._serialized_start=2197
-  _LINKMESSAGE_ACTION._serialized_end=2242
-  _REPUTATIONMESSAGE._serialized_start=2245
-  _REPUTATIONMESSAGE._serialized_end=2382
-  _REPUTATIONMESSAGE_ACTION._serialized_start=2363
-  _REPUTATIONMESSAGE_ACTION._serialized_end=2382
-  _RESPONSEMESSAGE._serialized_start=2384
-  _RESPONSEMESSAGE._serialized_end=2419
-  _TRUSTWORTHINESSMESSAGE._serialized_start=2422
-  _TRUSTWORTHINESSMESSAGE._serialized_end=2934
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=2914
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=2934
-  _TRUSTSCORESMESSAGE._serialized_start=2937
-  _TRUSTSCORESMESSAGE._serialized_end=3073
-  _TRUSTSCORESMESSAGE_ACTION._serialized_start=2363
-  _TRUSTSCORESMESSAGE_ACTION._serialized_end=2382
+  _WRAPPER._serialized_end=831
+  _DISCOVERYMESSAGE._serialized_start=834
+  _DISCOVERYMESSAGE._serialized_end=992
+  _DISCOVERYMESSAGE_ACTION._serialized_start=940
+  _DISCOVERYMESSAGE_ACTION._serialized_end=992
+  _CONTROLMESSAGE._serialized_start=995
+  _CONTROLMESSAGE._serialized_end=1204
+  _CONTROLMESSAGE_ACTION._serialized_start=1074
+  _CONTROLMESSAGE_ACTION._serialized_end=1204
+  _FEDERATIONMESSAGE._serialized_start=1207
+  _FEDERATIONMESSAGE._serialized_end=1412
+  _FEDERATIONMESSAGE_ACTION._serialized_start=1312
+  _FEDERATIONMESSAGE_ACTION._serialized_end=1412
+  _MODELMESSAGE._serialized_start=1414
+  _MODELMESSAGE._serialized_end=1479
+  _SDFLMODELMESSAGE._serialized_start=1482
+  _SDFLMODELMESSAGE._serialized_end=1681
+  _SDFLMODELMESSAGE_ACTION._serialized_start=1635
+  _SDFLMODELMESSAGE_ACTION._serialized_end=1681
+  _CONNECTIONMESSAGE._serialized_start=1684
+  _CONNECTIONMESSAGE._serialized_end=1827
+  _CONNECTIONMESSAGE_ACTION._serialized_start=1755
+  _CONNECTIONMESSAGE_ACTION._serialized_end=1827
+  _DISCOVERMESSAGE._serialized_start=1830
+  _DISCOVERMESSAGE._serialized_end=1979
+  _DISCOVERMESSAGE_ACTION._serialized_start=1897
+  _DISCOVERMESSAGE_ACTION._serialized_end=1979
+  _OFFERMESSAGE._serialized_start=1982
+  _OFFERMESSAGE._serialized_end=2188
+  _OFFERMESSAGE_ACTION._serialized_start=2145
+  _OFFERMESSAGE_ACTION._serialized_end=2188
+  _LINKMESSAGE._serialized_start=2190
+  _LINKMESSAGE._serialized_end=2309
+  _LINKMESSAGE_ACTION._serialized_start=2264
+  _LINKMESSAGE_ACTION._serialized_end=2309
+  _REPUTATIONMESSAGE._serialized_start=2312
+  _REPUTATIONMESSAGE._serialized_end=2449
+  _REPUTATIONMESSAGE_ACTION._serialized_start=2430
+  _REPUTATIONMESSAGE_ACTION._serialized_end=2449
+  _REPUTATIONTABLEMESSAGE._serialized_start=2452
+  _REPUTATIONTABLEMESSAGE._serialized_end=2615
+  _REPUTATIONTABLEMESSAGE_ACTION._serialized_start=2596
+  _REPUTATIONTABLEMESSAGE_ACTION._serialized_end=2615
+  _RESPONSEMESSAGE._serialized_start=2617
+  _RESPONSEMESSAGE._serialized_end=2652
+  _TRUSTWORTHINESSMESSAGE._serialized_start=2655
+  _TRUSTWORTHINESSMESSAGE._serialized_end=3167
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=3147
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=3167
+  _TRUSTSCORESMESSAGE._serialized_start=3170
+  _TRUSTSCORESMESSAGE._serialized_end=3306
+  _TRUSTSCORESMESSAGE_ACTION._serialized_start=2430
+  _TRUSTSCORESMESSAGE_ACTION._serialized_end=2449
 # @@protoc_insertion_point(module_scope)
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index 725251cf5..ca9a86a71 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -158,6 +158,7 @@
   },
   "misc_args": {
     "grace_time_connection": 10,
-    "grace_time_start_federation": 10
+    "grace_time_start_federation": 10,
+    "leadership_ack_timeout": 20
   }
 }

From d0c1f8dc86a5f99e15072c355b47d9a00f25137f Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 14 May 2026 17:25:49 +0200
Subject: [PATCH 42/66] Mitigation: Feature Squeezing for images

---
 nebula/addons/defenses/__init__.py            |   1 +
 nebula/addons/defenses/feature_squeezing.py   | 217 ++++++++++++++++++
 nebula/controller/scenarios.py                |  13 ++
 nebula/core/node.py                           |   2 +
 .../frontend/config/participant.json.example  |   7 +
 .../static/js/deployment/feature-squeezing.js |  71 ++++++
 nebula/frontend/static/js/deployment/main.js  |   3 +
 .../frontend/static/js/deployment/scenario.js |   7 +
 nebula/frontend/templates/deployment.html     |  21 ++
 9 files changed, 342 insertions(+)
 create mode 100644 nebula/addons/defenses/__init__.py
 create mode 100644 nebula/addons/defenses/feature_squeezing.py
 create mode 100644 nebula/frontend/static/js/deployment/feature-squeezing.js

diff --git a/nebula/addons/defenses/__init__.py b/nebula/addons/defenses/__init__.py
new file mode 100644
index 000000000..5e1105d48
--- /dev/null
+++ b/nebula/addons/defenses/__init__.py
@@ -0,0 +1 @@
+"""Defense add-ons for Nebula."""
diff --git a/nebula/addons/defenses/feature_squeezing.py b/nebula/addons/defenses/feature_squeezing.py
new file mode 100644
index 000000000..acfe316d5
--- /dev/null
+++ b/nebula/addons/defenses/feature_squeezing.py
@@ -0,0 +1,217 @@
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+import torch
+from PIL import Image
+
+IMAGE_DATASETS = {"MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"}
+PIL_IMAGE_MODES = {"1", "L", "P", "RGB", "RGBA", "CMYK", "YCbCr"}
+
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class FeatureSqueezingConfig:  # immutable settings read by FeatureSqueezingDefense
+    enabled: bool = False  # set to True by FeatureSqueezingDefense.from_participant_config
+    bit_depth: int = 8  # the defense derives 2**bit_depth levels and accepts ints in [1, 64]
+    dataset_name: str | None = None  # squeezing is skipped unless this is listed in IMAGE_DATASETS
+    apply_to_train: bool = True  # squeeze the partition's train_set
+    apply_to_test: bool = True  # squeeze the partition's test_set
+    apply_to_local_test: bool = True  # squeeze the partition's local_test_set
+
+
+# ---------------------------------------------------------------------------
+# Defense
+# ---------------------------------------------------------------------------
+
+
+class FeatureSqueezingDefense:
+    """Dataset-level feature squeezing for image Nebula datasets."""
+
+    def __init__(self, config: FeatureSqueezingConfig):
+        """Store ``config``; raise ``ValueError`` unless ``bit_depth`` is an int in [1, 64]."""
+        if not isinstance(config.bit_depth, int) or not 1 <= config.bit_depth <= 64:
+            raise ValueError("feature_squeezing.bit_depth must be an integer in [1, 64]")  # noqa: TRY003
+        self.config = config
+        self.levels = float((2**config.bit_depth) - 1)  # index of the top level; values snap to k / levels
+
+    @classmethod
+    def from_participant_config(cls, participant_config: dict[str, Any]) -> "FeatureSqueezingDefense | None":
+        """Build the defense from ``defense_args.feature_squeezing``; ``None`` when missing or not enabled."""
+        raw = participant_config.get("defense_args", {}).get("feature_squeezing", {})
+        if not raw or not raw.get("enabled", False):
+            return None
+        return cls(
+            FeatureSqueezingConfig(
+                enabled=True,
+                bit_depth=int(raw.get("bit_depth", raw.get("n", 8))),  # "n" is the fallback key
+                dataset_name=participant_config.get("data_args", {}).get("dataset"),
+                apply_to_train=bool(raw.get("apply_to_train", True)),
+                apply_to_test=bool(raw.get("apply_to_test", True)),
+                apply_to_local_test=bool(raw.get("apply_to_local_test", True)),
+            )
+        )
+
+    def apply_to_partition(self, partition) -> None:
+        """Squeeze the enabled splits of ``partition`` in place (needs a train set and an image dataset)."""
+        train_set = getattr(partition, "train_set", None)
+        if train_set is None:
+            logging.warning("[FeatureSqueezingDefense] No train set found; skipping defense")
+            return
+        if self.config.dataset_name not in IMAGE_DATASETS:
+            logging.info(
+                "[FeatureSqueezingDefense] Skipping feature squeezing: dataset is not image-supported | dataset=%s",
+                self.config.dataset_name,
+            )
+            return
+
+        logging.info(
+            "[FeatureSqueezingDefense] Applying feature squeezing | dataset=%s | bit_depth=%s",
+            self.config.dataset_name,
+            self.config.bit_depth,
+        )
+
+        seen_data: set[int] = set()  # id() of containers already squeezed, so shared data is squeezed once
+        for name, dataset, enabled in (
+            ("train", train_set, self.config.apply_to_train),
+            ("test", getattr(partition, "test_set", None), self.config.apply_to_test),
+            ("local_test", getattr(partition, "local_test_set", None), self.config.apply_to_local_test),
+        ):
+            if enabled:
+                self._transform_dataset(dataset, name, seen_data)
+
+    def _transform_dataset(self, dataset, name: str, seen_data: set[int]) -> None:
+        """Squeeze each item of ``dataset.data`` in place unless that container is already in ``seen_data``."""
+        data = getattr(dataset, "data", None)
+        if dataset is None or data is None:
+            return
+        if id(data) in seen_data:
+            logging.info("[FeatureSqueezingDefense] Dataset %s already transformed; skipping duplicate data", name)
+            self._log_check(data, name, status="already_transformed")
+            return
+
+        before_sample = data[0] if len(data) else None
+        before = self._summary(before_sample) if before_sample is not None else None
+        for idx, sample in enumerate(data):
+            data[idx] = self._transform_sample(sample)
+
+        seen_data.add(id(data))
+        logging.info("[FeatureSqueezingDefense] Transformed %s samples in %s set", len(data), name)
+        self._log_check(data, name, status="transformed", before=before)
+
+    def _transform_sample(self, sample):
+        """Squeeze ``sample``; for a non-empty tuple only element 0 is squeezed and the rest is kept."""
+        if isinstance(sample, tuple) and sample:
+            return (self._squeeze_image(sample[0]), *sample[1:])
+        return self._squeeze_image(sample)
+
+    def _squeeze_image(self, value):
+        """Squeeze ``value``; PIL images, tensors and ndarrays come back as the same kind of object."""
+        if isinstance(value, Image.Image):
+            image = value if value.mode in PIL_IMAGE_MODES else value.convert("RGB")
+            arr = np.asarray(image)
+            squeezed = np.rint(self._squeeze_image_array(arr)).clip(0, 255).astype(arr.dtype, copy=False)
+            return Image.fromarray(squeezed, mode=image.mode)
+
+        squeezed = self._squeeze_image_array(self._as_numpy(value))
+        return self._restore_type(value, squeezed)
+
+    def _squeeze_image_array(self, arr: np.ndarray) -> np.ndarray:
+        """Quantise ``arr`` to ``self.levels + 1`` evenly spaced values; empty or zero-range input is copied."""
+        if arr.size == 0:  # nanmin/nanmax raise on empty input
+            return arr.copy()
+        arr_float = arr.astype(np.float32, copy=False)
+        is_integer = np.issubdtype(arr.dtype, np.integer)
+        if is_integer:  # integers are scaled over the full range of their dtype
+            info = np.iinfo(arr.dtype)
+            low, high = float(info.min), float(info.max)
+        else:
+            low, high = float(np.nanmin(arr_float)), float(np.nanmax(arr_float))
+            if low >= 0.0 and high <= 1.0:  # data inside [0, 1] is quantised over [0, 1], not its own min/max
+                low, high = 0.0, 1.0
+
+        value_range = high - low
+        if value_range == 0:
+            return arr.copy()
+        squeezed = self._quantize01((arr_float - low) / value_range) * value_range + low
+        # Integer input gets whole-valued output: the cast back to an integer dtype truncates otherwise.
+        return np.rint(squeezed) if is_integer else squeezed
+
+    def _quantize01(self, arr: np.ndarray) -> np.ndarray:
+        """Clip ``arr`` to [0, 1] and snap every value to the nearest multiple of ``1 / self.levels``."""
+        return np.rint(np.clip(arr, 0.0, 1.0) * self.levels) / self.levels
+
+    def _log_check(self, data, name: str, status: str, before: str | None = None) -> None:
+        """Log a summary of ``data[0]`` (and ``before`` when given) so the squeezing can be checked by eye."""
+        if not len(data):
+            logging.info("[FeatureSqueezingDefense] Verification %s | status=%s | empty dataset", name, status)
+            return
+        expectation = f"expected_unique_values<={int(self.levels + 1)}"
+        after = self._summary(data[0])
+        if before is None:
+            logging.info(
+                "[FeatureSqueezingDefense] Verification %s | status=%s | %s | sample_after={%s}",
+                name,
+                status,
+                expectation,
+                after,
+            )
+            return
+        logging.info(
+            "[FeatureSqueezingDefense] Verification %s | status=%s | %s | sample_before={%s} | sample_after={%s}",
+            name,
+            status,
+            expectation,
+            before,
+            after,
+        )
+
+    def _summary(self, sample) -> str:
+        """Describe the image part of ``sample``: shape, dtype, min, max and up to 12 distinct values."""
+        arr = self._as_numpy(self._unwrap(sample))
+        if arr.size == 0:
+            return f"shape={arr.shape}, empty=True"
+        flat = arr.reshape(-1)
+        unique = np.unique(flat)
+        preview = ", ".join(self._fmt(value) for value in unique[: min(12, len(unique))])
+        return (
+            f"shape={arr.shape}, dtype={arr.dtype}, min={self._fmt(np.nanmin(flat))}, "
+            f"max={self._fmt(np.nanmax(flat))}, unique_count={len(unique)}, unique_preview=[{preview}]"
+        )
+
+    def _as_numpy(self, value) -> np.ndarray:
+        """Return ``value`` as a NumPy array; tensors are detached and moved to the CPU first."""
+        if isinstance(value, torch.Tensor):
+            return value.detach().cpu().numpy()
+        return np.asarray(value)  # also covers PIL images
+
+    def _restore_type(self, original, arr: np.ndarray):
+        """Cast ``arr`` to the dtype (and device) of ``original`` when that is a tensor or ndarray."""
+        if isinstance(original, torch.Tensor):
+            return torch.as_tensor(arr, dtype=original.dtype, device=original.device)
+        if isinstance(original, np.ndarray):
+            return arr.astype(original.dtype, copy=False)
+        return arr
+
+    def _unwrap(self, sample):
+        """Return element 0 of a non-empty tuple ``sample``, otherwise ``sample`` itself."""
+        return sample[0] if isinstance(sample, tuple) and sample else sample
+
+    def _fmt(self, value) -> str:
+        """Format ``value`` compactly: ints for whole numbers, ``.6g`` for other floats, ``str`` if not numeric."""
+        try:
+            number = float(value)
+        except (TypeError, ValueError):
+            return str(value)
+        return str(int(number)) if number.is_integer() else f"{number:.6g}"
+
+
+def apply_feature_squeezing_if_enabled(partition, participant_config: dict[str, Any]) -> None:
+    """Squeeze ``partition`` in place when ``participant_config`` enables feature squeezing."""
+    if (squeezer := FeatureSqueezingDefense.from_participant_config(participant_config)) is not None:
+        squeezer.apply_to_partition(partition)
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index 62b2a8a41..7b27d475b 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -116,6 +116,7 @@ def __init__(
         sar_training,
         sar_training_policy,
         dp=None,
+        feature_squeezing=None,
         physical_ips=None,
     ):
         """
@@ -194,6 +195,7 @@ def __init__(
         self.network_gateway = network_gateway
         self.epochs = epochs
         self.dp = dp
+        self.feature_squeezing = feature_squeezing
         self.attack_params = attack_params
         self.reputation = reputation
         self.random_geo = random_geo
@@ -715,6 +717,17 @@ def __init__(self, scenario, user=None):
                     participant_config["training_args"]["dp"]["max_grad_norm"] = float(
                         self.scenario.dp["max_grad_norm"]
                     )
+            feature_squeezing = (
+                self.scenario.feature_squeezing if isinstance(self.scenario.feature_squeezing, dict) else {}
+            )
+            participant_config.setdefault("defense_args", {})
+            participant_config["defense_args"].setdefault("feature_squeezing", {})
+            participant_config["defense_args"]["feature_squeezing"]["enabled"] = bool(
+                feature_squeezing.get("enabled", False)
+            )
+            bit_depth = feature_squeezing.get("bit_depth", feature_squeezing.get("n"))
+            if bit_depth is not None:
+                participant_config["defense_args"]["feature_squeezing"]["bit_depth"] = int(bit_depth)
             participant_config["device_args"]["accelerator"] = self.scenario.accelerator
             participant_config["device_args"]["gpu_id"] = self.scenario.gpu_id
             participant_config["device_args"]["logging"] = self.scenario.logginglevel
diff --git a/nebula/core/node.py b/nebula/core/node.py
index a78fa78b2..c74a5cb94 100755
--- a/nebula/core/node.py
+++ b/nebula/core/node.py
@@ -19,6 +19,7 @@
 import logging
 from collections import Counter
 
+from nebula.addons.defenses.feature_squeezing import apply_feature_squeezing_if_enabled
 from nebula.config.config import Config
 from nebula.core.datasets.cifar10.cifar10 import CIFAR10PartitionHandler
 from nebula.core.datasets.cifar100.cifar100 import CIFAR100PartitionHandler
@@ -187,6 +188,7 @@ async def main(config: Config):
 
     dataset = NebulaPartition(handler=handler, config=config)
     dataset.load_partition()
+    apply_feature_squeezing_if_enabled(dataset, config.participant)
     dataset.log_partition()
     samples_per_label = Counter(dataset.get_train_labels())
 
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index ca9a86a71..50da11a7b 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -102,6 +102,13 @@
     "aggregation_push": "slow"
   },
   "defense_args": {
+    "feature_squeezing": {
+      "enabled": false,
+      "bit_depth": 4,
+      "apply_to_train": true,
+      "apply_to_test": true,
+      "apply_to_local_test": true
+    },
     "reputation": {
       "enabled": false,
       "metrics": {},
diff --git a/nebula/frontend/static/js/deployment/feature-squeezing.js b/nebula/frontend/static/js/deployment/feature-squeezing.js
new file mode 100644
index 000000000..c68708a41
--- /dev/null
+++ b/nebula/frontend/static/js/deployment/feature-squeezing.js
@@ -0,0 +1,71 @@
+// Feature Squeezing Module
+const FeatureSqueezingManager = (function() {
+    // Values used when no feature-squeezing settings are supplied.
+    const DEFAULT_FEATURE_SQUEEZING_CONFIG = {
+        enabled: false,
+        bit_depth: 4
+    };
+
+    // Wire the enable switch and put the form in its default state.
+    function initializeFeatureSqueezing() {
+        setupFeatureSqueezingSwitch();
+        setFeatureSqueezingConfig(DEFAULT_FEATURE_SQUEEZING_CONFIG);
+    }
+
+    // Show or hide the settings panel whenever the switch changes.
+    function setupFeatureSqueezingSwitch() {
+        const featureSqueezingSwitch = document.getElementById("featureSqueezingSwitch");
+        if (!featureSqueezingSwitch) return;
+        featureSqueezingSwitch.addEventListener("change", function() {
+            toggleFeatureSqueezingSettings(this.checked);
+        });
+    }
+
+    // Display the bit-depth settings only while the defense is enabled.
+    function toggleFeatureSqueezingSettings(enabled) {
+        const featureSqueezingSettings = document.getElementById("feature-squeezing-settings");
+        if (!featureSqueezingSettings) return;
+        featureSqueezingSettings.style.display = enabled ? "block" : "none";
+    }
+
+    // Read the form as { enabled, bit_depth }. A missing or non-numeric depth falls back to the
+    // default; other values are clamped to 1..64, the range declared on the input element.
+    function getFeatureSqueezingConfig() {
+        const bitDepth = parseInt(document.getElementById("featureSqueezingN")?.value, 10);
+        return {
+            enabled: Boolean(document.getElementById("featureSqueezingSwitch")?.checked),
+            bit_depth: Number.isFinite(bitDepth)
+                ? Math.min(64, Math.max(1, bitDepth))
+                : DEFAULT_FEATURE_SQUEEZING_CONFIG.bit_depth
+        };
+    }
+
+    // Load a saved config into the form. The alternative key `n` is used when `bit_depth` is
+    // absent; it is resolved before the defaults so the default bit depth cannot mask it.
+    function setFeatureSqueezingConfig(config = DEFAULT_FEATURE_SQUEEZING_CONFIG) {
+        const saved = config || {};
+        const enabled = saved.enabled ?? DEFAULT_FEATURE_SQUEEZING_CONFIG.enabled;
+        const bitDepth = saved.bit_depth ?? saved.n ?? DEFAULT_FEATURE_SQUEEZING_CONFIG.bit_depth;
+
+        const featureSqueezingSwitch = document.getElementById("featureSqueezingSwitch");
+        if (!featureSqueezingSwitch) return;
+        featureSqueezingSwitch.checked = Boolean(enabled);
+        const nInput = document.getElementById("featureSqueezingN");
+        if (nInput) nInput.value = bitDepth;
+        toggleFeatureSqueezingSettings(featureSqueezingSwitch.checked);
+    }
+
+    // Restore the default (disabled) state.
+    function resetFeatureSqueezingConfig() {
+        setFeatureSqueezingConfig(DEFAULT_FEATURE_SQUEEZING_CONFIG);
+    }
+
+    return {
+        initializeFeatureSqueezing,
+        getFeatureSqueezingConfig,
+        setFeatureSqueezingConfig,
+        resetFeatureSqueezingConfig
+    };
+})();
+
+export default FeatureSqueezingManager;
diff --git a/nebula/frontend/static/js/deployment/main.js b/nebula/frontend/static/js/deployment/main.js
index 44dd652f8..eed8b0d31 100644
--- a/nebula/frontend/static/js/deployment/main.js
+++ b/nebula/frontend/static/js/deployment/main.js
@@ -9,6 +9,7 @@ import GraphSettings from './graph-settings.js';
 import Utils from './utils.js';
 import TrustworthinessManager from './trustworthiness.js';
 import DpManager from './dp.js';
+import FeatureSqueezingManager from './feature-squeezing.js';
 
 const DeploymentManager = (function() {
     function initialize() {
@@ -33,6 +34,7 @@ const DeploymentManager = (function() {
         SaManager.initializeSa();
         TrustworthinessManager.initializeTrustworthinessSystem();
         DpManager.initializeDifferentialPrivacy();
+        FeatureSqueezingManager.initializeFeatureSqueezing();
         GraphSettings.initializeDistanceControls();
 
         // Make modules globally available
@@ -44,6 +46,7 @@ const DeploymentManager = (function() {
         window.SaManager = SaManager;
         window.TrustworthinessManager = TrustworthinessManager;
         window.DpManager = DpManager;
+        window.FeatureSqueezingManager = FeatureSqueezingManager;
         window.GraphSettings = GraphSettings;
         window.DeploymentManager = DeploymentManager;
         window.Utils = Utils;
diff --git a/nebula/frontend/static/js/deployment/scenario.js b/nebula/frontend/static/js/deployment/scenario.js
index 6af1e7d79..6cf174854 100644
--- a/nebula/frontend/static/js/deployment/scenario.js
+++ b/nebula/frontend/static/js/deployment/scenario.js
@@ -75,6 +75,7 @@ const ScenarioManager = (function () {
             report_status_data_queue: document.getElementById("reportingSwitch").checked,
             epochs: parseInt(document.getElementById("epochs").value),
             dp: window.DpManager.getDpConfig(),
+            feature_squeezing: window.FeatureSqueezingManager.getFeatureSqueezingConfig(),
             attack_params: attackConfig,
             reputation: {
                 enabled: window.ReputationManager.getReputationConfig().enabled || false,
@@ -267,6 +268,9 @@ const ScenarioManager = (function () {
         if (window.DpManager) {
             window.DpManager.setDpConfig(scenario.dp);
         }
+        if (window.FeatureSqueezingManager) {
+            window.FeatureSqueezingManager.setFeatureSqueezingConfig(scenario.feature_squeezing);
+        }
 
         // Load module configurations
         if (scenario.attacks && scenario.attacks.length > 0) {
@@ -443,6 +447,9 @@ const ScenarioManager = (function () {
         if (window.DpManager) {
             window.DpManager.resetDpConfig();
         }
+        if (window.FeatureSqueezingManager) {
+            window.FeatureSqueezingManager.resetFeatureSqueezingConfig();
+        }
 
         // Trigger necessary events
         document.getElementById("federationArchitecture").dispatchEvent(new Event('change'));
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 129a848ca..4bbc8de99 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -547,6 +547,27 @@ <h5 class="step-title">Max Grad Norm</h5>
                         </small>
                     </div>
                 </div>
+                <!-- Feature Squeezing -->
+                <div class="form-group row container-shadow tiny grey">
+                    <h5 class="step-number">Feature Squeezing <i class="fa fa-compress"></i>
+                    </h5>
+                    <h5 class="step-title">Enable/Disable Feature Squeezing</h5>
+                    <div class="form-check form-switch" style="margin-left: 23px;">
+                        <input class="form-check-input" type="checkbox" id="featureSqueezingSwitch"
+                            style="display: inline; width: 80px; height: 30px;">
+                    </div>
+                    <div id="feature-squeezing-settings" style="margin-top: 10px; display: none;">
+                        <h5 class="step-title">n (bit depth)</h5>
+                        <div class="form-check form-check-inline">
+                            <input type="number" class="form-control" id="featureSqueezingN"
+                                placeholder="n" min="1" max="64" step="1" value="4"
+                                style="display: inline; width: 80%">
+                        </div>
+                        <small class="form-text text-muted">
+                            Applies only to image datasets.
+                        </small>
+                    </div>
+                </div>
                 <!-- Advanced Robustness -->
                 <div class="form-group row container-shadow tiny grey">
                     <h5 class="step-number">Robustness <i class="fa fa-shield"></i>

From ca88c855ca933cbeb77041f771e672e19dfad9ce Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 15 May 2026 10:12:03 +0200
Subject: [PATCH 43/66] Leadership updated: Leadership counter

---
 nebula/core/engine.py   | 36 ++++++++++++++++++++++++++++++++++++
 nebula/core/noderole.py | 12 +++++++-----
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index b4718f608..702b473ac 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -162,6 +162,7 @@ def __init__(
         self._leadership_transfer_lock = Locker("leadership_transfer_lock", async_lock=True)
         self._leadership_transfer_pending = None
         self._leadership_transfer_ack = asyncio.Event()
+        self._leadership_transfer_counts = {}
 
         event_manager = EventManager.get_instance(verbose=False)
         self._addon_manager = AddondManager(self, self.config)
@@ -285,6 +286,40 @@ async def wait_pending_leadership_ack(self):
 
         await self.rb.set_next_role(Role.TRAINER)
 
+    async def select_leadership_successor(self, candidates) -> str | None:
+        """Pick the next leader among ``candidates``, favouring the least-used ones.
+
+        Candidates are de-duplicated and sorted; the recorded transfer count of
+        each is read under the leadership-transfer lock, and one of the
+        candidates sharing the lowest count is chosen at random.
+
+        Returns:
+            The chosen candidate, or ``None`` when ``candidates`` is empty.
+        """
+        pool = sorted(set(candidates))
+        if not pool:
+            return None
+
+        async with self._leadership_transfer_lock:
+            # Snapshot the counters; the selection below works on this copy.
+            candidate_counts = {node: self._leadership_transfer_counts.get(node, 0) for node in pool}
+
+        fewest = min(candidate_counts.values())
+        least_used = [node for node, transfers in candidate_counts.items() if transfers == fewest]
+        successor = random.choice(least_used)
+        logging.info(f"Leadership transfer candidate counts: {candidate_counts} | selected={successor}")
+        return successor
+
+    async def register_leadership_transfer(self, node: str):
+        """Increment the leadership-transfer counter kept for ``node``.
+
+        The update and its log line run while holding the leadership-transfer lock.
+        """
+        async with self._leadership_transfer_lock:
+            updated = self._leadership_transfer_counts.get(node, 0) + 1
+            self._leadership_transfer_counts[node] = updated
+            logging.info(f"Leadership transfer count updated | node={node} | count={updated}")
+
     def get_sdfl_expected_trainers(self) -> set[str]:
         nodes = self.config.participant.get("trust_args", {}).get("scenario", {}).get("nodes", {})
         expected_nodes = set()
@@ -453,6 +488,7 @@ async def _control_alive_callback(self, source, message):
 
     async def _control_leadership_transfer_callback(self, source, message):
         logging.info(f"🔧  handle_control_message | Trigger | Received leadership transfer message from {source}")
+        await self.register_leadership_transfer(source)
 
         if await self._round_in_process_lock.locked_async():
             logging.info("Learning cycle is executing, role behavior will be modified next round")
diff --git a/nebula/core/noderole.py b/nebula/core/noderole.py
index d097f585e..48991994f 100644
--- a/nebula/core/noderole.py
+++ b/nebula/core/noderole.py
@@ -7,7 +7,6 @@
 from nebula.core.utils.locker import Locker
 from nebula.core.eventmanager import EventManager
 from nebula.core.nebulaevents import UpdateReceivedEvent, ModelPropagationEvent
-import random
 from enum import Enum
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING
@@ -311,12 +310,15 @@ async def _transfer_leadership(self):
 
         neighbors = await self._engine.cm.get_addrs_current_connections(myself=False)
         if len(neighbors) and not self._transfer_send:
-            random_neighbor = random.choice(list(neighbors))
+            successor = await self._engine.select_leadership_successor(neighbors)
+            if successor is None:
+                return
             lt_message = self._engine.cm.create_message("control", "leadership_transfer")
-            logging.info(f"Sending transfer leadership to: {random_neighbor}")
+            logging.info(f"Sending transfer leadership to: {successor}")
             if self._config.participant["scenario_args"].get("federation") == "SDFL":
-                await self._engine.mark_leadership_transfer_pending(random_neighbor)
-            asyncio.create_task(self._engine.cm.send_message(random_neighbor, lt_message))
+                await self._engine.mark_leadership_transfer_pending(successor)
+            asyncio.create_task(self._engine.cm.send_message(successor, lt_message))
+            await self._engine.register_leadership_transfer(successor)
             self._transfer_send = True
 
     async def select_nodes_to_wait(self):

From 45b71f39547f6170633116f5943ac0ef181b594a Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 15 May 2026 12:09:19 +0200
Subject: [PATCH 44/66] Metrics name changed

---
 nebula/addons/defenses/feature_squeezing.py   |  2 +-
 .../trustworthiness/configs/eval_metrics.json | 74 +++++++++---------
 .../configs/eval_metrics_dfl.json             | 76 +++++++++----------
 .../trustworthiness/factsheet_common.py       | 26 +++----
 4 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/nebula/addons/defenses/feature_squeezing.py b/nebula/addons/defenses/feature_squeezing.py
index acfe316d5..2ca27acd1 100644
--- a/nebula/addons/defenses/feature_squeezing.py
+++ b/nebula/addons/defenses/feature_squeezing.py
@@ -35,7 +35,7 @@ class FeatureSqueezingDefense:
 
     def __init__(self, config: FeatureSqueezingConfig):
         if not isinstance(config.bit_depth, int) or not 1 <= config.bit_depth <= 64:
-            raise ValueError("feature_squeezing.bit_depth must be an integer in [1, 64]")  # noqa: TRY003
+            raise ValueError("feature_squeezing.bit_depth must be an integer in [1, 64]")
 
         self.config = config
         self.levels = float((2**config.bit_depth) - 1)
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics.json
index 106f645b9..889635d90 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics.json
@@ -7,7 +7,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_clever"
+                "field_path": "performance/clipped_test_clever"
               }
             ],
             "operation": "get_value",
@@ -16,11 +16,11 @@
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
             "weight": 0.4
           },
-          "loss_sensitivity": {
+          "inverse_loss_sensitivity": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_loss_sensitivity"
+                "field_path": "performance/inverse_test_loss_sensitivity"
               }
             ],
             "operation": "get_value",
@@ -29,11 +29,11 @@
             "description": "",
             "weight": 0.2
           },
-          "adversarial_accuracy": {
+          "clipped_adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_adv_accuracy"
+                "field_path": "performance/clipped_test_adv_accuracy"
               }
             ],
             "operation": "get_value",
@@ -42,11 +42,11 @@
             "description": "",
             "weight": 0.1
           },
-          "empirical_robustness": {
+          "clipped_empirical_robustness": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_empirical_robustness"
+                "field_path": "performance/clipped_test_empirical_robustness"
               }
             ],
             "operation": "get_value",
@@ -55,11 +55,11 @@
             "description": "",
             "weight": 0.1
           },
-          "confidence_score": {
+          "clipped_confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_confidence_score"
+                "field_path": "performance/clipped_test_confidence_score"
               }
             ],
             "operation": "get_value",
@@ -68,11 +68,11 @@
             "description": "",
             "weight": 0.1
           },
-          "attack_success_rate": {
+          "inverse_attack_success_rate": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_attack_success_rate"
+                "field_path": "performance/inverse_test_attack_success_rate"
               }
             ],
             "operation": "get_value",
@@ -258,7 +258,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "privacy/epsilon_star_score"
+                "field_path": "privacy/inverse_epsilon_star"
               }
             ],
             "operation": "get_value",
@@ -351,11 +351,11 @@
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
             "weight": 0.1667
           },
-          "overfitting": {
+          "inverse_overfitting": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/overfitting"
+                "field_path": "fairness/inverse_overfitting"
               }
             ],
             "operation": "get_value",
@@ -363,11 +363,11 @@
             "description": "Generalization quality proxy transformed so higher is better.",
             "weight": 0.1667
           },
-          "well_calibration_error": {
+          "inverse_well_calibration_error": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/well_calibration_error"
+                "field_path": "fairness/inverse_well_calibration_error"
               }
             ],
             "operation": "get_value",
@@ -375,11 +375,11 @@
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
             "weight": 0.1667
           },
-          "generalized_entropy_index": {
+          "inverse_generalized_entropy_index": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/generalized_entropy_index"
+                "field_path": "fairness/inverse_generalized_entropy_index"
               }
             ],
             "operation": "get_value",
@@ -387,11 +387,11 @@
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
             "weight": 0.1667
           },
-          "theil_index": {
+          "inverse_theil_index": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/theil_index"
+                "field_path": "fairness/inverse_theil_index"
               }
             ],
             "operation": "get_value",
@@ -399,11 +399,11 @@
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
             "weight": 0.1667
           },
-          "coefficient_of_variation": {
+          "inverse_coefficient_of_variation": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/coefficient_of_variation"
+                "field_path": "fairness/inverse_coefficient_of_variation"
               }
             ],
             "operation": "get_value",
@@ -464,11 +464,11 @@
       "post_hoc_methods": {
         "weight": 0.6,
         "metrics": {
-          "feature_importance": {
+          "clipped_feature_importance": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_feature_importance_cv"
+                "field_path": "performance/clipped_test_feature_importance_cv"
               }
             ],
             "operation": "get_value",
@@ -669,31 +669,31 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_feature_importance_cv"
+                "field_path": "performance/clipped_test_feature_importance_cv"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_clever"
+                "field_path": "performance/clipped_test_clever"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_loss_sensitivity"
+                "field_path": "performance/inverse_test_loss_sensitivity"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_adv_accuracy"
+                "field_path": "performance/clipped_test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_empirical_robustness"
+                "field_path": "performance/clipped_test_empirical_robustness"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_confidence_score"
+                "field_path": "performance/clipped_test_confidence_score"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_attack_success_rate"
+                "field_path": "performance/inverse_test_attack_success_rate"
               }
             ],
             "operation": "check_properties",
@@ -721,23 +721,23 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/overfitting"
+                "field_path": "fairness/inverse_overfitting"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/well_calibration_error"
+                "field_path": "fairness/inverse_well_calibration_error"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/generalized_entropy_index"
+                "field_path": "fairness/inverse_generalized_entropy_index"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/theil_index"
+                "field_path": "fairness/inverse_theil_index"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/coefficient_of_variation"
+                "field_path": "fairness/inverse_coefficient_of_variation"
               }
             ],
             "operation": "check_properties",
@@ -797,7 +797,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "privacy/epsilon_star_score"
+                "field_path": "privacy/inverse_epsilon_star"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index c396abddc..c3d2f2755 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -7,7 +7,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_clever"
+                "field_path": "performance/clipped_test_clever"
               }
             ],
             "operation": "get_value",
@@ -16,11 +16,11 @@
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
             "weight": 0.4
           },
-          "loss_sensitivity": {
+          "inverse_loss_sensitivity": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_loss_sensitivity"
+                "field_path": "performance/inverse_test_loss_sensitivity"
               }
             ],
             "operation": "get_value",
@@ -29,11 +29,11 @@
             "description": "",
             "weight": 0.2
           },
-          "adversarial_accuracy": {
+          "clipped_adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_adv_accuracy"
+                "field_path": "performance/clipped_test_adv_accuracy"
               }
             ],
             "operation": "get_value",
@@ -42,11 +42,11 @@
             "description": "",
             "weight": 0.1
           },
-          "empirical_robustness": {
+          "clipped_empirical_robustness": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_empirical_robustness"
+                "field_path": "performance/clipped_test_empirical_robustness"
               }
             ],
             "operation": "get_value",
@@ -55,11 +55,11 @@
             "description": "",
             "weight": 0.1
           },
-          "confidence_score": {
+          "clipped_confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_confidence_score"
+                "field_path": "performance/clipped_test_confidence_score"
               }
             ],
             "operation": "get_value",
@@ -68,11 +68,11 @@
             "description": "",
             "weight": 0.1
           },
-          "attack_success_rate": {
+          "inverse_attack_success_rate": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_attack_success_rate"
+                "field_path": "performance/inverse_test_attack_success_rate"
               }
             ],
             "operation": "get_value",
@@ -254,11 +254,11 @@
             "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
             "weight": 0.2
           },
-          "epsilon_star": {
+          "inverse_epsilon_star": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "privacy/epsilon_star_score"
+                "field_path": "privacy/inverse_epsilon_star"
               }
             ],
             "operation": "get_value",
@@ -327,11 +327,11 @@
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
             "weight": 0.1667
           },
-          "overfitting": {
+          "inverse_overfitting": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/overfitting"
+                "field_path": "fairness/inverse_overfitting"
               }
             ],
             "operation": "get_value",
@@ -339,11 +339,11 @@
             "description": "Generalization quality proxy transformed so higher is better.",
             "weight": 0.1667
           },
-          "well_calibration_error": {
+          "inverse_well_calibration_error": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/well_calibration_error"
+                "field_path": "fairness/inverse_well_calibration_error"
               }
             ],
             "operation": "get_value",
@@ -351,11 +351,11 @@
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
             "weight": 0.1667
           },
-          "generalized_entropy_index": {
+          "inverse_generalized_entropy_index": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/generalized_entropy_index"
+                "field_path": "fairness/inverse_generalized_entropy_index"
               }
             ],
             "operation": "get_value",
@@ -363,11 +363,11 @@
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
             "weight": 0.1667
           },
-          "theil_index": {
+          "inverse_theil_index": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/theil_index"
+                "field_path": "fairness/inverse_theil_index"
               }
             ],
             "operation": "get_value",
@@ -375,11 +375,11 @@
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
             "weight": 0.1667
           },
-          "coefficient_of_variation": {
+          "inverse_coefficient_of_variation": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "fairness/coefficient_of_variation"
+                "field_path": "fairness/inverse_coefficient_of_variation"
               }
             ],
             "operation": "get_value",
@@ -440,11 +440,11 @@
       "post_hoc_methods": {
         "weight": 0.6,
         "metrics": {
-          "feature_importance": {
+          "clipped_feature_importance": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/test_feature_importance_cv"
+                "field_path": "performance/clipped_test_feature_importance_cv"
               }
             ],
             "operation": "get_value",
@@ -649,31 +649,31 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_feature_importance_cv"
+                "field_path": "performance/clipped_test_feature_importance_cv"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_clever"
+                "field_path": "performance/clipped_test_clever"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_loss_sensitivity"
+                "field_path": "performance/inverse_test_loss_sensitivity"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_adv_accuracy"
+                "field_path": "performance/clipped_test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_empirical_robustness"
+                "field_path": "performance/clipped_test_empirical_robustness"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_confidence_score"
+                "field_path": "performance/clipped_test_confidence_score"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/test_attack_success_rate"
+                "field_path": "performance/inverse_test_attack_success_rate"
               }
             ],
             "operation": "check_properties",
@@ -697,23 +697,23 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/overfitting"
+                "field_path": "fairness/inverse_overfitting"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/well_calibration_error"
+                "field_path": "fairness/inverse_well_calibration_error"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/generalized_entropy_index"
+                "field_path": "fairness/inverse_generalized_entropy_index"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/theil_index"
+                "field_path": "fairness/inverse_theil_index"
               },
               {
                 "source": "factsheet",
-                "field_path": "fairness/coefficient_of_variation"
+                "field_path": "fairness/inverse_coefficient_of_variation"
               }
             ],
             "operation": "check_properties",
@@ -765,7 +765,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "privacy/epsilon_star_score"
+                "field_path": "privacy/inverse_epsilon_star"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/factsheet_common.py b/nebula/addons/trustworthiness/factsheet_common.py
index b11cc7b49..290882395 100644
--- a/nebula/addons/trustworthiness/factsheet_common.py
+++ b/nebula/addons/trustworthiness/factsheet_common.py
@@ -190,24 +190,24 @@ def populate_model_quality_metrics(factsheet, model, train_loader, test_loader,
     factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
 
     factsheet["privacy"]["epsilon_star"] = get_epsilon_star(model, train_loader, test_loader)
-    factsheet["privacy"]["epsilon_star_score"] = inverse_score(factsheet["privacy"]["epsilon_star"])
+    factsheet["privacy"]["inverse_epsilon_star"] = inverse_score(factsheet["privacy"]["epsilon_star"])
     factsheet["privacy"]["mia_auc"] = get_mia_auc(model, train_loader, test_loader)
     factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
 
     overfitting_value = get_overfitting_score(model, train_loader, test_accuracy)
-    factsheet["fairness"]["overfitting"] = inverse_score(overfitting_value)
+    factsheet["fairness"]["inverse_overfitting"] = inverse_score(overfitting_value)
 
     well_calibration_error_value = get_well_calibration_error(model, test_loader)
-    factsheet["fairness"]["well_calibration_error"] = inverse_score(well_calibration_error_value)
+    factsheet["fairness"]["inverse_well_calibration_error"] = inverse_score(well_calibration_error_value)
 
     generalized_entropy_index_value = get_generalized_entropy_index(model, test_loader)
-    factsheet["fairness"]["generalized_entropy_index"] = inverse_score(generalized_entropy_index_value)
+    factsheet["fairness"]["inverse_generalized_entropy_index"] = inverse_score(generalized_entropy_index_value)
 
     theil_index_value = get_theil_index(model, test_loader)
-    factsheet["fairness"]["theil_index"] = inverse_score(theil_index_value)
+    factsheet["fairness"]["inverse_theil_index"] = inverse_score(theil_index_value)
 
     coefficient_of_variation_value = get_coefficient_of_variation(model, test_loader)
-    factsheet["fairness"]["coefficient_of_variation"] = inverse_score(coefficient_of_variation_value)
+    factsheet["fairness"]["inverse_coefficient_of_variation"] = inverse_score(coefficient_of_variation_value)
 
     factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
     factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
@@ -217,22 +217,22 @@ def populate_model_quality_metrics(factsheet, model, train_loader, test_loader,
     num_classes = model.get_num_classes()
 
     value_clever = get_clever_score(model, test_sample, num_classes, lr)
-    factsheet["performance"]["test_clever"] = cap_score(value_clever)
+    factsheet["performance"]["clipped_test_clever"] = cap_score(value_clever)
 
     value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes, lr)
-    factsheet["performance"]["test_loss_sensitivity"] = inverse_score(value_loss_sensitivity)
+    factsheet["performance"]["inverse_test_loss_sensitivity"] = inverse_score(value_loss_sensitivity)
 
     value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, num_classes, lr)
-    factsheet["performance"]["test_adv_accuracy"] = cap_score(value_adv_accuracy)
+    factsheet["performance"]["clipped_test_adv_accuracy"] = cap_score(value_adv_accuracy)
 
     value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes, lr)
-    factsheet["performance"]["test_empirical_robustness"] = cap_score(value_empirical_robustness)
+    factsheet["performance"]["clipped_test_empirical_robustness"] = cap_score(value_empirical_robustness)
 
     value_confidence_score = get_confidence_score(model, test_sample)
-    factsheet["performance"]["test_confidence_score"] = cap_score(value_confidence_score)
+    factsheet["performance"]["clipped_test_confidence_score"] = cap_score(value_confidence_score)
 
     value_attack_success_rate = attack_success_rate(model, test_sample)
-    factsheet["performance"]["test_attack_success_rate"] = 1 - value_attack_success_rate
+    factsheet["performance"]["inverse_test_attack_success_rate"] = 1 - value_attack_success_rate
 
     feature_importance = explainability_metrics["feature_importance_cv"]
-    factsheet["performance"]["test_feature_importance_cv"] = cap_score(feature_importance)
+    factsheet["performance"]["clipped_test_feature_importance_cv"] = cap_score(feature_importance)

From 328977ffb0f365c3458dda24419c110f96540e31 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Tue, 19 May 2026 12:46:14 +0200
Subject: [PATCH 45/66] Adversarial Training implemented for images,
 trustworthiness: FGSM fixed for images, frontend updated (differential
 privacy, feature squeezing, adversarial training)

---
 .../addons/defenses/adversarial_training.py   | 246 ++++++++++++++++++
 nebula/addons/trustworthiness/calculation.py  |  42 ++-
 nebula/controller/scenarios.py                |  42 +++
 nebula/core/models/nebulamodel.py             |  43 ++-
 nebula/core/node.py                           |   2 +
 .../frontend/config/participant.json.example  |  14 +
 .../js/deployment/adversarial-training.js     | 209 +++++++++++++++
 nebula/frontend/static/js/deployment/dp.js    |  18 ++
 .../static/js/deployment/feature-squeezing.js |  44 +++-
 nebula/frontend/static/js/deployment/main.js  |  17 ++
 .../frontend/static/js/deployment/scenario.js |   7 +
 nebula/frontend/templates/deployment.html     |  97 ++++++-
 12 files changed, 768 insertions(+), 13 deletions(-)
 create mode 100644 nebula/addons/defenses/adversarial_training.py
 create mode 100644 nebula/frontend/static/js/deployment/adversarial-training.js

diff --git a/nebula/addons/defenses/adversarial_training.py b/nebula/addons/defenses/adversarial_training.py
new file mode 100644
index 000000000..6260edd8f
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training.py
@@ -0,0 +1,246 @@
+import logging
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any
+
+import torch
+
+IMAGE_DATASET_NORMALIZATION = {
+    "MNIST": ((0.5,), (0.5,)),
+    "FashionMNIST": ((0.5,), (0.5,)),
+    "EMNIST": ((0.5,), (0.5,)),
+    "CIFAR10": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
+    "CIFAR100": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
+}
+
+
+@dataclass(frozen=True)
+class AdversarialTrainingConfig:  # immutable settings parsed from defense_args.adversarial_training
+    enabled: bool = False  # the defense is only built when this is true
+    dataset_name: str | None = None  # key looked up in IMAGE_DATASET_NORMALIZATION
+    domain: str = "image"  # only "image" is implemented
+    attack: str = "fgsm"  # "fgsm" or "pgd"
+    epsilon: float = 8.0 / 255.0  # perturbation budget in un-normalized pixel units (divided by std before use)
+    alpha: float | None = None  # PGD step size; None means epsilon / steps
+    steps: int = 1  # PGD iterations (>= 1); not read by the FGSM generator
+    clean_weight: float = 0.5  # weight of the clean loss in "mixed" mode
+    adversarial_weight: float = 0.5  # weight of the adversarial loss in "mixed" mode
+    mode: str = "mixed"  # "clean", "adversarial" or "mixed"
+    apply_probability: float = 1.0  # chance, per training step, that the defense is applied
+    clip_min: float = 0.0  # lowest valid pixel value before normalization
+    clip_max: float = 1.0  # highest valid pixel value before normalization
+    log_adversarial_metrics: bool = True  # when False no extra metrics are returned
+
+
+class AdversarialExampleGenerator(ABC):
+    """Base interface for domain-specific adversarial example generators."""
+
+    @abstractmethod
+    def generate(self, model, x, y, criterion):  # implementations return an adversarial version of x
+        raise NotImplementedError
+
+
+class ImageAdversarialExampleGenerator(AdversarialExampleGenerator):  # shared helpers; generate() is left to subclasses
+    def __init__(self, config: AdversarialTrainingConfig, mean: tuple[float, ...], std: tuple[float, ...]):
+        self.config = config
+        self.mean = mean  # per-channel mean; NOTE(review): presumably the dataset's normalization mean - confirm
+        self.std = std  # per-channel std, same assumption as mean
+
+    def _channel_tensor(self, values: tuple[float, ...], x: torch.Tensor) -> torch.Tensor:
+        shape = [1, len(values)] + [1] * max(x.dim() - 2, 0)  # [1, C, 1, ...]: broadcasts over x with channels on dim 1
+        return torch.tensor(values, dtype=x.dtype, device=x.device).view(*shape)
+
+    def _epsilon(self, x: torch.Tensor) -> torch.Tensor:  # epsilon rescaled per channel by 1 / std
+        std = self._channel_tensor(self.std, x)
+        return float(self.config.epsilon) / std
+
+    def _alpha(self, x: torch.Tensor) -> torch.Tensor:  # step size rescaled per channel by 1 / std
+        alpha = self.config.alpha
+        if alpha is None:
+            alpha = self.config.epsilon / max(int(self.config.steps), 1)  # default: spread epsilon over the steps
+        std = self._channel_tensor(self.std, x)
+        return float(alpha) / std
+
+    def _bounds(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        mean = self._channel_tensor(self.mean, x)
+        std = self._channel_tensor(self.std, x)
+        lower = (float(self.config.clip_min) - mean) / std  # clip_min mapped through (v - mean) / std
+        upper = (float(self.config.clip_max) - mean) / std  # clip_max mapped the same way
+        return lower, upper
+
+    def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor) -> torch.Tensor:
+        epsilon = self._epsilon(x_clean)
+        lower, upper = self._bounds(x_clean)
+        x_adv = torch.max(torch.min(x_adv, x_clean + epsilon), x_clean - epsilon)  # clamp to the epsilon ball around x_clean
+        return torch.max(torch.min(x_adv, upper), lower)  # then clamp to the [lower, upper] range
+
+
+class ImageFGSMGenerator(ImageAdversarialExampleGenerator):  # one signed-gradient step of size epsilon
+    def generate(self, model, x, y, criterion):
+        x_adv = x.detach().clone().requires_grad_(True)  # fresh leaf so the gradient is taken w.r.t. the input
+        logits = model(x_adv)
+        loss = criterion(logits, y)
+        grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]  # returns the input gradient directly
+        x_adv = x_adv + self._epsilon(x_adv) * grad.sign()
+        return self._project(x_adv.detach(), x.detach())  # result is detached from the attack graph
+
+
+class ImagePGDGenerator(ImageAdversarialExampleGenerator):  # iterated signed-gradient steps with projection
+    def generate(self, model, x, y, criterion):
+        x_clean = x.detach()
+        x_adv = x_clean.clone()  # starts from the clean input (no random start)
+        steps = max(int(self.config.steps), 1)  # always run at least one iteration
+
+        for _ in range(steps):
+            x_adv = x_adv.detach().requires_grad_(True)  # new leaf each iteration
+            logits = model(x_adv)
+            loss = criterion(logits, y)
+            grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]
+            x_adv = x_adv + self._alpha(x_adv) * grad.sign()  # step of size alpha (per channel)
+            x_adv = self._project(x_adv.detach(), x_clean)  # back into the epsilon ball and the clip range
+
+        return x_adv.detach()
+
+
+class AdversarialTrainingDefense:
+    """Batch-level adversarial training defense for Nebula models."""
+
+    def __init__(self, config: AdversarialTrainingConfig, generator: AdversarialExampleGenerator):
+        self.config = config
+        self.generator = generator
+
+    @classmethod
+    def from_participant_config(cls, participant_config: dict[str, Any]) -> "AdversarialTrainingDefense | None":
+        raw = participant_config.get("defense_args", {}).get("adversarial_training", {})
+        if not raw or not raw.get("enabled", False):  # section absent or disabled: no defense
+            return None
+
+        dataset_name = participant_config.get("data_args", {}).get("dataset")
+        config = AdversarialTrainingConfig(
+            enabled=True,
+            dataset_name=dataset_name,
+            domain=str(raw.get("domain", "image")).lower(),
+            attack=str(raw.get("attack", "fgsm")).lower(),
+            epsilon=float(raw.get("epsilon", 8.0 / 255.0)),
+            alpha=float(raw["alpha"]) if raw.get("alpha") is not None else None,  # missing or null stays None
+            steps=int(raw.get("steps", 1)),
+            clean_weight=float(raw.get("clean_weight", 0.5)),
+            adversarial_weight=float(raw.get("adversarial_weight", 0.5)),
+            mode=str(raw.get("mode", "mixed")).lower(),
+            apply_probability=float(raw.get("apply_probability", 1.0)),
+            clip_min=float(raw.get("clip_min", 0.0)),
+            clip_max=float(raw.get("clip_max", 1.0)),
+            log_adversarial_metrics=bool(raw.get("log_adversarial_metrics", True)),
+        )
+        cls._validate_config(config)  # raises ValueError before the domain/dataset checks below
+
+        if config.domain != "image":
+            logging.warning(
+                "[AdversarialTrainingDefense] Skipping adversarial training: domain '%s' is not implemented yet",
+                config.domain,
+            )
+            return None
+
+        normalization = IMAGE_DATASET_NORMALIZATION.get(dataset_name)  # (mean, std) or None for unknown datasets
+        if normalization is None:
+            logging.warning(
+                "[AdversarialTrainingDefense] Skipping adversarial training: dataset '%s' has no image bounds",
+                dataset_name,
+            )
+            return None
+
+        generator = cls._build_generator(config, normalization)
+        return cls(config=config, generator=generator)
+
+    @staticmethod
+    def _validate_config(config: AdversarialTrainingConfig) -> None:  # raises ValueError on the first invalid field
+        if config.mode not in {"clean", "adversarial", "mixed"}:
+            raise ValueError("adversarial_training.mode must be one of: clean, adversarial, mixed")
+        if config.attack not in {"fgsm", "pgd"}:
+            raise ValueError("adversarial_training.attack must be one of: fgsm, pgd")
+        if config.epsilon < 0:
+            raise ValueError("adversarial_training.epsilon must be >= 0")
+        if config.alpha is not None and config.alpha < 0:
+            raise ValueError("adversarial_training.alpha must be >= 0")
+        if config.steps < 1:
+            raise ValueError("adversarial_training.steps must be >= 1")
+        if not 0.0 <= config.apply_probability <= 1.0:
+            raise ValueError("adversarial_training.apply_probability must be in [0, 1]")
+        if config.clean_weight < 0 or config.adversarial_weight < 0:
+            raise ValueError("adversarial_training loss weights must be >= 0")
+        if config.mode == "mixed" and config.clean_weight + config.adversarial_weight == 0:  # sum is a divisor in mixed mode
+            raise ValueError("adversarial_training mixed mode requires at least one positive loss weight")
+        if config.clip_min >= config.clip_max:
+            raise ValueError("adversarial_training.clip_min must be smaller than clip_max")
+
+    @staticmethod
+    def _build_generator(config, normalization):  # normalization is a (mean, std) pair
+        mean, std = normalization
+        if config.attack == "fgsm":
+            return ImageFGSMGenerator(config, mean, std)
+        if config.attack == "pgd":
+            return ImagePGDGenerator(config, mean, std)
+        raise ValueError(f"Unsupported adversarial training attack: {config.attack}")
+
+    def should_apply(self, x: torch.Tensor) -> bool:  # x is only used to pick the device of the random draw
+        if self.config.apply_probability >= 1.0:
+            return True
+        if self.config.apply_probability <= 0.0:
+            return False
+        return bool(torch.rand((), device=x.device).item() < self.config.apply_probability)  # one draw per call
+
+    def compute_training_step(self, model, x, y, criterion):  # returns (loss, logits, extra_metrics)
+        if not self.should_apply(x):  # defense skipped for this call: ordinary clean step
+            logits = model(x)
+            loss = criterion(logits, y)
+            return loss, logits, {}
+
+        if self.config.mode == "clean":  # "clean" mode never generates adversarial examples
+            logits = model(x)
+            loss = criterion(logits, y)
+            return loss, logits, {}
+
+        x_adv = self.generator.generate(model, x, y, criterion)
+        adv_logits = model(x_adv)
+        adv_loss = criterion(adv_logits, y)
+
+        if self.config.mode == "adversarial":  # train on the adversarial batch only
+            return adv_loss, adv_logits, self._extra_metrics({
+                "Adversarial Loss": adv_loss,
+                "Adversarial Accuracy": self._accuracy(adv_logits, y),
+            })
+
+        clean_logits = model(x)  # "mixed" mode: second forward pass on the clean batch
+        clean_loss = criterion(clean_logits, y)
+        total_weight = self.config.clean_weight + self.config.adversarial_weight
+        loss = (
+            self.config.clean_weight * clean_loss + self.config.adversarial_weight * adv_loss
+        ) / total_weight  # weighted average of the two losses
+
+        return loss, clean_logits, self._extra_metrics({  # the returned logits are the clean ones
+            "Clean Loss": clean_loss,
+            "Adversarial Loss": adv_loss,
+            "Adversarial Accuracy": self._accuracy(adv_logits, y),
+        })
+
+    def _accuracy(self, logits, y):  # mean of (argmax over dim 1 == y), returned as a tensor
+        predictions = torch.argmax(logits, dim=1)
+        return torch.mean((predictions == y).float())
+
+    def _extra_metrics(self, metrics):  # pass-through unless metric logging is disabled
+        if not self.config.log_adversarial_metrics:
+            return {}
+        return metrics
+
+
+def apply_adversarial_training_if_enabled(model, participant_config: dict[str, Any]) -> None:
+    defense = AdversarialTrainingDefense.from_participant_config(participant_config)  # None when disabled or unsupported
+    if defense is not None:
+        model.set_adversarial_training(defense)  # model must provide set_adversarial_training
+        logging.info(
+            "[AdversarialTrainingDefense] Enabled | dataset=%s | attack=%s | epsilon=%s | mode=%s",
+            defense.config.dataset_name,
+            defense.config.attack,
+            defense.config.epsilon,
+            defense.config.mode,
+        )
diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 5c526dfbd..ff8a49e7b 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -1865,6 +1865,41 @@ def get_empirical_robustness_score(
 
 
 
+def _get_image_normalization_for_samples(samples):  # returns a (mean, std) pair guessed from the channel count, or None
+    if not isinstance(samples, torch.Tensor) or samples.ndim < 4:  # anything below 4 dims is treated as non-image
+        return None
+
+    channels = int(samples.shape[1])  # channel axis is taken to be dim 1
+    if channels == 1:
+        return (0.5,), (0.5,)  # NOTE(review): assumes every 1-channel input was normalized with mean=std=0.5 - confirm
+    if channels == 3:
+        return (0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)  # NOTE(review): assumes these stats fit any 3-channel input
+    return None
+
+
+def _channel_tensor(values, samples):  # per-channel values as a tensor that broadcasts over samples
+    shape = [1, len(values)] + [1] * max(samples.dim() - 2, 0)  # [1, C, 1, ...]
+    return torch.tensor(values, dtype=samples.dtype, device=samples.device).view(*shape)
+
+
+def _fgsm_step_and_clamp(samples, grad, epsilon):  # one FGSM step; clamped only when a normalization is known
+    normalization = _get_image_normalization_for_samples(samples)
+    if normalization is None:
+        return samples + epsilon * grad.sign()  # no known normalization: plain step, no clamping
+
+    mean, std = normalization
+    mean = _channel_tensor(mean, samples)
+    std = _channel_tensor(std, samples)
+
+    normalized_epsilon = float(epsilon) / std  # epsilon rescaled per channel by 1 / std
+    lower = (0.0 - mean) / std  # 0.0 mapped through (v - mean) / std
+    upper = (1.0 - mean) / std  # 1.0 mapped the same way
+
+    x_adv = samples + normalized_epsilon * grad.sign()
+    x_adv = torch.max(torch.min(x_adv, samples + normalized_epsilon), samples - normalized_epsilon)  # epsilon-ball clamp
+    return torch.max(torch.min(x_adv, upper), lower)  # clamp to [lower, upper]
+
+
 def fgsm_attack(model, samples, labels, epsilon=0.03):
     """
         Performs an FGSM (Fast Gradient Sign Method) adversarial attack on a batch of samples.
@@ -1890,11 +1925,8 @@ def fgsm_attack(model, samples, labels, epsilon=0.03):
     outputs = model(samples)
     logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
     loss = nn.CrossEntropyLoss()(logits, labels)
-    model.zero_grad()
-    loss.backward()
-
-    perturbation = epsilon * samples.grad.sign()
-    x_adv = samples + perturbation
+    grad = torch.autograd.grad(loss, samples, only_inputs=True)[0]
+    x_adv = _fgsm_step_and_clamp(samples, grad, epsilon)
 
     return x_adv.detach()
 
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index 7b27d475b..f9fa18a17 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -117,6 +117,7 @@ def __init__(
         sar_training_policy,
         dp=None,
         feature_squeezing=None,
+        adversarial_training=None,
         physical_ips=None,
     ):
         """
@@ -196,6 +197,7 @@ def __init__(
         self.epochs = epochs
         self.dp = dp
         self.feature_squeezing = feature_squeezing
+        self.adversarial_training = adversarial_training
         self.attack_params = attack_params
         self.reputation = reputation
         self.random_geo = random_geo
@@ -728,6 +730,46 @@ def __init__(self, scenario, user=None):
             bit_depth = feature_squeezing.get("bit_depth", feature_squeezing.get("n"))
             if bit_depth is not None:
                 participant_config["defense_args"]["feature_squeezing"]["bit_depth"] = int(bit_depth)
+            adversarial_training = (
+                self.scenario.adversarial_training if isinstance(self.scenario.adversarial_training, dict) else {}
+            )
+            participant_config["defense_args"].setdefault("adversarial_training", {})
+            participant_config["defense_args"]["adversarial_training"]["enabled"] = bool(
+                adversarial_training.get("enabled", False)
+            )
+            if "domain" in adversarial_training:
+                participant_config["defense_args"]["adversarial_training"]["domain"] = str(
+                    adversarial_training["domain"]
+                )
+            if "attack" in adversarial_training:
+                participant_config["defense_args"]["adversarial_training"]["attack"] = str(
+                    adversarial_training["attack"]
+                )
+            for key in (
+                "epsilon",
+                "alpha",
+                "clean_weight",
+                "adversarial_weight",
+                "apply_probability",
+                "clip_min",
+                "clip_max",
+            ):
+                if key in adversarial_training and adversarial_training[key] is not None:
+                    participant_config["defense_args"]["adversarial_training"][key] = float(
+                        adversarial_training[key]
+                    )
+            if "steps" in adversarial_training:
+                participant_config["defense_args"]["adversarial_training"]["steps"] = int(
+                    adversarial_training["steps"]
+                )
+            if "mode" in adversarial_training:
+                participant_config["defense_args"]["adversarial_training"]["mode"] = str(
+                    adversarial_training["mode"]
+                )
+            if "log_adversarial_metrics" in adversarial_training:
+                participant_config["defense_args"]["adversarial_training"]["log_adversarial_metrics"] = bool(
+                    adversarial_training["log_adversarial_metrics"]
+                )
             participant_config["device_args"]["accelerator"] = self.scenario.accelerator
             participant_config["device_args"]["gpu_id"] = self.scenario.gpu_id
             participant_config["device_args"]["logging"] = self.scenario.logginglevel
diff --git a/nebula/core/models/nebulamodel.py b/nebula/core/models/nebulamodel.py
index b8ae90160..b2d6065d8 100755
--- a/nebula/core/models/nebulamodel.py
+++ b/nebula/core/models/nebulamodel.py
@@ -88,6 +88,13 @@ def log_metrics_end(self, phase):
                 key: float(value.detach().cpu().item()) for key, value in output.items()
             }
 
+        if phase == "Train" and self._train_extra_metrics:
+            output.update({
+                f"{phase}/{key}": torch.tensor(value["sum"] / value["count"], device=self.device)
+                for key, value in self._train_extra_metrics.items()
+                if value["count"] > 0
+            })
+
         self.logger.log_data(output, step=self.global_number[phase])
 
         metrics_str = ""
@@ -206,10 +213,12 @@ def __init__(
         self._optimizer = None
         self._optimizer_override = None
         self._latest_validation_metrics = {}
+        self._train_extra_metrics = {}
 
         self.dp_enabled = False
         self.dp_epsilon = None
         self.dp_delta = None
+        self.adversarial_training = None
 
     def set_optimizer_override(self, optimizer):
         self._optimizer_override = optimizer
@@ -221,6 +230,12 @@ def clear_optimizer_override(self):
     def get_optimizer_override(self):
         return self._optimizer_override
 
+    def set_adversarial_training(self, adversarial_training):
+        self.adversarial_training = adversarial_training
+
+    def clear_adversarial_training(self):
+        self.adversarial_training = None
+
     def set_communication_manager(self, communication_manager):
         self.communication_manager = communication_manager
 
@@ -242,13 +257,35 @@ def configure_optimizers(self):
     def step(self, batch, batch_idx, phase):
         """Training/validation/test step."""
         x, y = batch
-        y_pred = self.forward(x)
-        loss = self.criterion(y_pred, y)
+        extra_metrics = {}
+        if phase == "Train" and self.adversarial_training is not None:
+            loss, y_pred, extra_metrics = self.adversarial_training.compute_training_step(
+                self,
+                x,
+                y,
+                self.criterion,
+            )
+        else:
+            y_pred = self.forward(x)
+            loss = self.criterion(y_pred, y)
+
         self.process_metrics(phase, y_pred, y, loss)
+        if phase == "Train" and extra_metrics:
+            self._log_training_extra_metrics(extra_metrics)
 
         self._current_loss = loss
         return loss
 
+    def _log_training_extra_metrics(self, metrics):  # accumulate the extras and log them for this step
+        if self.logger is None:  # without a logger nothing is accumulated either
+            return
+        detached_metrics = {key: value.detach() for key, value in metrics.items()}  # values must support detach/cpu/item
+        for key, value in detached_metrics.items():
+            metric = self._train_extra_metrics.setdefault(key, {"sum": 0.0, "count": 0})  # running sum and count per key
+            metric["sum"] += float(value.cpu().item())
+            metric["count"] += 1
+        self.logger.log_data({f"Train/{key}": value for key, value in detached_metrics.items()})
+
     def get_loss(self):
         return self._current_loss
 
@@ -294,6 +331,7 @@ def on_train_end(self):
     def on_train_epoch_end(self):
         self.log_metrics_end("Train")
         self.train_metrics.reset()
+        self._train_extra_metrics = {}
         self.global_number["Train"] += 1
 
     def validation_step(self, batch, batch_idx):
@@ -370,6 +408,7 @@ def on_train_end(self):
     def on_train_epoch_end(self):
         self.log_metrics_end("Train")
         self.train_metrics.reset()
+        self._train_extra_metrics = {}
         # NebulaModel registers training rounds
         # NebulaModelStandalone register the global number of epochs instead of rounds
         self.global_number["Train"] += 1
diff --git a/nebula/core/node.py b/nebula/core/node.py
index c74a5cb94..395752cc5 100755
--- a/nebula/core/node.py
+++ b/nebula/core/node.py
@@ -19,6 +19,7 @@
 import logging
 from collections import Counter
 
+from nebula.addons.defenses.adversarial_training import apply_adversarial_training_if_enabled
 from nebula.addons.defenses.feature_squeezing import apply_feature_squeezing_if_enabled
 from nebula.config.config import Config
 from nebula.core.datasets.cifar10.cifar10 import CIFAR10PartitionHandler
@@ -189,6 +190,7 @@ async def main(config: Config):
     dataset = NebulaPartition(handler=handler, config=config)
     dataset.load_partition()
     apply_feature_squeezing_if_enabled(dataset, config.participant)
+    apply_adversarial_training_if_enabled(model, config.participant)
     dataset.log_partition()
     samples_per_label = Counter(dataset.get_train_labels())
 
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index 50da11a7b..e3c65f409 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -109,6 +109,20 @@
       "apply_to_test": true,
       "apply_to_local_test": true
     },
+    "adversarial_training": {
+      "enabled": false,
+      "domain": "image",
+      "attack": "fgsm",
+      "epsilon": 0.03,
+      "steps": 1,
+      "clean_weight": 0.5,
+      "adversarial_weight": 0.5,
+      "mode": "mixed",
+      "apply_probability": 1.0,
+      "clip_min": 0.0,
+      "clip_max": 1.0,
+      "log_adversarial_metrics": true
+    },
     "reputation": {
       "enabled": false,
       "metrics": {},
diff --git a/nebula/frontend/static/js/deployment/adversarial-training.js b/nebula/frontend/static/js/deployment/adversarial-training.js
new file mode 100644
index 000000000..72bc6a47d
--- /dev/null
+++ b/nebula/frontend/static/js/deployment/adversarial-training.js
@@ -0,0 +1,209 @@
+// Adversarial Training Module
+const AdversarialTrainingManager = (function() {
+    const DEFAULT_ADVERSARIAL_TRAINING_CONFIG = {
+        enabled: false,
+        domain: "image",
+        attack: "fgsm",
+        epsilon: 0.03,
+        alpha: null,
+        steps: 1,
+        mode: "mixed",
+        clean_weight: 0.5,
+        adversarial_weight: 0.5,
+        apply_probability: 1.0,
+        clip_min: 0.0,
+        clip_max: 1.0,
+        log_adversarial_metrics: true
+    };
+
+    const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
+
+    function initializeAdversarialTraining() {
+        setupAdversarialTrainingSwitch();
+        setupAttackSelector();
+        setupDatasetAwareness();
+        setAdversarialTrainingConfig(DEFAULT_ADVERSARIAL_TRAINING_CONFIG);
+    }
+
+    function setupAdversarialTrainingSwitch() {
+        const adversarialTrainingSwitch = document.getElementById("adversarialTrainingSwitch");
+        if (!adversarialTrainingSwitch) return;
+
+        adversarialTrainingSwitch.addEventListener("change", function() {
+            if (this.checked && window.DpManager) {
+                window.DpManager.setDpConfig({enabled: false});
+            }
+            toggleAdversarialTrainingSettings(this.checked);
+        });
+    }
+
+    function setupAttackSelector() {
+        const attackSelect = document.getElementById("adversarialTrainingAttack");
+        if (!attackSelect) return;
+
+        attackSelect.addEventListener("change", function() {
+            toggleAttackSettings(this.value);
+        });
+    }
+
+    function setupDatasetAwareness() {
+        const datasetSelect = document.getElementById("datasetSelect");
+        if (!datasetSelect) return;
+
+        datasetSelect.addEventListener("change", updateDatasetAvailability);
+        updateDatasetAvailability();
+    }
+
+    function toggleAdversarialTrainingSettings(enabled) {
+        const settings = document.getElementById("adversarial-training-settings");
+        if (!settings) return;
+
+        settings.style.display = enabled ? "block" : "none";
+        toggleAttackSettings(document.getElementById("adversarialTrainingAttack")?.value || "fgsm");
+    }
+
+    function toggleAttackSettings(attack) {
+        const pgdSettings = document.getElementById("adversarial-training-pgd-settings");
+        if (!pgdSettings) return;
+
+        pgdSettings.style.display = attack === "pgd" ? "block" : "none";
+    }
+
+    function updateDatasetAvailability() {
+        const dataset = document.getElementById("datasetSelect")?.value;
+        const enabledForDataset = IMAGE_DATASETS.has(dataset);
+        const adversarialTrainingSwitch = document.getElementById("adversarialTrainingSwitch");
+        const datasetNote = document.getElementById("adversarial-training-dataset-note");
+
+        if (datasetNote) {
+            datasetNote.style.display = enabledForDataset ? "none" : "block";
+        }
+
+        if (!adversarialTrainingSwitch) return;
+        adversarialTrainingSwitch.disabled = !enabledForDataset;
+        if (!enabledForDataset) {
+            adversarialTrainingSwitch.checked = false;
+            toggleAdversarialTrainingSettings(false);
+        }
+    }
+
+    function numberValue(id, fallback) {
+        const value = parseFloat(document.getElementById(id)?.value);
+        return Number.isFinite(value) ? value : fallback;
+    }
+
+    function integerValue(id, fallback) {
+        const value = parseInt(document.getElementById(id)?.value, 10);
+        return Number.isFinite(value) ? value : fallback;
+    }
+
+    function optionalNumberValue(id, fallback) {
+        const rawValue = document.getElementById(id)?.value;
+        if (rawValue === undefined || rawValue === null || rawValue === "") {
+            return fallback;
+        }
+        const value = parseFloat(rawValue);
+        return Number.isFinite(value) ? value : fallback;
+    }
+
+    function getAdversarialTrainingConfig() {
+        const config = {
+            enabled: Boolean(document.getElementById("adversarialTrainingSwitch")?.checked),
+            domain: document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain,
+            attack: document.getElementById("adversarialTrainingAttack")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.attack,
+            epsilon: numberValue("adversarialTrainingEpsilon", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.epsilon),
+            alpha: optionalNumberValue("adversarialTrainingAlpha", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.alpha),
+            steps: integerValue("adversarialTrainingSteps", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.steps),
+            mode: document.getElementById("adversarialTrainingMode")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.mode,
+            clean_weight: numberValue("adversarialTrainingCleanWeight", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.clean_weight),
+            adversarial_weight: numberValue("adversarialTrainingAdversarialWeight", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.adversarial_weight),
+            apply_probability: numberValue("adversarialTrainingApplyProbability", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.apply_probability),
+            clip_min: numberValue("adversarialTrainingClipMin", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.clip_min),
+            clip_max: numberValue("adversarialTrainingClipMax", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.clip_max),
+            log_adversarial_metrics: Boolean(document.getElementById("adversarialTrainingLogMetrics")?.checked)
+        };
+
+        if (config.alpha === null || config.attack !== "pgd") {
+            delete config.alpha;
+        }
+        return config;
+    }
+
+    function setAdversarialTrainingConfig(config = DEFAULT_ADVERSARIAL_TRAINING_CONFIG) {
+        const adversarialTrainingConfig = {
+            ...DEFAULT_ADVERSARIAL_TRAINING_CONFIG,
+            ...(config || {})
+        };
+
+        const adversarialTrainingSwitch = document.getElementById("adversarialTrainingSwitch");
+        if (!adversarialTrainingSwitch) return;
+
+        adversarialTrainingSwitch.checked = Boolean(adversarialTrainingConfig.enabled);
+        setValue("adversarialTrainingDomain", adversarialTrainingConfig.domain);
+        setValue("adversarialTrainingAttack", adversarialTrainingConfig.attack);
+        setValue("adversarialTrainingEpsilon", adversarialTrainingConfig.epsilon);
+        setValue("adversarialTrainingAlpha", adversarialTrainingConfig.alpha ?? "");
+        setValue("adversarialTrainingSteps", adversarialTrainingConfig.steps);
+        setValue("adversarialTrainingMode", adversarialTrainingConfig.mode);
+        setValue("adversarialTrainingCleanWeight", adversarialTrainingConfig.clean_weight);
+        setValue("adversarialTrainingAdversarialWeight", adversarialTrainingConfig.adversarial_weight);
+        setValue("adversarialTrainingApplyProbability", adversarialTrainingConfig.apply_probability);
+        setValue("adversarialTrainingClipMin", adversarialTrainingConfig.clip_min);
+        setValue("adversarialTrainingClipMax", adversarialTrainingConfig.clip_max);
+
+        const logMetricsInput = document.getElementById("adversarialTrainingLogMetrics");
+        if (logMetricsInput) {
+            logMetricsInput.checked = Boolean(adversarialTrainingConfig.log_adversarial_metrics);
+        }
+
+        toggleAdversarialTrainingSettings(adversarialTrainingSwitch.checked);
+        updateDatasetAvailability();
+    }
+
+    function setValue(id, value) {
+        const element = document.getElementById(id);
+        if (element) {
+            element.value = value;
+        }
+    }
+
+    function resetAdversarialTrainingConfig() {
+        setAdversarialTrainingConfig(DEFAULT_ADVERSARIAL_TRAINING_CONFIG);
+    }
+
+    function validateConfig() {
+        const config = getAdversarialTrainingConfig();
+        if (!config.enabled) {
+            return null;
+        }
+        if (config.epsilon < 0) {
+            return "[Adversarial Training] Epsilon must be greater than or equal to 0.";
+        }
+        if (config.attack === "pgd" && config.steps < 1) {
+            return "[Adversarial Training] PGD steps must be at least 1.";
+        }
+        if (config.clean_weight < 0 || config.adversarial_weight < 0) {
+            return "[Adversarial Training] Loss weights must be greater than or equal to 0.";
+        }
+        if (config.mode === "mixed" && config.clean_weight + config.adversarial_weight === 0) {
+            return "[Adversarial Training] Mixed mode needs at least one positive loss weight.";
+        }
+        if (config.apply_probability < 0 || config.apply_probability > 1) {
+            return "[Adversarial Training] Apply probability must be between 0 and 1.";
+        }
+        if (config.clip_min >= config.clip_max) {
+            return "[Adversarial Training] Pixel min bound must be smaller than max bound.";
+        }
+        return null;
+    }
+
+    return {
+        initializeAdversarialTraining,
+        getAdversarialTrainingConfig,
+        setAdversarialTrainingConfig,
+        resetAdversarialTrainingConfig,
+        validateConfig
+    };
+})();
+
+export default AdversarialTrainingManager;
diff --git a/nebula/frontend/static/js/deployment/dp.js b/nebula/frontend/static/js/deployment/dp.js
index 7821386f2..9087152ad 100644
--- a/nebula/frontend/static/js/deployment/dp.js
+++ b/nebula/frontend/static/js/deployment/dp.js
@@ -16,10 +16,28 @@ const DpManager = (function() {
         if (!dpSwitch) return;
 
         dpSwitch.addEventListener("change", function() {
+            if (this.checked) {
+                disableAdversarialTraining();
+            }
             toggleDpSettings(this.checked);
         });
     }
 
+    function disableAdversarialTraining() {
+        if (window.AdversarialTrainingManager) {
+            window.AdversarialTrainingManager.setAdversarialTrainingConfig({enabled: false});
+        }
+
+        const adversarialTrainingSwitch = document.getElementById("adversarialTrainingSwitch");
+        const adversarialTrainingSettings = document.getElementById("adversarial-training-settings");
+        if (adversarialTrainingSwitch) {
+            adversarialTrainingSwitch.checked = false;
+        }
+        if (adversarialTrainingSettings) {
+            adversarialTrainingSettings.style.display = "none";
+        }
+    }
+
     function toggleDpSettings(enabled) {
         const dpSettings = document.getElementById("dp-settings");
         if (!dpSettings) return;
diff --git a/nebula/frontend/static/js/deployment/feature-squeezing.js b/nebula/frontend/static/js/deployment/feature-squeezing.js
index c68708a41..cb205371d 100644
--- a/nebula/frontend/static/js/deployment/feature-squeezing.js
+++ b/nebula/frontend/static/js/deployment/feature-squeezing.js
@@ -4,9 +4,12 @@ const FeatureSqueezingManager = (function() {
         enabled: false,
         bit_depth: 4
     };
+    const ALLOWED_BIT_DEPTHS = [1, 2, 4, 8, 16, 32, 64];
+    const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
 
     function initializeFeatureSqueezing() {
         setupFeatureSqueezingSwitch();
+        setupDatasetAwareness();
         setFeatureSqueezingConfig(DEFAULT_FEATURE_SQUEEZING_CONFIG);
     }
 
@@ -19,6 +22,14 @@ const FeatureSqueezingManager = (function() {
         });
     }
 
+    function setupDatasetAwareness() {
+        const datasetSelect = document.getElementById("datasetSelect");
+        if (!datasetSelect) return;
+
+        datasetSelect.addEventListener("change", updateDatasetAvailability);
+        updateDatasetAvailability();
+    }
+
     function toggleFeatureSqueezingSettings(enabled) {
         const featureSqueezingSettings = document.getElementById("feature-squeezing-settings");
         if (!featureSqueezingSettings) return;
@@ -26,15 +37,31 @@ const FeatureSqueezingManager = (function() {
         featureSqueezingSettings.style.display = enabled ? "block" : "none";
     }
 
+    function updateDatasetAvailability() {
+        const dataset = document.getElementById("datasetSelect")?.value;
+        const enabledForDataset = IMAGE_DATASETS.has(dataset);
+        const featureSqueezingSwitch = document.getElementById("featureSqueezingSwitch");
+        const datasetNote = document.getElementById("feature-squeezing-dataset-note");
+
+        if (datasetNote) {
+            datasetNote.style.display = enabledForDataset ? "none" : "block";
+        }
+
+        if (!featureSqueezingSwitch) return;
+        featureSqueezingSwitch.disabled = !enabledForDataset;
+        if (!enabledForDataset) {
+            featureSqueezingSwitch.checked = false;
+            toggleFeatureSqueezingSettings(false);
+        }
+    }
+
     function getFeatureSqueezingConfig() {
         const nInput = document.getElementById("featureSqueezingN");
         const bitDepth = parseInt(nInput?.value, 10);
 
         return {
             enabled: Boolean(document.getElementById("featureSqueezingSwitch")?.checked),
-            bit_depth: Number.isFinite(bitDepth)
-                ? bitDepth
-                : DEFAULT_FEATURE_SQUEEZING_CONFIG.bit_depth
+            bit_depth: normalizeBitDepth(bitDepth)
         };
     }
 
@@ -51,9 +78,18 @@ const FeatureSqueezingManager = (function() {
         featureSqueezingSwitch.checked = Boolean(featureSqueezingConfig.enabled);
         const nInput = document.getElementById("featureSqueezingN");
         if (nInput) {
-            nInput.value = bitDepth ?? DEFAULT_FEATURE_SQUEEZING_CONFIG.bit_depth;
+            nInput.value = normalizeBitDepth(bitDepth);
         }
         toggleFeatureSqueezingSettings(featureSqueezingSwitch.checked);
+        updateDatasetAvailability();
+    }
+
+    function normalizeBitDepth(value) {
+        const bitDepth = parseInt(value, 10);
+        if (ALLOWED_BIT_DEPTHS.includes(bitDepth)) {
+            return bitDepth;
+        }
+        return DEFAULT_FEATURE_SQUEEZING_CONFIG.bit_depth;
     }
 
     function resetFeatureSqueezingConfig() {
diff --git a/nebula/frontend/static/js/deployment/main.js b/nebula/frontend/static/js/deployment/main.js
index eed8b0d31..40fe716b6 100644
--- a/nebula/frontend/static/js/deployment/main.js
+++ b/nebula/frontend/static/js/deployment/main.js
@@ -10,6 +10,7 @@ import Utils from './utils.js';
 import TrustworthinessManager from './trustworthiness.js';
 import DpManager from './dp.js';
 import FeatureSqueezingManager from './feature-squeezing.js';
+import AdversarialTrainingManager from './adversarial-training.js';
 
 const DeploymentManager = (function() {
     function initialize() {
@@ -35,6 +36,7 @@ const DeploymentManager = (function() {
         TrustworthinessManager.initializeTrustworthinessSystem();
         DpManager.initializeDifferentialPrivacy();
         FeatureSqueezingManager.initializeFeatureSqueezing();
+        AdversarialTrainingManager.initializeAdversarialTraining();
         GraphSettings.initializeDistanceControls();
 
         // Make modules globally available
@@ -47,6 +49,7 @@ const DeploymentManager = (function() {
         window.TrustworthinessManager = TrustworthinessManager;
         window.DpManager = DpManager;
         window.FeatureSqueezingManager = FeatureSqueezingManager;
+        window.AdversarialTrainingManager = AdversarialTrainingManager;
         window.GraphSettings = GraphSettings;
         window.DeploymentManager = DeploymentManager;
         window.Utils = Utils;
@@ -123,9 +126,23 @@ const DeploymentManager = (function() {
             return false;
         }
 
+        const adversarialTrainingValidationMessage = validateAdversarialTraining();
+        if (adversarialTrainingValidationMessage) {
+            Utils.showAlert('error', adversarialTrainingValidationMessage);
+            return false;
+        }
+
         return true;
     }
 
+    function validateAdversarialTraining() {
+        const manager = window.AdversarialTrainingManager || AdversarialTrainingManager;
+        if (manager && typeof manager.validateConfig === "function") {
+            return manager.validateConfig();
+        }
+        return null;
+    }
+
     function validateTrustworthinessWeights() {
         const manager = window.TrustworthinessManager || TrustworthinessManager;
         if (manager && typeof manager.validateWeights === "function") {
diff --git a/nebula/frontend/static/js/deployment/scenario.js b/nebula/frontend/static/js/deployment/scenario.js
index 6cf174854..890804430 100644
--- a/nebula/frontend/static/js/deployment/scenario.js
+++ b/nebula/frontend/static/js/deployment/scenario.js
@@ -76,6 +76,7 @@ const ScenarioManager = (function () {
             epochs: parseInt(document.getElementById("epochs").value),
             dp: window.DpManager.getDpConfig(),
             feature_squeezing: window.FeatureSqueezingManager.getFeatureSqueezingConfig(),
+            adversarial_training: window.AdversarialTrainingManager.getAdversarialTrainingConfig(),
             attack_params: attackConfig,
             reputation: {
                 enabled: window.ReputationManager.getReputationConfig().enabled || false,
@@ -271,6 +272,9 @@ const ScenarioManager = (function () {
         if (window.FeatureSqueezingManager) {
             window.FeatureSqueezingManager.setFeatureSqueezingConfig(scenario.feature_squeezing);
         }
+        if (window.AdversarialTrainingManager) {
+            window.AdversarialTrainingManager.setAdversarialTrainingConfig(scenario.adversarial_training);
+        }
 
         // Load module configurations
         if (scenario.attacks && scenario.attacks.length > 0) {
@@ -450,6 +454,9 @@ const ScenarioManager = (function () {
         if (window.FeatureSqueezingManager) {
             window.FeatureSqueezingManager.resetFeatureSqueezingConfig();
         }
+        if (window.AdversarialTrainingManager) {
+            window.AdversarialTrainingManager.resetAdversarialTrainingConfig();
+        }
 
         // Trigger necessary events
         document.getElementById("federationArchitecture").dispatchEvent(new Event('change'));
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 4bbc8de99..e68a5fec9 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -556,18 +556,111 @@ <h5 class="step-title">Enable/Disable Feature Squeezing</h5>
                         <input class="form-check-input" type="checkbox" id="featureSqueezingSwitch"
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
+                    <small id="feature-squeezing-dataset-note" class="form-text text-muted" style="display: none;">
+                        Feature Squeezing is currently available only for image datasets.
+                    </small>
                     <div id="feature-squeezing-settings" style="margin-top: 10px; display: none;">
                         <h5 class="step-title">n (bit depth)</h5>
                         <div class="form-check form-check-inline">
-                            <input type="number" class="form-control" id="featureSqueezingN"
-                                placeholder="n" min="1" max="64" step="1" value="4"
+                            <select class="form-control" id="featureSqueezingN"
                                 style="display: inline; width: 80%">
+                                <option value="1">1</option>
+                                <option value="2">2</option>
+                                <option value="4" selected>4</option>
+                                <option value="8">8</option>
+                                <option value="16">16</option>
+                                <option value="32">32</option>
+                                <option value="64">64</option>
+                            </select>
                         </div>
                         <small class="form-text text-muted">
                             Applies only to image datasets.
                         </small>
                     </div>
                 </div>
+                <!-- Adversarial Training -->
+                <div class="form-group row container-shadow tiny grey">
+                    <h5 class="step-number">Adversarial Training <i class="fa fa-shield"></i>
+                    </h5>
+                    <h5 class="step-title">Enable/Disable Adversarial Training</h5>
+                    <div class="form-check form-switch" style="margin-left: 23px;">
+                        <input class="form-check-input" type="checkbox" id="adversarialTrainingSwitch"
+                            style="display: inline; width: 80px; height: 30px;">
+                    </div>
+                    <small id="adversarial-training-dataset-note" class="form-text text-muted" style="display: none;">
+                        Adversarial Training is currently available only for image datasets.
+                    </small>
+                    <div id="adversarial-training-settings" style="margin-top: 10px; display: none;">
+                        <input type="hidden" id="adversarialTrainingDomain" value="image">
+                        <h5 class="step-title">Attack</h5>
+                        <div class="form-check form-check-inline">
+                            <select class="form-control" id="adversarialTrainingAttack"
+                                style="display: inline; width: 80%">
+                                <option value="fgsm" selected>FGSM</option>
+                                <option value="pgd">PGD</option>
+                            </select>
+                        </div>
+                        <h5 class="step-title">Epsilon</h5>
+                        <div class="form-check form-check-inline">
+                            <input type="number" class="form-control" id="adversarialTrainingEpsilon"
+                                placeholder="Epsilon" min="0" step="0.001" value="0.031372549"
+                                style="display: inline; width: 80%">
+                        </div>
+                        <div id="adversarial-training-pgd-settings" style="display: none;">
+                            <h5 class="step-title">PGD steps</h5>
+                            <div class="form-check form-check-inline">
+                                <input type="number" class="form-control" id="adversarialTrainingSteps"
+                                    placeholder="Steps" min="1" step="1" value="1"
+                                    style="display: inline; width: 80%">
+                            </div>
+                            <input type="hidden" id="adversarialTrainingAlpha" value="">
+                        </div>
+                        <h5 class="step-title">Training mode</h5>
+                        <div class="form-check form-check-inline">
+                            <select class="form-control" id="adversarialTrainingMode"
+                                style="display: inline; width: 80%">
+                                <option value="mixed" selected>Clean + adversarial</option>
+                                <option value="adversarial">Adversarial only</option>
+                                <option value="clean">Clean only</option>
+                            </select>
+                        </div>
+                        <details style="margin-top: 10px;">
+                            <summary class="step-title" style="cursor: pointer;">Advanced parameters</summary>
+                            <h5 class="step-title">Loss weights</h5>
+                            <div class="form-check form-check-inline">
+                                <input type="number" class="form-control" id="adversarialTrainingCleanWeight"
+                                    placeholder="Clean weight" min="0" step="0.1" value="0.5"
+                                    style="display: inline; width: 39%">
+                                <input type="number" class="form-control" id="adversarialTrainingAdversarialWeight"
+                                    placeholder="Adversarial weight" min="0" step="0.1" value="0.5"
+                                    style="display: inline; width: 39%">
+                            </div>
+                            <h5 class="step-title">Apply probability</h5>
+                            <div class="form-check form-check-inline">
+                                <input type="number" class="form-control" id="adversarialTrainingApplyProbability"
+                                    placeholder="Probability" min="0" max="1" step="0.05" value="1.0"
+                                    style="display: inline; width: 80%">
+                            </div>
+                            <h5 class="step-title">Pixel bounds</h5>
+                            <div class="form-check form-check-inline">
+                                <input type="number" class="form-control" id="adversarialTrainingClipMin"
+                                    placeholder="Min" step="0.1" value="0.0"
+                                    style="display: inline; width: 39%">
+                                <input type="number" class="form-control" id="adversarialTrainingClipMax"
+                                    placeholder="Max" step="0.1" value="1.0"
+                                    style="display: inline; width: 39%">
+                            </div>
+                            <h5 class="step-title">Log adversarial metrics</h5>
+                            <div class="form-check form-switch" style="margin-left: 23px;">
+                                <input class="form-check-input" type="checkbox" id="adversarialTrainingLogMetrics"
+                                    checked style="display: inline; width: 80px; height: 30px;">
+                            </div>
+                        </details>
+                        <small class="form-text text-muted">
+                            Epsilon and bounds use pixel scale; Nebula converts them to the normalized tensor space.
+                        </small>
+                    </div>
+                </div>
                 <!-- Advanced Robustness -->
                 <div class="form-group row container-shadow tiny grey">
                     <h5 class="step-number">Robustness <i class="fa fa-shield"></i>

From 130ff340561693be3b80152ff355ca651996ec14 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 20 May 2026 09:57:40 +0200
Subject: [PATCH 46/66] Models updated: Data type, tabular datasets fixed and
 continuous/binary division, tabular attack configuration updated: Sample
 poisoning not available

---
 nebula/controller/scenarios.py                |  2 +-
 .../core/datasets/adultcensus/adultcensus.py  | 43 +++++++++++--
 .../datasets/breast_cancer/breast_cancer.py   | 16 ++++-
 nebula/core/datasets/covtype/covtype.py       | 44 ++++++++++++-
 nebula/core/datasets/kddcup99/kddcup99.py     | 47 ++++++++++++--
 nebula/core/models/adultcensus/mlp.py         |  3 +
 nebula/core/models/breast_cancer/mlp.py       |  3 +
 nebula/core/models/cifar10/cnn.py             |  3 +
 nebula/core/models/cifar10/cnnV2.py           |  3 +
 nebula/core/models/cifar10/cnnV3.py           |  3 +
 nebula/core/models/cifar10/fastermobilenet.py |  3 +
 nebula/core/models/cifar10/resnet.py          |  3 +
 nebula/core/models/cifar10/simplemobilenet.py |  3 +
 nebula/core/models/cifar100/cnn.py            |  3 +
 nebula/core/models/covtype/mlp.py             |  3 +
 nebula/core/models/emnist/cnn.py              |  3 +
 nebula/core/models/emnist/mlp.py              |  3 +
 nebula/core/models/fashionmnist/cnn.py        |  3 +
 nebula/core/models/fashionmnist/mlp.py        |  3 +
 nebula/core/models/kddcup99/mlp.py            |  3 +
 nebula/core/models/mnist/cnn.py               |  3 +
 nebula/core/models/mnist/mlp.py               |  3 +
 nebula/core/models/sentiment140/cnn.py        |  3 +
 nebula/core/models/sentiment140/rnn.py        |  3 +
 .../frontend/static/js/deployment/attack.js   | 62 +++++++++++++++++--
 nebula/frontend/static/js/deployment/main.js  | 14 +++++
 nebula/frontend/templates/deployment.html     |  3 +
 27 files changed, 266 insertions(+), 22 deletions(-)

diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index f9fa18a17..70639462b 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -1109,7 +1109,7 @@ async def load_configurations_and_start_nodes(
             )
         elif dataset_name == "KDDCUP99":
             dataset = KDDCUP99Dataset(
-                num_classes=2,
+                num_classes=23,
                 partitions_number=self.n_nodes,
                 iid=self.scenario.iid,
                 partition=self.scenario.partition_selection,
diff --git a/nebula/core/datasets/adultcensus/adultcensus.py b/nebula/core/datasets/adultcensus/adultcensus.py
index f85d472e9..4c3c4584f 100644
--- a/nebula/core/datasets/adultcensus/adultcensus.py
+++ b/nebula/core/datasets/adultcensus/adultcensus.py
@@ -16,7 +16,14 @@ class AdultCensusTorchDataset(Dataset):
     x: float32 tensor (n_features,)
     y: long scalar {0,1} where 1 means >50K
     """
-    def __init__(self, x: np.ndarray, y: np.ndarray):
+    def __init__(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        feature_names: list[str] | None = None,
+        continuous_features: list[int] | None = None,
+        binary_features: list[int] | None = None,
+    ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
 
@@ -34,6 +41,10 @@ def __init__(self, x: np.ndarray, y: np.ndarray):
         self.data: np.ndarray = self.x
         self.targets: np.ndarray = self.y
         self.classes: list[str] = ["<=50K", ">50K"]
+        self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
+        self.continuous_features = continuous_features or []
+        self.binary_features = binary_features or []
+        self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
         return int(self.y.shape[0])
@@ -209,6 +220,10 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
 
         X_train = preprocessor.fit_transform(X_train_df)
         X_test = preprocessor.transform(X_test_df)
+        try:
+            feature_names = [str(name) for name in preprocessor.get_feature_names_out()]
+        except Exception:
+            feature_names = [f"feature_{i}" for i in range(X_train.shape[1])]
 
         # In case some sklearn path returns sparse matrices, densify safely
         if hasattr(X_train, "toarray"):
@@ -221,9 +236,29 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
         logging.getLogger().info(f"[AdultCensus] X_train shape = {X_train_np.shape}")
         logging.getLogger().info(f"[AdultCensus] INPUT_DIM (post-OHE) = {int(X_train_np.shape[1])}")
         X_test_np: np.ndarray = np.asarray(X_test, dtype=np.float32)
-
-        train_ds = AdultCensusTorchDataset(X_train_np, np.asarray(y_train, dtype=np.int64))
-        test_ds = AdultCensusTorchDataset(X_test_np, np.asarray(y_test, dtype=np.int64))
+        continuous_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name.startswith("num__")
+        ]
+        binary_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name.startswith("cat__")
+        ]
+
+        train_ds = AdultCensusTorchDataset(
+            X_train_np,
+            np.asarray(y_train, dtype=np.int64),
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            binary_features=binary_features,
+        )
+        test_ds = AdultCensusTorchDataset(
+            X_test_np,
+            np.asarray(y_test, dtype=np.int64),
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            binary_features=binary_features,
+        )
 
         return train_ds, test_ds
 
diff --git a/nebula/core/datasets/breast_cancer/breast_cancer.py b/nebula/core/datasets/breast_cancer/breast_cancer.py
index 9181c1422..b951a8113 100644
--- a/nebula/core/datasets/breast_cancer/breast_cancer.py
+++ b/nebula/core/datasets/breast_cancer/breast_cancer.py
@@ -14,7 +14,7 @@ class BreastCancerTorchDataset(Dataset):
     x: float32 tensor (n_features,)
     y: long scalar {0,1}
     """
-    def __init__(self, x: np.ndarray, y: np.ndarray):
+    def __init__(self, x: np.ndarray, y: np.ndarray, feature_names: list[str] | None = None):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
 
@@ -32,6 +32,10 @@ def __init__(self, x: np.ndarray, y: np.ndarray):
         self.data = self.x
         self.targets = self.y
         self.classes = ["0", "1"]
+        self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
+        self.continuous_features = list(range(self.x.shape[1]))
+        self.binary_features = []
+        self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
         return int(self.y.shape[0])
@@ -120,6 +124,7 @@ def load_breast_cancer_dataset(self):
         try:
             from sklearn.datasets import load_breast_cancer
             from sklearn.model_selection import train_test_split
+            from sklearn.preprocessing import StandardScaler
         except Exception as e:
             raise ImportError(
                 "BreastCancerDataset requires scikit-learn. Install it (e.g., pip install scikit-learn)."
@@ -128,6 +133,7 @@ def load_breast_cancer_dataset(self):
         ds = load_breast_cancer()
         x = np.asarray(ds.data)
         y = np.asarray(ds.target).reshape(-1)  # already 0/1
+        feature_names = [str(name) for name in ds.feature_names]
 
         x_train, x_test, y_train, y_test = train_test_split(
             x,
@@ -138,8 +144,12 @@ def load_breast_cancer_dataset(self):
             stratify=y,
         )
 
-        train_ds = BreastCancerTorchDataset(x_train, y_train)
-        test_ds = BreastCancerTorchDataset(x_test, y_test)
+        scaler = StandardScaler()
+        x_train = scaler.fit_transform(x_train)
+        x_test = scaler.transform(x_test)
+
+        train_ds = BreastCancerTorchDataset(x_train, y_train, feature_names=feature_names)
+        test_ds = BreastCancerTorchDataset(x_test, y_test, feature_names=feature_names)
 
         return train_ds, test_ds
 
diff --git a/nebula/core/datasets/covtype/covtype.py b/nebula/core/datasets/covtype/covtype.py
index 60868889a..4c0d28cfc 100644
--- a/nebula/core/datasets/covtype/covtype.py
+++ b/nebula/core/datasets/covtype/covtype.py
@@ -18,7 +18,14 @@ class CovtypeTorchDataset(Dataset):
         x: torch.float32 tensor of shape (n_features,)
         y: torch.long scalar in [0, num_classes-1]
     """
-    def __init__(self, x: np.ndarray, y: np.ndarray):
+    def __init__(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        feature_names: list[str] | None = None,
+        continuous_features: list[int] | None = None,
+        binary_features: list[int] | None = None,
+    ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
 
@@ -38,6 +45,10 @@ def __init__(self, x: np.ndarray, y: np.ndarray):
 
         n_classes = int(np.max(self.targets)) + 1
         self.classes = [str(i) for i in range(n_classes)]
+        self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
+        self.continuous_features = continuous_features or []
+        self.binary_features = binary_features or []
+        self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
         return int(self.y.shape[0])
@@ -150,6 +161,7 @@ def load_covtype_dataset(self):
         try:
             from sklearn.datasets import fetch_covtype
             from sklearn.model_selection import train_test_split
+            from sklearn.preprocessing import StandardScaler
         except Exception as e:
             raise ImportError(
                 "CovtypeDataset requires scikit-learn. Install it (e.g., pip install scikit-learn)."
@@ -159,6 +171,12 @@ def load_covtype_dataset(self):
 
         x = cov.data
         y = cov.target  # commonly 1..7 in sklearn
+        feature_names = getattr(cov, "feature_names", None)
+        if feature_names is None:
+            feature_names = [f"feature_{i}" for i in range(x.shape[1])]
+        feature_names = [str(name) for name in feature_names]
+        continuous_features = list(range(min(10, x.shape[1])))
+        binary_features = [i for i in range(x.shape[1]) if i not in continuous_features]
 
         # Map labels to 0..6 (CrossEntropyLoss convention)
         # If already 0..6, this is harmless for 1..7 only if we detect min.
@@ -194,8 +212,28 @@ def load_covtype_dataset(self):
                 stratify=y_test,
             )
 
-        train_ds = CovtypeTorchDataset(x_train, y_train)
-        test_ds = CovtypeTorchDataset(x_test, y_test)
+        # Covtype has continuous features followed by binary wilderness/soil indicators.
+        # Scale only the continuous block; keep binary indicators as 0/1.
+        scaler = StandardScaler()
+        x_train = np.asarray(x_train, dtype=np.float32).copy()
+        x_test = np.asarray(x_test, dtype=np.float32).copy()
+        x_train[:, continuous_features] = scaler.fit_transform(x_train[:, continuous_features])
+        x_test[:, continuous_features] = scaler.transform(x_test[:, continuous_features])
+
+        train_ds = CovtypeTorchDataset(
+            x_train,
+            y_train,
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            binary_features=binary_features,
+        )
+        test_ds = CovtypeTorchDataset(
+            x_test,
+            y_test,
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            binary_features=binary_features,
+        )
 
         return train_ds, test_ds
 
diff --git a/nebula/core/datasets/kddcup99/kddcup99.py b/nebula/core/datasets/kddcup99/kddcup99.py
index 6ff0e8a0f..d24ca3469 100644
--- a/nebula/core/datasets/kddcup99/kddcup99.py
+++ b/nebula/core/datasets/kddcup99/kddcup99.py
@@ -16,7 +16,14 @@ class KDDCUP99TorchDataset(Dataset):
         x: torch.float32 tensor of shape (n_features,)
         y: torch.long scalar in [0, num_classes-1]
     """
-    def __init__(self, x: np.ndarray, y: np.ndarray):
+    def __init__(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        feature_names: list[str] | None = None,
+        continuous_features: list[int] | None = None,
+        binary_features: list[int] | None = None,
+    ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
 
@@ -36,6 +43,10 @@ def __init__(self, x: np.ndarray, y: np.ndarray):
 
         n_classes = int(np.max(self.targets)) + 1
         self.classes = [str(i) for i in range(n_classes)]
+        self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
+        self.continuous_features = continuous_features or []
+        self.binary_features = binary_features or []
+        self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
         return int(self.y.shape[0])
@@ -152,6 +163,7 @@ def load_kddcup99_dataset(self):
             import pandas as pd
             from sklearn.datasets import fetch_kddcup99
             from sklearn.model_selection import train_test_split
+            from sklearn.preprocessing import StandardScaler
         except Exception as e:
             raise ImportError(
                 "KDDCUP99Dataset requires scikit-learn and pandas. "
@@ -190,12 +202,14 @@ def _decode_if_bytes(v):
                 x[col] = x[col].map(_decode_if_bytes)
 
         y = y.map(_decode_if_bytes)
+        numeric_columns = x.select_dtypes(exclude=["object", "category"]).columns.tolist()
 
         # One-hot encode categorical columns, keep numeric ones as-is.
         x = pd.get_dummies(x, drop_first=False)
-
-        # Ensure fully numeric dense matrix
-        x = x.astype(np.float32).to_numpy(copy=False)
+        feature_names = [str(col) for col in x.columns]
+        numeric_columns = [col for col in numeric_columns if col in x.columns]
+        continuous_features = [x.columns.get_loc(col) for col in numeric_columns]
+        binary_features = [i for i in range(len(feature_names)) if i not in continuous_features]
 
         # Map labels to 0..num_classes-1 deterministically
         y = pd.Series(y).astype(str)
@@ -234,8 +248,29 @@ def _decode_if_bytes(v):
                 stratify=y_test,
             )
 
-        train_ds = KDDCUP99TorchDataset(x_train, y_train)
-        test_ds = KDDCUP99TorchDataset(x_test, y_test)
+        x_train_np = x_train.astype(np.float32).to_numpy(copy=True)
+        x_test_np = x_test.astype(np.float32).to_numpy(copy=True)
+
+        # Scale the original numeric columns after splitting. One-hot columns stay binary.
+        if continuous_features:
+            scaler = StandardScaler()
+            x_train_np[:, continuous_features] = scaler.fit_transform(x_train_np[:, continuous_features])
+            x_test_np[:, continuous_features] = scaler.transform(x_test_np[:, continuous_features])
+
+        train_ds = KDDCUP99TorchDataset(
+            x_train_np,
+            y_train,
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            binary_features=binary_features,
+        )
+        test_ds = KDDCUP99TorchDataset(
+            x_test_np,
+            y_test,
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            binary_features=binary_features,
+        )
 
         # Optional: preserve original class names for inspection/debugging
         train_ds.classes = classes
diff --git a/nebula/core/models/adultcensus/mlp.py b/nebula/core/models/adultcensus/mlp.py
index 2e8ce5107..68f1d77d8 100644
--- a/nebula/core/models/adultcensus/mlp.py
+++ b/nebula/core/models/adultcensus/mlp.py
@@ -73,3 +73,6 @@ def count_parameters(self) -> int:
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/breast_cancer/mlp.py b/nebula/core/models/breast_cancer/mlp.py
index 2b70e0713..11a6ec833 100644
--- a/nebula/core/models/breast_cancer/mlp.py
+++ b/nebula/core/models/breast_cancer/mlp.py
@@ -61,3 +61,6 @@ def count_parameters(self) -> int:
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/cifar10/cnn.py b/nebula/core/models/cifar10/cnn.py
index 1486f0aa6..db9df51e6 100755
--- a/nebula/core/models/cifar10/cnn.py
+++ b/nebula/core/models/cifar10/cnn.py
@@ -59,3 +59,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/cifar10/cnnV2.py b/nebula/core/models/cifar10/cnnV2.py
index ca5ef81b0..0b23eef34 100755
--- a/nebula/core/models/cifar10/cnnV2.py
+++ b/nebula/core/models/cifar10/cnnV2.py
@@ -63,3 +63,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/cifar10/cnnV3.py b/nebula/core/models/cifar10/cnnV3.py
index b1783f76a..8b4585208 100755
--- a/nebula/core/models/cifar10/cnnV3.py
+++ b/nebula/core/models/cifar10/cnnV3.py
@@ -90,3 +90,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/cifar10/fastermobilenet.py b/nebula/core/models/cifar10/fastermobilenet.py
index 4856b22dd..7be70c64d 100755
--- a/nebula/core/models/cifar10/fastermobilenet.py
+++ b/nebula/core/models/cifar10/fastermobilenet.py
@@ -79,3 +79,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/cifar10/resnet.py b/nebula/core/models/cifar10/resnet.py
index ce5d5ec7e..a0b6d1f15 100755
--- a/nebula/core/models/cifar10/resnet.py
+++ b/nebula/core/models/cifar10/resnet.py
@@ -163,3 +163,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/cifar10/simplemobilenet.py b/nebula/core/models/cifar10/simplemobilenet.py
index 478398439..9791f5735 100755
--- a/nebula/core/models/cifar10/simplemobilenet.py
+++ b/nebula/core/models/cifar10/simplemobilenet.py
@@ -81,3 +81,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/cifar100/cnn.py b/nebula/core/models/cifar100/cnn.py
index 8ec7118c0..0a005973f 100755
--- a/nebula/core/models/cifar100/cnn.py
+++ b/nebula/core/models/cifar100/cnn.py
@@ -114,3 +114,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/covtype/mlp.py b/nebula/core/models/covtype/mlp.py
index eace7b899..0f684dd06 100644
--- a/nebula/core/models/covtype/mlp.py
+++ b/nebula/core/models/covtype/mlp.py
@@ -61,3 +61,6 @@ def count_parameters(self) -> int:
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/emnist/cnn.py b/nebula/core/models/emnist/cnn.py
index 79ba0a944..17c7f6040 100755
--- a/nebula/core/models/emnist/cnn.py
+++ b/nebula/core/models/emnist/cnn.py
@@ -70,3 +70,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/emnist/mlp.py b/nebula/core/models/emnist/mlp.py
index c3af2d01f..6d5e420e6 100755
--- a/nebula/core/models/emnist/mlp.py
+++ b/nebula/core/models/emnist/mlp.py
@@ -52,3 +52,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/fashionmnist/cnn.py b/nebula/core/models/fashionmnist/cnn.py
index a62efea31..72837d204 100755
--- a/nebula/core/models/fashionmnist/cnn.py
+++ b/nebula/core/models/fashionmnist/cnn.py
@@ -70,3 +70,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/fashionmnist/mlp.py b/nebula/core/models/fashionmnist/mlp.py
index 4009cd06c..4704674e0 100755
--- a/nebula/core/models/fashionmnist/mlp.py
+++ b/nebula/core/models/fashionmnist/mlp.py
@@ -52,3 +52,6 @@ def configure_optimizers(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/kddcup99/mlp.py b/nebula/core/models/kddcup99/mlp.py
index 60306beae..d8c57d9d2 100644
--- a/nebula/core/models/kddcup99/mlp.py
+++ b/nebula/core/models/kddcup99/mlp.py
@@ -55,3 +55,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/mnist/cnn.py b/nebula/core/models/mnist/cnn.py
index 19f59f4f1..78e520b4e 100755
--- a/nebula/core/models/mnist/cnn.py
+++ b/nebula/core/models/mnist/cnn.py
@@ -68,3 +68,6 @@ def get_learning_rate(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/mnist/mlp.py b/nebula/core/models/mnist/mlp.py
index bbc46a308..9fdc48bb7 100755
--- a/nebula/core/models/mnist/mlp.py
+++ b/nebula/core/models/mnist/mlp.py
@@ -50,3 +50,6 @@ def count_parameters(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/sentiment140/cnn.py b/nebula/core/models/sentiment140/cnn.py
index dec754f3b..ab305fda7 100755
--- a/nebula/core/models/sentiment140/cnn.py
+++ b/nebula/core/models/sentiment140/cnn.py
@@ -62,3 +62,6 @@ def configure_optimizers(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/core/models/sentiment140/rnn.py b/nebula/core/models/sentiment140/rnn.py
index e15cec49a..aa1915d53 100755
--- a/nebula/core/models/sentiment140/rnn.py
+++ b/nebula/core/models/sentiment140/rnn.py
@@ -63,3 +63,6 @@ def configure_optimizers(self):
 
     def get_num_classes(self):
         return self.num_classes
+
+    def get_data_type(self):
+        return self.data_type
diff --git a/nebula/frontend/static/js/deployment/attack.js b/nebula/frontend/static/js/deployment/attack.js
index ea04e1309..77de2f957 100644
--- a/nebula/frontend/static/js/deployment/attack.js
+++ b/nebula/frontend/static/js/deployment/attack.js
@@ -1,5 +1,6 @@
 // Attack Configuration Module
 const AttackManager = (function() {
+    const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
     const ATTACK_TYPES = {
         NO_ATTACK: 'No Attack',
         LABEL_FLIPPING: 'Label Flipping',
@@ -86,13 +87,19 @@ const AttackManager = (function() {
             updateAttackUI(this.value);
         });
 
+        const datasetSelect = document.getElementById("datasetSelect");
+        if (datasetSelect) {
+            datasetSelect.addEventListener("change", updateDatasetAvailability);
+            updateDatasetAvailability();
+        }
+
         document.getElementById("targeted").addEventListener("change", function() {
             const attackType = document.getElementById("poisoning-attack-select").value;
             const elements = {
                 targetLabel: {title: document.getElementById("target_label-title"), container: document.getElementById("target_label-container")},
                 targetChangedLabel: {title: document.getElementById("target_changed_label-title"), container: document.getElementById("target_changed_label-container")}
             };
-            
+
             if (this.checked && attackType === ATTACK_TYPES.LABEL_FLIPPING) {
                 showElements(elements, ['targetLabel', 'targetChangedLabel']);
             } else if (this.checked && attackType === ATTACK_TYPES.SAMPLE_POISONING) {
@@ -116,9 +123,47 @@ const AttackManager = (function() {
         });
     }
 
+    function updateDatasetAvailability() {  // Enable the Sample Poisoning option only while the selected dataset is in IMAGE_DATASETS.
+        const dataset = document.getElementById("datasetSelect")?.value;
+        const enabledForDataset = IMAGE_DATASETS.has(dataset);  // also false when #datasetSelect is missing (dataset is undefined)
+        const attackSelect = document.getElementById("poisoning-attack-select");
+        const samplePoisoningOption = Array.from(attackSelect?.options || [])
+            .find(option => option.value === ATTACK_TYPES.SAMPLE_POISONING || option.textContent === ATTACK_TYPES.SAMPLE_POISONING);  // match on value or on visible text
+        const datasetNote = document.getElementById("sample-poisoning-dataset-note");
+
+        if (samplePoisoningOption) {
+            samplePoisoningOption.disabled = !enabledForDataset;
+            samplePoisoningOption.title = enabledForDataset ? "" : "Sample Poisoning is currently available only for image datasets.";  // tooltip explains why the option is greyed out
+        }
+
+        if (datasetNote) {
+            datasetNote.style.display = enabledForDataset ? "none" : "block";  // note is shown exactly while the option is disabled
+        }
+
+        if (attackSelect?.value === ATTACK_TYPES.SAMPLE_POISONING && !enabledForDataset) {  // a selection that just became unsupported falls back to NO_ATTACK
+            attackSelect.value = ATTACK_TYPES.NO_ATTACK;
+            updateAttackUI(ATTACK_TYPES.NO_ATTACK);
+        }
+    }
+
+    function validateConfig() {  // Returns an error message string when the current selection is invalid, otherwise null.
+        const dataset = document.getElementById("datasetSelect")?.value;
+        const attackType = document.getElementById("poisoning-attack-select")?.value;
+
+        if (attackType === ATTACK_TYPES.SAMPLE_POISONING && !IMAGE_DATASETS.has(dataset)) {  // Sample Poisoning chosen for a dataset outside IMAGE_DATASETS
+            return "Sample Poisoning is currently available only for image datasets.";
+        }
+
+        return null;
+    }
+
     function getAttackConfig() {
         const attackType = document.getElementById("poisoning-attack-select").value;
-        
+        const validationMessage = validateConfig();
+        if (validationMessage) {
+            throw new Error(validationMessage);
+        }
+
         // Validate numeric inputs
         function validateNumericInput(id, min = 0, max = 100) {
             const value = parseFloat(document.getElementById(id).value);
@@ -185,10 +230,14 @@ const AttackManager = (function() {
 
     function setAttackConfig(config) {
         if (!config) return;
+        const attackType = Array.isArray(config.attacks)
+            ? config.attacks[0]
+            : (config.type || config.attacks || ATTACK_TYPES.NO_ATTACK);
 
         // Set attack type and update UI
-        document.getElementById("poisoning-attack-select").value = config.type;
-        updateAttackUI(config.type);
+        document.getElementById("poisoning-attack-select").value = attackType;
+        updateAttackUI(attackType);
+        updateDatasetAvailability();
 
         // Set common fields
         document.getElementById("poisoned-node-percent").value = config.poisoned_node_percent || 0;
@@ -197,7 +246,7 @@ const AttackManager = (function() {
         document.getElementById("attack-interval").value = config.attack_interval || 1;
 
         // Set attack-specific fields
-        switch(config.type) {
+        switch(attackType) {
             case ATTACK_TYPES.LABEL_FLIPPING:
                 document.getElementById("poisoned-sample-percent").value = config.poisoned_sample_percent || 0;
                 document.getElementById("targeted").checked = config.targeted || false;
@@ -243,12 +292,15 @@ const AttackManager = (function() {
     function resetAttackConfig() {
         document.getElementById("poisoning-attack-select").value = ATTACK_TYPES.NO_ATTACK;
         updateAttackUI(ATTACK_TYPES.NO_ATTACK);
+        updateDatasetAvailability();
     }
 
     return {
         ATTACK_TYPES,
         initializeEventListeners,
         updateAttackUI,
+        updateDatasetAvailability,
+        validateConfig,
         getAttackConfig,
         setAttackConfig,
         resetAttackConfig
diff --git a/nebula/frontend/static/js/deployment/main.js b/nebula/frontend/static/js/deployment/main.js
index 40fe716b6..de5b3d0e5 100644
--- a/nebula/frontend/static/js/deployment/main.js
+++ b/nebula/frontend/static/js/deployment/main.js
@@ -132,9 +132,23 @@ const DeploymentManager = (function() {
             return false;
         }
 
+        const attackValidationMessage = validateAttack();
+        if (attackValidationMessage) {
+            Utils.showAlert('error', attackValidationMessage);
+            return false;
+        }
+
         return true;
     }
 
+    function validateAttack() {  // Returns the message from AttackManager.validateConfig(), or null when that hook is not available.
+        const manager = window.AttackManager || AttackManager;  // prefer the window property, otherwise use the AttackManager binding in scope
+        if (manager && typeof manager.validateConfig === "function") {
+            return manager.validateConfig();
+        }
+        return null;  // no validateConfig hook: treat the attack settings as valid
+    }
+
     function validateAdversarialTraining() {
         const manager = window.AdversarialTrainingManager || AdversarialTrainingManager;
         if (manager && typeof manager.validateConfig === "function") {
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index e68a5fec9..567bedc57 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -678,6 +678,9 @@ <h5 class="step-title">Attack Type</h5>
                             <option>Delayer</option>
                             <option>Flooding</option>
                         </select>
+                        <small id="sample-poisoning-dataset-note" class="form-text text-muted" style="display: none;">
+                            Sample Poisoning is currently available only for image datasets.
+                        </small>
                         <h5 id="poisoned-node-title" class="step-title" style="margin-left: 11px; display: none;">%
                             Malicious nodes</h5>
                         <div class="form-check form-check-inline" style="display: none;"

From f3a42705dffc186e7480adb1276b99f439c89d81 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 21 May 2026 13:28:31 +0200
Subject: [PATCH 47/66] Trustworthiness: Tabular and images factsheet division,
 accuracy/stability error fixed, sample poisoning error fixed, Trustworthiness:
 CNN error fixed for explainability, adversarial training log system

---
 nebula/addons/attacks/dataset/datapoison.py   |   79 +-
 .../addons/defenses/adversarial_training.py   |   88 ++
 nebula/addons/trustworthiness/calculation.py  |   56 +-
 ...val_metrics.json => eval_metrics_cfl.json} |    0
 .../configs/eval_metrics_cfl_images.json      | 1137 +++++++++++++++++
 .../configs/eval_metrics_cfl_tabular.json     | 1137 +++++++++++++++++
 .../configs/eval_metrics_dfl_images.json      | 1042 +++++++++++++++
 .../configs/eval_metrics_dfl_tabular.json     | 1042 +++++++++++++++
 ...plate.json => factsheet_template_cfl.json} |   26 +-
 .../factsheet_template_cfl_images.json        |   88 ++
 .../factsheet_template_cfl_tabular.json       |   88 ++
 .../configs/factsheet_template_dfl.json       |   26 +-
 .../factsheet_template_dfl_images.json        |   82 ++
 .../factsheet_template_dfl_tabular.json       |   82 ++
 .../addons/trustworthiness/dfl_factsheet.py   |   13 +-
 nebula/addons/trustworthiness/factsheet.py    |   15 +-
 .../trustworthiness/factsheet_common.py       |  110 +-
 .../trustworthiness/factsheet_populators.py   |  158 +++
 nebula/addons/trustworthiness/metric.py       |   35 +-
 .../trustworthiness/per_round_metrics.py      |   75 +-
 20 files changed, 5173 insertions(+), 206 deletions(-)
 rename nebula/addons/trustworthiness/configs/{eval_metrics.json => eval_metrics_cfl.json} (100%)
 create mode 100755 nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
 create mode 100755 nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
 create mode 100755 nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
 create mode 100755 nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
 rename nebula/addons/trustworthiness/configs/{factsheet_template.json => factsheet_template_cfl.json} (76%)
 create mode 100755 nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json
 create mode 100755 nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json
 create mode 100755 nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json
 create mode 100755 nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json
 create mode 100644 nebula/addons/trustworthiness/factsheet_populators.py

diff --git a/nebula/addons/attacks/dataset/datapoison.py b/nebula/addons/attacks/dataset/datapoison.py
index 7b22d37d7..40dd7bdee 100755
--- a/nebula/addons/attacks/dataset/datapoison.py
+++ b/nebula/addons/attacks/dataset/datapoison.py
@@ -71,6 +71,59 @@ def _convert_to_tensor(self, data: torch.Tensor | Image.Image | tuple) -> torch.
         else:
             return torch.tensor(data)
 
+    def _restore_data_format(self, data, original):  # Convert poisoned `data` back to the container type, shape and dtype of `original`.
+        if isinstance(data, torch.Tensor):
+            array_data = data.detach().cpu().numpy()
+        else:
+            array_data = np.asarray(data)
+
+        original_shape = None  # stays None when `original` is not a tensor/PIL image and has no `shape` attribute
+        if isinstance(original, torch.Tensor):
+            original_shape = tuple(original.shape)
+        elif isinstance(original, Image.Image):
+            original_shape = np.array(original).shape
+        elif hasattr(original, "shape"):
+            original_shape = tuple(original.shape)
+
+        if original_shape is not None and array_data.shape != original_shape and array_data.size == np.prod(original_shape):  # same element count, different layout
+            array_data = array_data.reshape(original_shape)
+
+        if isinstance(original, torch.Tensor):
+            restored = torch.as_tensor(array_data, device=original.device)
+            if original.dtype.is_floating_point:
+                original_max = original.detach().max() if original.numel() > 0 else torch.tensor(1.0, device=original.device)
+                if restored.numel() > 0 and original_max > 1 and restored.min() >= 0 and restored.max() <= 1:  # result lies in [0, 1] although the input exceeded 1: scale back up
+                    restored = restored * original_max
+                return restored.to(dtype=original.dtype)
+
+            if restored.numel() > 0 and restored.min() >= 0 and restored.max() <= 1:  # non-float tensor: stretch a [0, 1] result to the dtype maximum
+                restored = restored * torch.iinfo(original.dtype).max  # NOTE(review): torch.iinfo may reject bool tensors — confirm none reach this branch
+            return restored.clamp(torch.iinfo(original.dtype).min, torch.iinfo(original.dtype).max).to(dtype=original.dtype)
+
+        if isinstance(original, Image.Image):
+            original_array = np.array(original)
+            restored = self._restore_array_dtype(array_data, original_array.dtype, original_array)
+            return Image.fromarray(restored, mode=original.mode)  # rebuild a PIL image in the original mode
+
+        if isinstance(original, np.ndarray):
+            return self._restore_array_dtype(array_data, original.dtype, original)
+
+        return data  # unrecognised `original` type: hand `data` back unconverted
+
+    def _restore_array_dtype(self, data: np.ndarray, dtype: np.dtype, original: np.ndarray | None = None) -> np.ndarray:  # Cast `data` to `dtype`, first rescaling values that lie in [0, 1].
+        dtype = np.dtype(dtype)
+        if np.issubdtype(dtype, np.integer):
+            if data.size > 0 and data.min() >= 0 and data.max() <= 1:  # stretch a [0, 1] result to the integer dtype's maximum
+                data = data * np.iinfo(dtype).max
+            return np.rint(np.clip(data, np.iinfo(dtype).min, np.iinfo(dtype).max)).astype(dtype)  # clip to the dtype range, round to nearest, then cast
+
+        if original is not None and data.size > 0 and original.size > 0:
+            original_max = np.max(original)
+            if original_max > 1 and data.min() >= 0 and data.max() <= 1:  # non-integer dtype: scale by the original maximum only when it exceeded 1
+                data = data * original_max
+
+        return data.astype(dtype)
+
     def _handle_single_point(self, tensor: torch.Tensor) -> tuple[torch.Tensor, bool]:
         """
         Handle single point tensors by reshaping them.
@@ -100,7 +153,7 @@ def __init__(self, noise_type: str):
         """
         self.noise_type = noise_type.lower()
 
-    def apply_noise(self, t: torch.Tensor | Image.Image, poisoned_noise_percent: float) -> torch.Tensor:
+    def apply_noise(self, t: torch.Tensor | Image.Image, poisoned_noise_percent: float):
         """
         Applies noise to a tensor based on the specified noise type and poisoning percentage.
 
@@ -109,9 +162,10 @@ def apply_noise(self, t: torch.Tensor | Image.Image, poisoned_noise_percent: flo
             poisoned_noise_percent: The percentage of noise to be applied (0-100)
 
         Returns:
-            The tensor with noise applied
+            The poisoned data in the same format as the input
         """
-        t = self._convert_to_tensor(t)
+        original = t[0] if isinstance(t, tuple) else t
+        t = self._convert_to_tensor(original)
         t, is_single_point = self._handle_single_point(t)
 
         arr = t.detach().cpu().numpy()
@@ -122,21 +176,21 @@ def apply_noise(self, t: torch.Tensor | Image.Image, poisoned_noise_percent: flo
         )
 
         if self.noise_type == "salt":
-            poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, amount=poisoned_ratio))
+            poisoned = random_noise(arr, mode=self.noise_type, amount=poisoned_ratio)
         elif self.noise_type == "gaussian":
-            poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, mean=0, var=poisoned_ratio, clip=True))
+            poisoned = random_noise(arr, mode=self.noise_type, mean=0, var=poisoned_ratio, clip=True)
         elif self.noise_type == "s&p":
-            poisoned = torch.tensor(random_noise(arr, mode=self.noise_type, amount=poisoned_ratio))
+            poisoned = random_noise(arr, mode=self.noise_type, amount=poisoned_ratio)
         elif self.noise_type == "nlp_rawdata":
             poisoned = self.poison_to_nlp_rawdata(arr, poisoned_ratio)
         else:
             logging.info(f"ERROR: noise_type '{self.noise_type}' not supported in data poison attack.")
-            return t
+            return original
 
         if is_single_point:
             poisoned = poisoned[0]
 
-        return poisoned
+        return self._restore_data_format(poisoned, original)
 
     def poison_to_nlp_rawdata(self, text_data: list, poisoned_ratio: float) -> list:
         """
@@ -221,7 +275,7 @@ def __init__(self, target_label: int):
         """
         self.target_label = target_label
 
-    def add_x_to_image(self, img: torch.Tensor | Image.Image) -> torch.Tensor:
+    def add_x_to_image(self, img: torch.Tensor | Image.Image):
         """
         Adds a 10x10 pixel 'X' mark to the top-left corner of an image.
 
@@ -229,10 +283,11 @@ def add_x_to_image(self, img: torch.Tensor | Image.Image) -> torch.Tensor:
             img: Input image tensor or PIL Image
 
         Returns:
-            Modified image with X pattern
+            Modified image in the same format as the input
         """
         logging.info(f"[{self.__class__.__name__}] Adding X pattern to image")
-        img = self._convert_to_tensor(img)
+        original = img[0] if isinstance(img, tuple) else img
+        img = self._convert_to_tensor(original)
         img, is_single_point = self._handle_single_point(img)
 
         # Handle batch dimension if present
@@ -267,7 +322,7 @@ def add_x_to_image(self, img: torch.Tensor | Image.Image) -> torch.Tensor:
         if is_single_point:
             img = img[0]
 
-        return img
+        return self._restore_data_format(img, original)
 
     def poison_data(
         self,
diff --git a/nebula/addons/defenses/adversarial_training.py b/nebula/addons/defenses/adversarial_training.py
index 6260edd8f..0de881e4f 100644
--- a/nebula/addons/defenses/adversarial_training.py
+++ b/nebula/addons/defenses/adversarial_training.py
@@ -5,6 +5,10 @@
 
 import torch
 
+from nebula.config.config import TRAINING_LOGGER
+
+logging_training = logging.getLogger(TRAINING_LOGGER)
+
 IMAGE_DATASET_NORMALIZATION = {
     "MNIST": ((0.5,), (0.5,)),
     "FashionMNIST": ((0.5,), (0.5,)),
@@ -68,6 +72,11 @@ def _bounds(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
         upper = (float(self.config.clip_max) - mean) / std
         return lower, upper
 
+    def denormalize(self, x: torch.Tensor) -> torch.Tensor:  # Returns x * std + mean, clamped to [config.clip_min, config.clip_max].
+        mean = self._channel_tensor(self.mean, x)  # helper defined outside this hunk; presumably shapes the per-channel stats to match x — confirm
+        std = self._channel_tensor(self.std, x)
+        return (x * std + mean).clamp(float(self.config.clip_min), float(self.config.clip_max))
+
     def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor) -> torch.Tensor:
         epsilon = self._epsilon(x_clean)
         lower, upper = self._bounds(x_clean)
@@ -105,9 +114,12 @@ def generate(self, model, x, y, criterion):
 class AdversarialTrainingDefense:
     """Batch-level adversarial training defense for Nebula models."""
 
+    LOGGED_SAMPLES_PER_ROUND = 3
+
     def __init__(self, config: AdversarialTrainingConfig, generator: AdversarialExampleGenerator):
         self.config = config
         self.generator = generator
+        self._logged_adversarial_samples_by_round: dict[int, int] = {}
 
     @classmethod
     def from_participant_config(cls, participant_config: dict[str, Any]) -> "AdversarialTrainingDefense | None":
@@ -201,6 +213,7 @@ def compute_training_step(self, model, x, y, criterion):
             return loss, logits, {}
 
         x_adv = self.generator.generate(model, x, y, criterion)
+        self._log_adversarial_samples(model, x, x_adv, y)
         adv_logits = model(x_adv)
         adv_loss = criterion(adv_logits, y)
 
@@ -232,6 +245,81 @@ def _extra_metrics(self, metrics):
             return {}
         return metrics
 
+    def _log_adversarial_samples(self, model, x_clean: torch.Tensor, x_adv: torch.Tensor, y: torch.Tensor) -> None:  # Log stats and small patches for at most LOGGED_SAMPLES_PER_ROUND clean/adversarial pairs per round.
+        if not self.config.log_adversarial_metrics:
+            return
+
+        current_round = int(getattr(model, "round", 0))  # models without a `round` attribute are all counted under round 0
+        already_logged = self._logged_adversarial_samples_by_round.get(current_round, 0)
+        remaining = self.LOGGED_SAMPLES_PER_ROUND - already_logged
+        if remaining <= 0:  # per-round quota already used up
+            return
+
+        with torch.no_grad():
+            clean_view = x_clean.detach()
+            adv_view = x_adv.detach()
+            if hasattr(self.generator, "denormalize"):  # generators without this method are logged in their raw value range
+                clean_view = self.generator.denormalize(clean_view)
+                adv_view = self.generator.denormalize(adv_view)
+
+            delta = adv_view - clean_view  # perturbation, in the same value range as the two views
+            samples_to_log = min(remaining, int(clean_view.size(0)))  # capped by the size of dim 0
+
+            for sample_idx in range(samples_to_log):
+                sample_clean = clean_view[sample_idx].detach().float().cpu()
+                sample_adv = adv_view[sample_idx].detach().float().cpu()
+                sample_delta = delta[sample_idx].detach().float().cpu()
+
+                logging_training.info(
+                    "[AdversarialTrainingDefense] Round %s | Sample %s/%s before/after distortion | "
+                    "dataset=%s | attack=%s | label=%s | clean[min=%.6f max=%.6f mean=%.6f] | "
+                    "adv[min=%.6f max=%.6f mean=%.6f] | delta_linf=%.6f | delta_l2=%.6f",
+                    current_round,
+                    already_logged + sample_idx + 1,  # 1-based position within this round's quota
+                    self.LOGGED_SAMPLES_PER_ROUND,
+                    self.config.dataset_name,
+                    self.config.attack,
+                    int(y[sample_idx].detach().cpu().item()) if y.numel() > sample_idx else None,  # None when y has no entry at this index
+                    sample_clean.min().item(),
+                    sample_clean.max().item(),
+                    sample_clean.mean().item(),
+                    sample_adv.min().item(),
+                    sample_adv.max().item(),
+                    sample_adv.mean().item(),
+                    sample_delta.abs().max().item(),  # largest absolute element of the perturbation
+                    sample_delta.reshape(-1).norm(p=2).item(),  # 2-norm over all elements of the perturbation
+                )
+                logging_training.info(
+                    "[AdversarialTrainingDefense] Round %s | Clean sample %s channel0 4x4:\n%s",
+                    current_round,
+                    already_logged + sample_idx + 1,
+                    self._format_patch(sample_clean),
+                )
+                logging_training.info(
+                    "[AdversarialTrainingDefense] Round %s | Adversarial sample %s channel0 4x4:\n%s",
+                    current_round,
+                    already_logged + sample_idx + 1,
+                    self._format_patch(sample_adv),
+                )
+                logging_training.info(
+                    "[AdversarialTrainingDefense] Round %s | Delta sample %s channel0 4x4:\n%s",
+                    current_round,
+                    already_logged + sample_idx + 1,
+                    self._format_patch(sample_delta),
+                )
+
+            self._logged_adversarial_samples_by_round[current_round] = already_logged + samples_to_log  # later calls in the same round resume from this count
+
+    @staticmethod
+    def _format_patch(sample: torch.Tensor, patch_size: int = 4) -> str:
+        """Return the leading ``patch_size`` x ``patch_size`` window of ``sample`` (index 0 of dim 0 for rank >= 3) as a nested-list string."""
+        if sample.dim() >= 3:
+            patch = sample[0, :patch_size, :patch_size]
+        else:
+            # Rank 0/1 samples (e.g. tabular rows) become a single row; iterating their flat list would hit bare floats and raise.
+            patch = (sample if sample.dim() == 2 else sample.reshape(1, -1))[:patch_size, :patch_size]
+        return str([[round(float(value), 6) for value in row] for row in patch.tolist()])
+
 
 def apply_adversarial_training_if_enabled(model, participant_config: dict[str, Any]) -> None:
     defense = AdversarialTrainingDefense.from_participant_config(participant_config)
diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index ff8a49e7b..33853a731 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -1306,6 +1306,35 @@ def _compute_shap_values(model_ref, background, test_data):
 
         raise RuntimeError("; ".join(explainer_errors))
 
+    def _compute_gradient_importances(model_ref, test_data):  # Returns a flat, non-negative numpy array: mean over dim 0 of |grad * input|.
+        was_training = bool(getattr(model_ref, "training", False))  # remembered so the finally block can restore train mode
+        model_ref.eval()
+
+        try:
+            inputs = test_data.detach().clone().requires_grad_(True)  # fresh leaf tensor so backward() fills inputs.grad
+            model_ref.zero_grad(set_to_none=True)
+
+            outputs = model_ref(inputs)
+            if isinstance(outputs, (tuple, list)):  # models returning several values: use the first one
+                outputs = outputs[0]
+
+            if outputs.ndim == 1:
+                score = outputs.sum()
+            else:
+                score = outputs.reshape(outputs.shape[0], -1).max(dim=1).values.sum()  # largest output per row of dim 0, summed into one scalar
+
+            score.backward()
+            if inputs.grad is None:  # no gradient reached the inputs
+                return np.array([])
+
+            importances = torch.abs(inputs.grad * inputs).mean(dim=0)
+            importances = importances.detach().cpu().numpy().reshape(-1)
+            importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)  # replace NaN/inf with 0.0
+            return np.maximum(importances, 0.0)
+        finally:
+            if was_training:  # put the model back in train mode only if it started there
+                model_ref.train()
+
     def _feature_axes_from_shape(arr_shape, input_shape, n_samples):
         input_shape = tuple(input_shape)
         input_rank = len(input_shape)
@@ -1354,9 +1383,26 @@ def _feature_axes_from_shape(arr_shape, input_shape, n_samples):
         test_data = test_data.to(device)
 
         shap_model = _clone_model(model, device)
-        shap_values = _compute_shap_values(shap_model, background, test_data)
-        del shap_model
-        gc.collect()
+        try:
+            shap_values = _compute_shap_values(shap_model, background, test_data)
+        except Exception as exc:
+            logger.debug("Could not compute feature importances with SHAP, using gradient fallback: %s", exc)
+            del shap_model
+            gc.collect()
+
+            gradient_model = _clone_model(model, device)
+            try:
+                return _compute_gradient_importances(gradient_model, test_data)
+            except Exception as fallback_exc:
+                logger.debug("Could not compute feature importances with gradient fallback: %s", fallback_exc)
+                return np.array([])
+            finally:
+                del gradient_model
+                gc.collect()
+        finally:
+            if "shap_model" in locals():
+                del shap_model
+            gc.collect()
 
         if shap_values is None:
             return np.array([])
@@ -1394,8 +1440,8 @@ def _feature_axes_from_shape(arr_shape, input_shape, n_samples):
         importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
         return np.maximum(importances, 0.0)
     except Exception as exc:
-        logger.warning("Could not compute feature importances with shap")
-        logger.warning(exc)
+        logger.debug("Could not compute feature importances")
+        logger.debug(exc)
         return np.array([])
 
 
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
similarity index 100%
rename from nebula/addons/trustworthiness/configs/eval_metrics.json
rename to nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
new file mode 100755
index 000000000..889635d90
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
@@ -0,0 +1,1137 @@
+{
+    "robustness": {
+      "resilience_to_attacks": {
+        "weight": 0.4,
+        "metrics": {
+          "certified_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
+            "weight": 0.4
+          },
+          "inverse_loss_sensitivity": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.2
+          },
+          "clipped_adversarial_accuracy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_empirical_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_confidence_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "inverse_attack_success_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          }
+        }
+      },
+      "algorithm_robustness": {
+        "weight": 0.4,
+        "metrics": {
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc_avg"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_true_score",
+            "type": "true_score",
+            "description": "Average test accuracy of the global model on clients test data.",
+            "weight": 0.3
+          },
+          "macro_f1": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Macro F1 score of the final model on test data.",
+            "weight": 0.3
+          },
+          "personalization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of personalized FL algorithm.",
+            "weight": 0.2
+          },
+          "reputation_enabled": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of an active reputation-based defense mechanism.",
+            "weight": 0.2
+          }
+        }
+      },
+      "client_reliability": {
+        "weight": 0.2,
+        "metrics": {
+          "scale": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the model.",
+            "weight": 0.3
+          },
+          "average_neighbor_reputation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Average reputation score of the neighbors associated with the node or federation.",
+            "weight": 0.3
+          },
+          "dropout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of expected client updates that were not received across rounds.",
+            "weight": 0.2
+          },
+          "timeout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of aggregation rounds that finished with missing expected client updates.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "privacy": {
+      "technique": {
+        "weight": 0.2,
+        "metrics": {
+          "differential_privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of differential privacy.",
+            "weight": 1
+          }
+        }
+      },
+      "uncertainty": {
+        "weight": 0.6,
+        "metrics": {
+          "entropy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/avg_entropy"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The measure of uncertainty in identifying a client.",
+            "weight": 1
+          }
+        }
+      },
+      "indistinguishability": {
+        "weight": 0.2,
+        "metrics": {
+          "global_privacy_risk": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_global_privacy_risk",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
+            "weight": 0.2
+          },
+          "epsilon_star": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Empirical privacy leakage estimated from the separability of train and test loss distributions.",
+            "weight": 0.4
+          },
+          "mia_auc_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Trust-oriented score derived from the ROC-AUC of a loss-based membership inference attack.",
+            "weight": 0.4
+          }
+        }
+      }
+    },
+    "fairness": {
+      "selection_fairness": {
+        "weight": 0.25,
+        "metrics": {
+          "selection_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "asc",
+            "description": "Variation in selection rate among the clients.",
+            "weight": 1
+          }
+        }
+      },
+      "performance_fairness": {
+        "weight": 0.25,
+        "metrics": {
+          "accuracy_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/test_acc_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Variation of global model performance among the clients.",
+            "weight": 1
+          }
+        }
+      },
+      "class_distribution": {
+        "weight": 0.25,
+        "metrics": {
+          "class_imbalance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Variation of the sample size per class.",
+            "weight": 1
+          }
+        }
+      },
+      "outcome_fairness": {
+        "weight": 0.25,
+        "metrics": {
+          "underfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Held-out performance proxy used as an outcome-level fairness signal.",
+            "weight": 0.1667
+          },
+          "inverse_overfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Generalization quality proxy transformed so higher is better.",
+            "weight": 0.1667
+          },
+          "inverse_well_calibration_error": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
+            "weight": 0.1667
+          },
+          "inverse_generalized_entropy_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_theil_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_coefficient_of_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Coefficient-of-variation-based outcome fairness score.",
+            "weight": 0.1665
+          }
+        }
+      }
+    },
+    "explainability": {
+      "interpretability": {
+        "weight": 0.4,
+        "metrics": {
+          "algorithmic_transparency": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "RandomForestClassifier": 4,
+              "KNeighborsClassifier": 3,
+              "SVC": 2,
+              "GaussianProcessClassifier": 3,
+              "DecisionTreeClassifier": 5,
+              "MLP": 1,
+              "AdaBoostClassifier": 3,
+              "GaussianNB": 3.5,
+              "QuadraticDiscriminantAnalysis": 3,
+              "LogisticRegression": 4,
+              "LinearRegression": 3.5,
+              "Sequential": 1,
+              "CNN": 1
+            },
+            "description": "Mapping of Learning techniques to the level of explainability based on literature research and qualitative analysis of each learning technique.",
+            "weight": 0.6
+          },
+          "model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5, 10e6, 10e7, 10e8],
+            "description": "Ranges of how to map model size to a score from 1-5.",
+            "weight": 0.4
+          }
+        }
+      },
+      "post_hoc_methods": {
+        "weight": 0.6,
+        "metrics": {
+          "clipped_feature_importance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Variation of feature importance scores of all the features.",
+            "weight": 0.2
+          },
+          "alpha_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of features needed to explain most of the attribution mass; lower values indicate sparser and more focused explanations.",
+            "weight": 0.2
+          },
+          "spread_ratio": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Normalized entropy of the attribution distribution; lower values indicate explanations concentrated on fewer features.",
+            "weight": 0.2
+          },
+          "spread_divergence": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Jensen-Shannon divergence between the attribution distribution and a uniform distribution; higher values indicate more selective explanations.",
+            "weight": 0.2
+          },
+          "visualization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of graphical capabilities to show the explainability.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "accountability": {
+      "factsheet_completeness": {
+        "weight": 0.8,
+        "metrics": {
+          "project_specs": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "project/overview"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/purpose"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/background"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Specifications of the project.",
+            "weight": 0.1
+          },
+          "participants": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_dataset_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Participants information.",
+            "weight": 0.1
+          },
+          "data": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/provenance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/avg_entropy"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Meta data about the data.",
+            "weight": 0.1
+          },
+          "configuration": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/learning_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "FL model configurations.",
+            "weight": 0.1
+          },
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss_avg"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc_avg"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Performance evaluation results.",
+            "weight": 0.1
+          },
+          "fairness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/test_acc_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Fairness metrics results.",
+            "weight": 0.1
+          },
+          "system": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_time_minutes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_model_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/total_upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/total_download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "System usage information.",
+            "weight": 0.1
+          },
+          "privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/privacy_risk"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Privacy metrics results.",
+            "weight": 0.1
+          },
+          "explainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Explainability metrics results.",
+            "weight": 0.1
+          },
+          "sustainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_server"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_clients"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_clients"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_server"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_training"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_aggregation"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_uplink"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_downlink"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Sustainability metrics results.",
+            "weight": 0.1
+          }
+        }
+      },
+      "monitoring": {
+        "weight": 0.2,
+        "metrics": {
+          "logs_available": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of logs to show all the nodes.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "architectural_soundness": {
+      "client_management": {
+        "weight": 0.5,
+        "metrics": {
+          "client_selector": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Reputation Based": 1.0,
+              "Full Participation": 0.5
+            },
+            "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
+            "weight": 1
+          }
+        }
+      },
+      "optimization": {
+        "weight": 0.5,
+        "metrics": {
+          "algorithm": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_map_value",
+            "score_map": {
+              "FedAvg": 0.9509,
+              "Krum": 0.9535,
+              "TrimmedMean": 0.9595,
+              "Median": 0.9461
+            },
+            "description": "The choice of a suitable aggregation algorithm.",
+            "weight": 1
+          }
+        }
+      },
+      "federation_management": {
+        "weight": 0.5,
+        "metrics": {
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "sustainability": {
+      "energy_source": {
+        "weight": 0.5,
+        "metrics": {
+          "carbon_intensity_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_clients"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [20, 795],
+            "description": "Carbon intensity of energy grid used by clients",
+            "weight": 0.5
+          },
+          "carbon_intensity_server": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_server"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [20, 795],
+            "description": "Carbon intensity of energy grid used by server",
+            "weight": 0.5
+          }
+        }
+      },
+      "hardware_efficiency": {
+        "weight": 0.25,
+        "metrics": {
+          "avg_power_performance_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_clients"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [20, 1447],
+            "description": "Average Power Performance of Client CPUs or GPUs",
+            "weight": 0.5
+          },
+          "avg_power_performance_server": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_server"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [20, 1447],
+            "description": "Power Performance of Server CPU or GPU",
+            "weight": 0.5
+          }
+        }
+      },
+      "federation_complexity": {
+        "weight": 0.25,
+        "metrics": {
+          "communication_efficiency": {
+            "inputs": [
+              { "source": "factsheet", "field_path": "system/total_upload_bytes" },
+              { "source": "factsheet", "field_path": "system/total_download_bytes" },
+              { "source": "factsheet", "field_path": "performance/test_acc_avg" }
+            ],
+            "operation": "comm_efficiency",
+            "type": "ranges",
+            "direction": "low",
+            "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "Communication efficiency computed from the total uploaded and downloaded bytes and the average test accuracy.",
+            "weight": 0.1
+          },
+          "number_of_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The total number of training rounds",
+            "weight": 0.06666666
+          },
+          "avg_model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "The size of the model",
+            "weight": 0.16666666
+          },
+          "client_selection_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [
+              0.1,1
+            ],
+            "description": "The selection rate of clients for each training round",
+            "weight": 0.16666666
+          },
+          "number_of_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the federation.",
+            "weight": 0.16666666
+          },
+          "local_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [1, 100],
+            "description": "The number of local training rounds.",
+            "weight": 0.16666666
+          },
+          "avg_dataset_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_dataset_size"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
+            "description": "The average number of training samples",
+            "weight": 0.16666666
+          }
+        }
+      }
+    }
+  }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
new file mode 100755
index 000000000..889635d90
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
@@ -0,0 +1,1137 @@
+{
+    "robustness": {
+      "resilience_to_attacks": {
+        "weight": 0.4,
+        "metrics": {
+          "certified_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
+            "weight": 0.4
+          },
+          "inverse_loss_sensitivity": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.2
+          },
+          "clipped_adversarial_accuracy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_empirical_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_confidence_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "inverse_attack_success_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          }
+        }
+      },
+      "algorithm_robustness": {
+        "weight": 0.4,
+        "metrics": {
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc_avg"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_true_score",
+            "type": "true_score",
+            "description": "Average test accuracy of the global model on clients test data.",
+            "weight": 0.3
+          },
+          "macro_f1": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Macro F1 score of the final model on test data.",
+            "weight": 0.3
+          },
+          "personalization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of personalized FL algorithm.",
+            "weight": 0.2
+          },
+          "reputation_enabled": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of an active reputation-based defense mechanism.",
+            "weight": 0.2
+          }
+        }
+      },
+      "client_reliability": {
+        "weight": 0.2,
+        "metrics": {
+          "scale": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the model.",
+            "weight": 0.3
+          },
+          "average_neighbor_reputation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Average reputation score of the neighbors associated with the node or federation.",
+            "weight": 0.3
+          },
+          "dropout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of expected client updates that were not received across rounds.",
+            "weight": 0.2
+          },
+          "timeout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of aggregation rounds that finished with missing expected client updates.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "privacy": {
+      "technique": {
+        "weight": 0.2,
+        "metrics": {
+          "differential_privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of differential privacy.",
+            "weight": 1
+          }
+        }
+      },
+      "uncertainty": {
+        "weight": 0.6,
+        "metrics": {
+          "entropy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/avg_entropy"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The measure of uncertainty in identifying a client.",
+            "weight": 1
+          }
+        }
+      },
+      "indistinguishability": {
+        "weight": 0.2,
+        "metrics": {
+          "global_privacy_risk": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_global_privacy_risk",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
+            "weight": 0.2
+          },
+          "epsilon_star": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Empirical privacy leakage estimated from the separability of train and test loss distributions.",
+            "weight": 0.4
+          },
+          "mia_auc_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Trust-oriented score derived from the ROC-AUC of a loss-based membership inference attack.",
+            "weight": 0.4
+          }
+        }
+      }
+    },
+    "fairness": {
+      "selection_fairness": {
+        "weight": 0.25,
+        "metrics": {
+          "selection_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "asc",
+            "description": "Variation in selection rate among the clients.",
+            "weight": 1
+          }
+        }
+      },
+      "performance_fairness": {
+        "weight": 0.25,
+        "metrics": {
+          "accuracy_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/test_acc_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Variation of global model performance among the clients.",
+            "weight": 1
+          }
+        }
+      },
+      "class_distribution": {
+        "weight": 0.25,
+        "metrics": {
+          "class_imbalance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Variation of the sample size per class.",
+            "weight": 1
+          }
+        }
+      },
+      "outcome_fairness": {
+        "weight": 0.25,
+        "metrics": {
+          "underfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Held-out performance proxy used as an outcome-level fairness signal.",
+            "weight": 0.1667
+          },
+          "inverse_overfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Generalization quality proxy transformed so higher is better.",
+            "weight": 0.1667
+          },
+          "inverse_well_calibration_error": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
+            "weight": 0.1667
+          },
+          "inverse_generalized_entropy_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_theil_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_coefficient_of_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Coefficient-of-variation-based outcome fairness score.",
+            "weight": 0.1665
+          }
+        }
+      }
+    },
+    "explainability": {
+      "interpretability": {
+        "weight": 0.4,
+        "metrics": {
+          "algorithmic_transparency": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "RandomForestClassifier": 4,
+              "KNeighborsClassifier": 3,
+              "SVC": 2,
+              "GaussianProcessClassifier": 3,
+              "DecisionTreeClassifier": 5,
+              "MLP": 1,
+              "AdaBoostClassifier": 3,
+              "GaussianNB": 3.5,
+              "QuadraticDiscriminantAnalysis": 3,
+              "LogisticRegression": 4,
+              "LinearRegression": 3.5,
+              "Sequential": 1,
+              "CNN": 1
+            },
+            "description": "Mapping of learning techniques to the level of explainability based on literature research and qualitative analysis of each learning technique.",
+            "weight": 0.6
+          },
+          "model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5, 10e6, 10e7, 10e8],
+            "description": "Ranges of how to map model size to a score from 1-5.",
+            "weight": 0.4
+          }
+        }
+      },
+      "post_hoc_methods": {
+        "weight": 0.6,
+        "metrics": {
+          "clipped_feature_importance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Variation of feature importance scores of all the features.",
+            "weight": 0.2
+          },
+          "alpha_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of features needed to explain most of the attribution mass; lower values indicate sparser and more focused explanations.",
+            "weight": 0.2
+          },
+          "spread_ratio": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Normalized entropy of the attribution distribution; lower values indicate explanations concentrated on fewer features.",
+            "weight": 0.2
+          },
+          "spread_divergence": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Jensen-Shannon divergence between the attribution distribution and a uniform distribution; higher values indicate more selective explanations.",
+            "weight": 0.2
+          },
+          "visualization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of graphical capabilities to show the explainability.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "accountability": {
+      "factsheet_completeness": {
+        "weight": 0.8,
+        "metrics": {
+          "project_specs": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "project/overview"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/purpose"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/background"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Specifications of the project.",
+            "weight": 0.1
+          },
+          "participants": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_dataset_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Participants information.",
+            "weight": 0.1
+          },
+          "data": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/provenance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/avg_entropy"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Meta data about the data.",
+            "weight": 0.1
+          },
+          "configuration": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/learning_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "FL model configurations.",
+            "weight": 0.1
+          },
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss_avg"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc_avg"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Performance evaluation results.",
+            "weight": 0.1
+          },
+          "fairness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/test_acc_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Fairness metrics results.",
+            "weight": 0.1
+          },
+          "system": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_time_minutes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_model_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/total_upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/total_download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/avg_download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "System usage information.",
+            "weight": 0.1
+          },
+          "privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/privacy_risk"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Privacy metrics results.",
+            "weight": 0.1
+          },
+          "explainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Explainability metrics results.",
+            "weight": 0.1
+          },
+          "sustainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_server"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_clients"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_clients"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_server"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_training"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_aggregation"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_uplink"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_downlink"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Sustainability metrics results.",
+            "weight": 0.1
+          }
+        }
+      },
+      "monitoring": {
+        "weight": 0.2,
+        "metrics": {
+          "logs_available": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of logs to show all the nodes.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "architectural_soundness": {
+      "client_management": {
+        "weight": 0.5,
+        "metrics": {
+          "client_selector": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Reputation Based": 1.0,
+              "Full Participation": 0.5
+            },
+            "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
+            "weight": 1
+          }
+        }
+      },
+      "optimization": {
+        "weight": 0.5,
+        "metrics": {
+          "algorithm": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_map_value",
+            "score_map": {
+              "FedAvg": 0.9509,
+              "Krum": 0.9535,
+              "TrimmedMean": 0.9595,
+              "Median": 0.9461
+            },
+            "description": "The choice of a suitable aggregation algorithm.",
+            "weight": 1
+          }
+        }
+      },
+      "federation_management": {
+        "weight": 0.5,
+        "metrics": {
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "sustainability": {
+      "energy_source": {
+        "weight": 0.5,
+        "metrics": {
+          "carbon_intensity_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_clients"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [20, 795],
+            "description": "Carbon intensity of energy grid used by clients",
+            "weight": 0.5
+          },
+          "carbon_intensity_server": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_carbon_intensity_server"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [20, 795],
+            "description": "Carbon intensity of energy grid used by server",
+            "weight": 0.5
+          }
+        }
+      },
+      "hardware_efficiency": {
+        "weight": 0.25,
+        "metrics": {
+          "avg_power_performance_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_clients"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [20, 1447],
+            "description": "Average Power Performance of Client CPUs or GPUs",
+            "weight": 0.5
+          },
+          "avg_power_performance_server": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/avg_power_performance_server"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [20, 1447],
+            "description": "Power Performance of Server CPU or GPU",
+            "weight": 0.5
+          }
+        }
+      },
+      "federation_complexity": {
+        "weight": 0.25,
+        "metrics": {
+          "communication_efficiency": {
+            "inputs": [
+              { "source": "factsheet", "field_path": "system/total_upload_bytes" },
+              { "source": "factsheet", "field_path": "system/total_download_bytes" },
+              { "source": "factsheet", "field_path": "performance/test_acc_avg" }
+            ],
+            "operation": "comm_efficiency",
+            "type": "ranges",
+            "direction": "low",
+            "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "Communication efficiency derived from the total uploaded and downloaded bytes and the average test accuracy.",
+            "weight": 0.1
+          },
+          "number_of_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The total number of training rounds",
+            "weight": 0.06666666
+          },
+          "avg_model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "The size of the model",
+            "weight": 0.16666666
+          },
+          "client_selection_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [
+              0.1,1
+            ],
+            "description": "The selection rate of clients for each training round",
+            "weight": 0.16666666
+          },
+          "number_of_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the federation.",
+            "weight": 0.16666666
+          },
+          "local_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [1, 100],
+            "description": "The number of local training rounds.",
+            "weight": 0.16666666
+          },
+          "avg_dataset_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_dataset_size"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
+            "description": "The average number of training samples",
+            "weight": 0.16666666
+          }
+        }
+      }
+    }
+  }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
new file mode 100755
index 000000000..c3d2f2755
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
@@ -0,0 +1,1042 @@
+{
+    "robustness": {
+      "resilience_to_attacks": {
+        "weight": 0.4,
+        "metrics": {
+          "certified_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
+            "weight": 0.4
+          },
+          "inverse_loss_sensitivity": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.2
+          },
+          "clipped_adversarial_accuracy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_empirical_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_confidence_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "inverse_attack_success_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          }
+        }
+      },
+      "algorithm_robustness": {
+        "weight": 0.4,
+        "metrics": {
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_true_score",
+            "type": "true_score",
+            "description": "Local clean test accuracy of the final model.",
+            "weight": 0.3
+          },
+          "macro_f1": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Macro F1 score of the final local model on test data.",
+            "weight": 0.3
+          },
+          "personalization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of personalized FL algorithm.",
+            "weight": 0.2
+          },
+          "reputation_enabled": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of an active reputation-based defense mechanism.",
+            "weight": 0.2
+          }
+        }
+      },
+      "client_reliability": {
+        "weight": 0.2,
+        "metrics": {
+          "scale": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the federation.",
+            "weight": 0.3
+          },
+          "average_neighbor_reputation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Average reputation score of the neighbors associated with the node.",
+            "weight": 0.3
+          },
+          "dropout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of expected neighbor updates that were not received across rounds.",
+            "weight": 0.2
+          },
+          "timeout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of aggregation rounds that finished with missing expected neighbor updates.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "privacy": {
+      "technique": {
+        "weight": 0.2,
+        "metrics": {
+          "differential_privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of differential privacy.",
+            "weight": 1
+          }
+        }
+      },
+      "uncertainty": {
+        "weight": 0.6,
+        "metrics": {
+          "entropy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/entropy_local"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The measure of uncertainty in identifying a client.",
+            "weight": 1
+          }
+        }
+      },
+      "indistinguishability": {
+        "weight": 0.2,
+        "metrics": {
+          "global_privacy_risk": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/neighbor_num"
+              }
+            ],
+            "operation": "get_global_privacy_risk_dfl",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
+            "weight": 0.2
+          },
+          "inverse_epsilon_star": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Empirical privacy leakage estimated from the separability of train and test loss distributions.",
+            "weight": 0.4
+          },
+          "mia_auc_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Trust-oriented score derived from the ROC-AUC of a loss-based membership inference attack.",
+            "weight": 0.4
+          }
+        }
+      }
+    },
+    "fairness": {
+      "class_distribution": {
+        "weight": 0.5,
+        "metrics": {
+          "selection_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Local variation in neighbor participation across rounds, transformed so higher values mean more stable participation.",
+            "weight": 0.5
+          },
+          "class_imbalance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Variation of the sample size per class.",
+            "weight": 0.5
+          }
+        }
+      },
+      "outcome_fairness": {
+        "weight": 0.5,
+        "metrics": {
+          "underfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Held-out performance proxy used as an outcome-level fairness signal.",
+            "weight": 0.1667
+          },
+          "inverse_overfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Generalization quality proxy transformed so higher is better.",
+            "weight": 0.1667
+          },
+          "inverse_well_calibration_error": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
+            "weight": 0.1667
+          },
+          "inverse_generalized_entropy_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_theil_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_coefficient_of_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Coefficient-of-variation-based outcome fairness score.",
+            "weight": 0.1665
+          }
+        }
+      }
+    },
+    "explainability": {
+      "interpretability": {
+        "weight": 0.4,
+        "metrics": {
+          "algorithmic_transparency": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "RandomForestClassifier": 4,
+              "KNeighborsClassifier": 3,
+              "SVC": 2,
+              "GaussianProcessClassifier": 3,
+              "DecisionTreeClassifier": 5,
+              "MLP": 1,
+              "AdaBoostClassifier": 3,
+              "GaussianNB": 3.5,
+              "QuadraticDiscriminantAnalysis": 3,
+              "LogisticRegression": 4,
+              "LinearRegression": 3.5,
+              "Sequential": 1,
+              "CNN": 1
+            },
+            "description": "Mapping of learning techniques to the level of explainability based on literature research and qualitative analysis of each learning technique.",
+            "weight": 0.6
+          },
+          "model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5, 10e6, 10e7, 10e8],
+            "description": "Ranges of how to map model size to a score from 1-5.",
+            "weight": 0.4
+          }
+        }
+      },
+      "post_hoc_methods": {
+        "weight": 0.6,
+        "metrics": {
+          "clipped_feature_importance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Variation of feature importance scores of all the features.",
+            "weight": 0.2
+          },
+          "alpha_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of features needed to explain most of the attribution mass; lower values indicate sparser and more focused explanations.",
+            "weight": 0.2
+          },
+          "spread_ratio": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Normalized entropy of the attribution distribution; lower values indicate explanations concentrated on fewer features.",
+            "weight": 0.2
+          },
+          "spread_divergence": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Jensen-Shannon divergence between the attribution distribution and a uniform distribution; higher values indicate more selective explanations.",
+            "weight": 0.2
+          },
+          "visualization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of graphical capabilities to show the explainability.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "accountability": {
+      "factsheet_completeness": {
+        "weight": 0.8,
+        "metrics": {
+          "project_specs": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "project/overview"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/purpose"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/background"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Specifications of the project.",
+            "weight": 0.1
+          },
+          "participants": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/local_dataset_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Participants information.",
+            "weight": 0.1
+          },
+          "data": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/provenance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/entropy_local"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Metadata about the data.",
+            "weight": 0.1
+          },
+          "configuration": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/learning_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "FL model configurations.",
+            "weight": 0.1
+          },
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Performance evaluation results.",
+            "weight": 0.1
+          },
+          "fairness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Fairness metrics results.",
+            "weight": 0.1
+          },
+          "system": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/time_minutes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/model_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "System usage information.",
+            "weight": 0.1
+          },
+          "privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/privacy_risk"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Privacy metrics results.",
+            "weight": 0.1
+          },
+          "explainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Explainability metrics results.",
+            "weight": 0.1
+          },
+          "sustainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/carbon_intensity_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_training_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/energy_consumed_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_local"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Sustainability metrics results.",
+            "weight": 0.1
+          }
+        }
+      },
+      "monitoring": {
+        "weight": 0.2,
+        "metrics": {
+          "logs_available": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of logs to show all the nodes.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "architectural_soundness": {
+      "client_management": {
+        "weight": 0.5,
+        "metrics": {
+          "client_selector": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Reputation Based": 1.0,
+              "Full Participation": 0.5
+            },
+            "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
+            "weight": 1
+          }
+        }
+      },
+      "optimization": {
+        "weight": 0.5,
+        "metrics": {
+          "algorithm": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_map_value",
+            "score_map": {
+              "FedAvg": 0.9509,
+              "Krum": 0.9535,
+              "TrimmedMean": 0.9595,
+              "Median": 0.9461
+            },
+            "description": "The choice of a suitable aggregation algorithm.",
+            "weight": 1
+          }
+        }
+      },
+      "federation_management": {
+        "weight": 0.5,
+        "metrics": {
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "sustainability": {
+      "energy_source": {
+        "weight": 0.5,
+        "metrics": {
+          "carbon_intensity_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/carbon_intensity_local"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [20, 795],
+            "description": "Carbon intensity of energy grid used by clients",
+            "weight": 1
+          }
+        }
+      },
+      "federation_complexity": {
+        "weight": 0.5,
+        "metrics": {
+          "communication_efficiency": {
+            "inputs": [
+              { "source": "factsheet", "field_path": "system/upload_bytes" },
+              { "source": "factsheet", "field_path": "system/download_bytes" },
+              { "source": "factsheet", "field_path": "performance/test_acc" }
+            ],
+            "operation": "comm_efficiency",
+            "type": "ranges",
+            "direction": "low",
+            "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "Communication efficiency derived from the uploaded bytes, downloaded bytes, and test accuracy.",
+            "weight": 0.1
+          },
+          "number_of_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The total number of training rounds",
+            "weight": 0.06666666
+          },
+          "avg_model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "The size of the model",
+            "weight": 0.16666666
+          },
+          "client_selection_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [
+              0.1,1
+            ],
+            "description": "The selection rate of clients for each training round",
+            "weight": 0.16666666
+          },
+          "number_of_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the federation.",
+            "weight": 0.16666666
+          },
+          "local_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [1, 100],
+            "description": "The number of local training rounds.",
+            "weight": 0.16666666
+          },
+          "avg_dataset_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/local_dataset_size"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
+            "description": "The average number of training samples",
+            "weight": 0.16666666
+          }
+        }
+      }
+    }
+  }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
new file mode 100755
index 000000000..c3d2f2755
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
@@ -0,0 +1,1042 @@
+{
+    "robustness": {
+      "resilience_to_attacks": {
+        "weight": 0.4,
+        "metrics": {
+          "certified_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
+            "weight": 0.4
+          },
+          "inverse_loss_sensitivity": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.2
+          },
+          "clipped_adversarial_accuracy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_empirical_robustness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "clipped_confidence_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          },
+          "inverse_attack_success_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_range_score",
+            "type": "true_score",
+            "description": "",
+            "weight": 0.1
+          }
+        }
+      },
+      "algorithm_robustness": {
+        "weight": 0.4,
+        "metrics": {
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc"
+              }
+            ],
+            "operation": "get_value",
+            "score_function": "get_true_score",
+            "type": "true_score",
+            "description": "Local clean test accuracy of the final model.",
+            "weight": 0.3
+          },
+          "macro_f1": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Macro F1 score of the final local model on test data.",
+            "weight": 0.3
+          },
+          "personalization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of personalized FL algorithm.",
+            "weight": 0.2
+          },
+          "reputation_enabled": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of an active reputation-based defense mechanism.",
+            "weight": 0.2
+          }
+        }
+      },
+      "client_reliability": {
+        "weight": 0.2,
+        "metrics": {
+          "scale": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the federation.",
+            "weight": 0.3
+          },
+          "average_neighbor_reputation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Average reputation score of the neighbors associated with the node.",
+            "weight": 0.3
+          },
+          "dropout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of expected neighbor updates that were not received across rounds.",
+            "weight": 0.2
+          },
+          "timeout_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of aggregation rounds that finished with missing expected neighbor updates.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "privacy": {
+      "technique": {
+        "weight": 0.2,
+        "metrics": {
+          "differential_privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of differential privacy.",
+            "weight": 1
+          }
+        }
+      },
+      "uncertainty": {
+        "weight": 0.6,
+        "metrics": {
+          "entropy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/entropy_local"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The measure of uncertainty in identifying a client.",
+            "weight": 1
+          }
+        }
+      },
+      "indistinguishability": {
+        "weight": 0.2,
+        "metrics": {
+          "global_privacy_risk": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/neighbor_num"
+              }
+            ],
+            "operation": "get_global_privacy_risk_dfl",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "A worst-case approximation of the maximal risk for distinguishing two clients.",
+            "weight": 0.2
+          },
+          "inverse_epsilon_star": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Empirical privacy leakage estimated from the separability of train and test loss distributions.",
+            "weight": 0.4
+          },
+          "mia_auc_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Trust-oriented score derived from the ROC-AUC of a loss-based membership inference attack.",
+            "weight": 0.4
+          }
+        }
+      }
+    },
+    "fairness": {
+      "class_distribution": {
+        "weight": 0.5,
+        "metrics": {
+          "selection_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Local variation in neighbor participation across rounds, transformed so higher values mean more stable participation.",
+            "weight": 0.5
+          },
+          "class_imbalance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Variation of the sample size per class.",
+            "weight": 0.5
+          }
+        }
+      },
+      "outcome_fairness": {
+        "weight": 0.5,
+        "metrics": {
+          "underfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Held-out performance proxy used as an outcome-level fairness signal.",
+            "weight": 0.1667
+          },
+          "inverse_overfitting": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Generalization quality proxy transformed so higher is better.",
+            "weight": 0.1667
+          },
+          "inverse_well_calibration_error": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
+            "weight": 0.1667
+          },
+          "inverse_generalized_entropy_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_theil_index": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
+            "weight": 0.1667
+          },
+          "inverse_coefficient_of_variation": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Coefficient-of-variation-based outcome fairness score.",
+            "weight": 0.1665
+          }
+        }
+      }
+    },
+    "explainability": {
+      "interpretability": {
+        "weight": 0.4,
+        "metrics": {
+          "algorithmic_transparency": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "RandomForestClassifier": 4,
+              "KNeighborsClassifier": 3,
+              "SVC": 2,
+              "GaussianProcessClassifier": 3,
+              "DecisionTreeClassifier": 5,
+              "MLP": 1,
+              "AdaBoostClassifier": 3,
+              "GaussianNB": 3.5,
+              "QuadraticDiscriminantAnalysis": 3,
+              "LogisticRegression": 4,
+              "LinearRegression": 3.5,
+              "Sequential": 1,
+              "CNN": 1
+            },
+            "description": "Mapping of learning techniques to the level of explainability based on literature research and qualitative analysis of each learning technique.",
+            "weight": 0.6
+          },
+          "model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5, 10e6, 10e7, 10e8],
+            "description": "Ranges of how to map model size to a score from 1-5.",
+            "weight": 0.4
+          }
+        }
+      },
+      "post_hoc_methods": {
+        "weight": 0.6,
+        "metrics": {
+          "clipped_feature_importance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Variation of feature importance scores of all the features.",
+            "weight": 0.2
+          },
+          "alpha_score": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Fraction of features needed to explain most of the attribution mass; lower values indicate sparser and more focused explanations.",
+            "weight": 0.2
+          },
+          "spread_ratio": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "direction": "desc",
+            "description": "Normalized entropy of the attribution distribution; lower values indicate explanations concentrated on fewer features.",
+            "weight": 0.2
+          },
+          "spread_divergence": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "Jensen-Shannon divergence between the attribution distribution and a uniform distribution; higher values indicate more selective explanations.",
+            "weight": 0.2
+          },
+          "visualization": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of graphical capabilities to show the explainability.",
+            "weight": 0.2
+          }
+        }
+      }
+    },
+    "accountability": {
+      "factsheet_completeness": {
+        "weight": 0.8,
+        "metrics": {
+          "project_specs": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "project/overview"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/purpose"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "project/background"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Specifications of the project.",
+            "weight": 0.1
+          },
+          "participants": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/local_dataset_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "participants/avg_neighbor_reputation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Participants information.",
+            "weight": 0.1
+          },
+          "data": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/provenance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "data/entropy_local"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Meta data about the data.",
+            "weight": 0.1
+          },
+          "configuration": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/training_model"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/personalization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/reputation_enabled"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/visualization"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/differential_privacy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/dp_epsilon"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/learning_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "FL model configurations.",
+            "weight": 0.1
+          },
+          "performance": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_loss"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_acc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/test_macro_f1"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_feature_importance_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_clever"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_loss_sensitivity"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_adv_accuracy"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_empirical_robustness"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/clipped_test_confidence_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "performance/inverse_test_attack_success_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Performance evaluation results.",
+            "weight": 0.1
+          },
+          "fairness": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "fairness/class_imbalance"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/selection_cv"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/underfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_overfitting"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_well_calibration_error"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_generalized_entropy_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_theil_index"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "fairness/inverse_coefficient_of_variation"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Fairness metrics results.",
+            "weight": 0.1
+          },
+          "system": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "system/time_minutes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/model_size"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/upload_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/download_bytes"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/dropout_rate"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "system/timeout_rate"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "System usage information.",
+            "weight": 0.1
+          },
+          "privacy": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "privacy/privacy_risk"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/inverse_epsilon_star"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "privacy/mia_auc_score"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Privacy metrics results.",
+            "weight": 0.1
+          },
+          "explainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "explainability/alpha_score"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_ratio"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "explainability/spread_divergence"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Explainability metrics results.",
+            "weight": 0.1
+          },
+          "sustainability": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/carbon_intensity_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_training_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/energy_consumed_local"
+              },
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/emissions_communication_local"
+              }
+            ],
+            "operation": "check_properties",
+            "type": "property_check",
+            "description": "Sustainability metrics results.",
+            "weight": 0.1
+          }
+        }
+      },
+      "monitoring": {
+        "weight": 0.2,
+        "metrics": {
+          "logs_available": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/monitoring"
+              }
+            ],
+            "operation": "get_value",
+            "type": "true_score",
+            "description": "The use of logs to show all the nodes.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "architectural_soundness": {
+      "client_management": {
+        "weight": 0.5,
+        "metrics": {
+          "client_selector": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_selector"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Reputation Based": 1.0,
+              "Full Participation": 0.5
+            },
+            "description": "Mapping of client selection strategies to architectural soundness. Reputation-based selection is scored higher than full participation because it introduces an explicit selection mechanism.",
+            "weight": 1
+          }
+        }
+      },
+      "optimization": {
+        "weight": 0.5,
+        "metrics": {
+          "algorithm": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/aggregation_algorithm"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_map_value",
+            "score_map": {
+              "FedAvg": 0.9509,
+              "Krum": 0.9535,
+              "TrimmedMean": 0.9595,
+              "Median": 0.9461
+            },
+            "description": "The choice of a suitable aggregation algorithm.",
+            "weight": 1
+          }
+        }
+      },
+      "federation_management": {
+        "weight": 0.5,
+        "metrics": {
+          "topology_type": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "data/preprocessing"
+              }
+            ],
+            "operation": "get_value",
+            "type": "score_mapping",
+            "score_map": {
+              "Fully": 1.0,
+              "Star": 0.8,
+              "Ring": 0.6,
+              "Random": 0.2
+            },
+            "description": "Mapping of network topology types to architectural soundness, assuming fully connected topologies provide the strongest structural connectivity, followed by star, ring, and random topologies.",
+            "weight": 1
+          }
+        }
+      }
+    },
+    "sustainability": {
+      "energy_source": {
+        "weight": 0.5,
+        "metrics": {
+          "carbon_intensity_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "sustainability/carbon_intensity_local"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [20, 795],
+            "description": "Carbon intensity of energy grid used by clients",
+            "weight": 1
+          }
+        }
+      },
+      "federation_complexity": {
+        "weight": 0.5,
+        "metrics": {
+          "communication_efficiency": {
+            "inputs": [
+              { "source": "factsheet", "field_path": "system/upload_bytes" },
+              { "source": "factsheet", "field_path": "system/download_bytes" },
+              { "source": "factsheet", "field_path": "performance/test_acc" }
+            ],
+            "operation": "comm_efficiency",
+            "type": "ranges",
+            "direction": "low",
+            "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "Communication efficiency derived from the uploaded bytes, downloaded bytes, and test accuracy.",
+            "weight": 0.1
+          },
+          "number_of_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/total_round_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The total number of training rounds",
+            "weight": 0.06666666
+          },
+          "avg_model_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/trainable_param_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
+            "description": "The size of the model",
+            "weight": 0.16666666
+          },
+          "client_selection_rate": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/sample_client_rate"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "asc",
+            "scale": [
+              0.1,1
+            ],
+            "description": "The selection rate of clients for each training round",
+            "weight": 0.16666666
+          },
+          "number_of_clients": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/client_num"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
+            "description": "The number of clients in the federation.",
+            "weight": 0.16666666
+          },
+          "local_training_rounds": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "configuration/local_update_steps"
+              }
+            ],
+            "operation": "get_value",
+            "type": "scaled_score",
+            "direction": "desc",
+            "scale": [1, 100],
+            "description": "The number of local training rounds.",
+            "weight": 0.16666666
+          },
+          "avg_dataset_size": {
+            "inputs": [
+              {
+                "source": "factsheet",
+                "field_path": "participants/local_dataset_size"
+              }
+            ],
+            "operation": "get_value",
+            "type": "ranges",
+            "direction": "desc",
+            "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
+            "description": "The average number of training samples",
+            "weight": 0.16666666
+          }
+        }
+      }
+    }
+  }
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template.json b/nebula/addons/trustworthiness/configs/factsheet_template_cfl.json
similarity index 76%
rename from nebula/addons/trustworthiness/configs/factsheet_template.json
rename to nebula/addons/trustworthiness/configs/factsheet_template_cfl.json
index 17f53d052..0ba2db196 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_cfl.json
@@ -33,7 +33,7 @@
 	"privacy": {
 		"privacy_risk": "",
 		"epsilon_star": "",
-		"epsilon_star_score": "",
+		"inverse_epsilon_star": "",
 		"mia_auc": "",
 		"mia_auc_score": ""
 	},
@@ -46,24 +46,24 @@
 		"test_loss_avg": "",
 		"test_acc_avg": "",
 		"test_macro_f1": "",
-		"test_feature_importance_cv": "",
-		"test_clever": "",
-		"test_loss_sensitivity": "",
-		"test_adv_accuracy": "",
-		"test_empirical_robustness": "",
-		"test_confidence_score": "",
-		"test_attack_success_rate": ""
+		"clipped_test_feature_importance_cv": "",
+		"clipped_test_clever": "",
+		"inverse_test_loss_sensitivity": "",
+		"clipped_test_adv_accuracy": "",
+		"clipped_test_empirical_robustness": "",
+		"clipped_test_confidence_score": "",
+		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
 		"test_acc_cv": "",
 		"selection_cv": "",
 		"class_imbalance": "",
 		"underfitting": "",
-		"overfitting": "",
-		"well_calibration_error": "",
-		"generalized_entropy_index": "",
-		"theil_index": "",
-		"coefficient_of_variation": ""
+		"inverse_overfitting": "",
+		"inverse_well_calibration_error": "",
+		"inverse_generalized_entropy_index": "",
+		"inverse_theil_index": "",
+		"inverse_coefficient_of_variation": ""
 	},
 	"system": {
 		"avg_time_minutes": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json
new file mode 100755
index 000000000..0ba2db196
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json
@@ -0,0 +1,88 @@
+{
+	"project": {
+		"overview": "",
+		"purpose": "",
+		"background": ""
+	},
+	"data": {
+		"provenance": "",
+		"preprocessing": "",
+		"avg_entropy": ""
+	},
+	"participants": {
+		"client_num": "",
+		"sample_client_rate": "",
+		"client_selector": "",
+		"avg_neighbor_reputation": "",
+		"avg_dataset_size": ""
+	},
+	"configuration": {
+		"aggregation_algorithm": "",
+		"training_model": "",
+		"reputation_enabled": "",
+		"personalization": "",
+		"visualization": "",
+		"monitoring": "",
+		"differential_privacy": "",
+		"dp_epsilon": "",
+		"trainable_param_num": "",
+		"total_round_num": "",
+		"learning_rate": "",
+		"local_update_steps": ""
+	},
+	"privacy": {
+		"privacy_risk": "",
+		"epsilon_star": "",
+		"inverse_epsilon_star": "",
+		"mia_auc": "",
+		"mia_auc_score": ""
+	},
+	"explainability": {
+		"alpha_score": "",
+		"spread_ratio": "",
+		"spread_divergence": ""
+	},
+	"performance": {
+		"test_loss_avg": "",
+		"test_acc_avg": "",
+		"test_macro_f1": "",
+		"clipped_test_feature_importance_cv": "",
+		"clipped_test_clever": "",
+		"inverse_test_loss_sensitivity": "",
+		"clipped_test_adv_accuracy": "",
+		"clipped_test_empirical_robustness": "",
+		"clipped_test_confidence_score": "",
+		"inverse_test_attack_success_rate": ""
+	},
+	"fairness": {
+		"test_acc_cv": "",
+		"selection_cv": "",
+		"class_imbalance": "",
+		"underfitting": "",
+		"inverse_overfitting": "",
+		"inverse_well_calibration_error": "",
+		"inverse_generalized_entropy_index": "",
+		"inverse_theil_index": "",
+		"inverse_coefficient_of_variation": ""
+	},
+	"system": {
+		"avg_time_minutes": "",
+		"avg_model_size": "",
+		"total_upload_bytes": "",
+		"total_download_bytes":"",
+		"avg_upload_bytes": "",
+		"avg_download_bytes": "",
+		"dropout_rate": "",
+		"timeout_rate": ""
+	},
+	"sustainability": {
+		"avg_carbon_intensity_server": "",
+		"avg_carbon_intensity_clients": "",
+		"avg_power_performance_clients": "",
+		"avg_power_performance_server": "",
+		"emissions_training": "",
+		"emissions_aggregation": "",
+		"emissions_communication_uplink": "",
+		"emissions_communication_downlink": ""
+	}
+}
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json
new file mode 100755
index 000000000..0ba2db196
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json
@@ -0,0 +1,88 @@
+{
+	"project": {
+		"overview": "",
+		"purpose": "",
+		"background": ""
+	},
+	"data": {
+		"provenance": "",
+		"preprocessing": "",
+		"avg_entropy": ""
+	},
+	"participants": {
+		"client_num": "",
+		"sample_client_rate": "",
+		"client_selector": "",
+		"avg_neighbor_reputation": "",
+		"avg_dataset_size": ""
+	},
+	"configuration": {
+		"aggregation_algorithm": "",
+		"training_model": "",
+		"reputation_enabled": "",
+		"personalization": "",
+		"visualization": "",
+		"monitoring": "",
+		"differential_privacy": "",
+		"dp_epsilon": "",
+		"trainable_param_num": "",
+		"total_round_num": "",
+		"learning_rate": "",
+		"local_update_steps": ""
+	},
+	"privacy": {
+		"privacy_risk": "",
+		"epsilon_star": "",
+		"inverse_epsilon_star": "",
+		"mia_auc": "",
+		"mia_auc_score": ""
+	},
+	"explainability": {
+		"alpha_score": "",
+		"spread_ratio": "",
+		"spread_divergence": ""
+	},
+	"performance": {
+		"test_loss_avg": "",
+		"test_acc_avg": "",
+		"test_macro_f1": "",
+		"clipped_test_feature_importance_cv": "",
+		"clipped_test_clever": "",
+		"inverse_test_loss_sensitivity": "",
+		"clipped_test_adv_accuracy": "",
+		"clipped_test_empirical_robustness": "",
+		"clipped_test_confidence_score": "",
+		"inverse_test_attack_success_rate": ""
+	},
+	"fairness": {
+		"test_acc_cv": "",
+		"selection_cv": "",
+		"class_imbalance": "",
+		"underfitting": "",
+		"inverse_overfitting": "",
+		"inverse_well_calibration_error": "",
+		"inverse_generalized_entropy_index": "",
+		"inverse_theil_index": "",
+		"inverse_coefficient_of_variation": ""
+	},
+	"system": {
+		"avg_time_minutes": "",
+		"avg_model_size": "",
+		"total_upload_bytes": "",
+		"total_download_bytes":"",
+		"avg_upload_bytes": "",
+		"avg_download_bytes": "",
+		"dropout_rate": "",
+		"timeout_rate": ""
+	},
+	"sustainability": {
+		"avg_carbon_intensity_server": "",
+		"avg_carbon_intensity_clients": "",
+		"avg_power_performance_clients": "",
+		"avg_power_performance_server": "",
+		"emissions_training": "",
+		"emissions_aggregation": "",
+		"emissions_communication_uplink": "",
+		"emissions_communication_downlink": ""
+	}
+}
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index 5c19255d0..031be171e 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -34,7 +34,7 @@
 	"privacy": {
 		"privacy_risk": "",
 		"epsilon_star": "",
-		"epsilon_star_score": "",
+		"inverse_epsilon_star": "",
 		"mia_auc": "",
 		"mia_auc_score": ""
 	},
@@ -47,23 +47,23 @@
 		"test_loss": "",
 		"test_acc": "",
 		"test_macro_f1": "",
-		"test_feature_importance_cv": "",
-		"test_clever": "",
-		"test_loss_sensitivity": "",
-		"test_adv_accuracy": "",
-		"test_empirical_robustness": "",
-		"test_confidence_score": "",
-		"test_attack_success_rate": ""
+		"clipped_test_feature_importance_cv": "",
+		"clipped_test_clever": "",
+		"inverse_test_loss_sensitivity": "",
+		"clipped_test_adv_accuracy": "",
+		"clipped_test_empirical_robustness": "",
+		"clipped_test_confidence_score": "",
+		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
 		"selection_cv": "",
 		"class_imbalance": "",
 		"underfitting": "",
-		"overfitting": "",
-		"well_calibration_error": "",
-		"generalized_entropy_index": "",
-		"theil_index": "",
-		"coefficient_of_variation": ""
+		"inverse_overfitting": "",
+		"inverse_well_calibration_error": "",
+		"inverse_generalized_entropy_index": "",
+		"inverse_theil_index": "",
+		"inverse_coefficient_of_variation": ""
 	},
 	"system": {
 		"time_minutes": "",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json
new file mode 100755
index 000000000..031be171e
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json
@@ -0,0 +1,82 @@
+{
+	"project": {
+		"overview": "",
+		"purpose": "",
+		"background": ""
+	},
+	"data": {
+		"provenance": "",
+		"preprocessing": "",
+		"entropy_local": ""
+	},
+	"participants": {
+		"client_num": "",
+		"sample_client_rate": "",
+		"client_selector": "",
+		"local_dataset_size": "",
+		"neighbor_num": "",
+		"avg_neighbor_reputation": ""
+	},
+	"configuration": {
+		"aggregation_algorithm": "",
+		"training_model": "",
+		"personalization": "",
+		"reputation_enabled": "",
+		"visualization": "",
+		"monitoring": "",
+		"differential_privacy": "",
+		"dp_epsilon": "",
+		"trainable_param_num": "",
+		"total_round_num": "",
+		"learning_rate": "",
+		"local_update_steps": ""
+	},
+	"privacy": {
+		"privacy_risk": "",
+		"epsilon_star": "",
+		"inverse_epsilon_star": "",
+		"mia_auc": "",
+		"mia_auc_score": ""
+	},
+	"explainability": {
+		"alpha_score": "",
+		"spread_ratio": "",
+		"spread_divergence": ""
+	},
+	"performance": {
+		"test_loss": "",
+		"test_acc": "",
+		"test_macro_f1": "",
+		"clipped_test_feature_importance_cv": "",
+		"clipped_test_clever": "",
+		"inverse_test_loss_sensitivity": "",
+		"clipped_test_adv_accuracy": "",
+		"clipped_test_empirical_robustness": "",
+		"clipped_test_confidence_score": "",
+		"inverse_test_attack_success_rate": ""
+	},
+	"fairness": {
+		"selection_cv": "",
+		"class_imbalance": "",
+		"underfitting": "",
+		"inverse_overfitting": "",
+		"inverse_well_calibration_error": "",
+		"inverse_generalized_entropy_index": "",
+		"inverse_theil_index": "",
+		"inverse_coefficient_of_variation": ""
+	},
+	"system": {
+		"time_minutes": "",
+		"model_size": "",
+		"upload_bytes": "",
+		"download_bytes":"",
+		"dropout_rate": "",
+		"timeout_rate": ""
+	},
+	"sustainability": {
+		"carbon_intensity_local": "",
+		"emissions_training_local": "",
+		"energy_consumed_local": "",
+		"emissions_communication_local": ""
+	}
+}
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json
new file mode 100755
index 000000000..031be171e
--- /dev/null
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json
@@ -0,0 +1,82 @@
+{
+	"project": {
+		"overview": "",
+		"purpose": "",
+		"background": ""
+	},
+	"data": {
+		"provenance": "",
+		"preprocessing": "",
+		"entropy_local": ""
+	},
+	"participants": {
+		"client_num": "",
+		"sample_client_rate": "",
+		"client_selector": "",
+		"local_dataset_size": "",
+		"neighbor_num": "",
+		"avg_neighbor_reputation": ""
+	},
+	"configuration": {
+		"aggregation_algorithm": "",
+		"training_model": "",
+		"personalization": "",
+		"reputation_enabled": "",
+		"visualization": "",
+		"monitoring": "",
+		"differential_privacy": "",
+		"dp_epsilon": "",
+		"trainable_param_num": "",
+		"total_round_num": "",
+		"learning_rate": "",
+		"local_update_steps": ""
+	},
+	"privacy": {
+		"privacy_risk": "",
+		"epsilon_star": "",
+		"inverse_epsilon_star": "",
+		"mia_auc": "",
+		"mia_auc_score": ""
+	},
+	"explainability": {
+		"alpha_score": "",
+		"spread_ratio": "",
+		"spread_divergence": ""
+	},
+	"performance": {
+		"test_loss": "",
+		"test_acc": "",
+		"test_macro_f1": "",
+		"clipped_test_feature_importance_cv": "",
+		"clipped_test_clever": "",
+		"inverse_test_loss_sensitivity": "",
+		"clipped_test_adv_accuracy": "",
+		"clipped_test_empirical_robustness": "",
+		"clipped_test_confidence_score": "",
+		"inverse_test_attack_success_rate": ""
+	},
+	"fairness": {
+		"selection_cv": "",
+		"class_imbalance": "",
+		"underfitting": "",
+		"inverse_overfitting": "",
+		"inverse_well_calibration_error": "",
+		"inverse_generalized_entropy_index": "",
+		"inverse_theil_index": "",
+		"inverse_coefficient_of_variation": ""
+	},
+	"system": {
+		"time_minutes": "",
+		"model_size": "",
+		"upload_bytes": "",
+		"download_bytes": "",
+		"dropout_rate": "",
+		"timeout_rate": ""
+	},
+	"sustainability": {
+		"carbon_intensity_local": "",
+		"emissions_training_local": "",
+		"energy_consumed_local": "",
+		"emissions_communication_local": ""
+	}
+}
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 92b04ce46..c346fe1db 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -12,16 +12,17 @@
 )
 from nebula.addons.trustworthiness.factsheet_common import (
     get_factsheet_path,
+    get_factsheet_template_name,
     get_trustworthiness_dir,
     load_or_create_factsheet,
     populate_common_pre_train_sections,
-    populate_model_quality_metrics,
     populate_participation,
     populate_reliability,
     populate_reputation,
     set_dp_configuration,
     write_factsheet,
 )
+from nebula.addons.trustworthiness.factsheet_populators import populate_profile_metrics
 from nebula.addons.trustworthiness.utils import read_csv, get_all_data_entropy
 
 logger = logging.getLogger(__name__)
@@ -49,13 +50,18 @@ def populate_factsheet_dfl(
     ):
 
         self.factsheet_file_nm = f"factsheet_participant_{participant_idx}.json"
+        factsheet_template_file_nm = get_factsheet_template_name(
+            data["federation"],
+            model,
+            self.factsheet_template_file_nm,
+        )
 
         factsheet_file = get_factsheet_path(scenario_name, self.factsheet_file_nm)
 
         factsheet_file, factsheet = load_or_create_factsheet(
             scenario_name,
             self.factsheet_file_nm,
-            self.factsheet_template_file_nm,
+            factsheet_template_file_nm,
         )
 
         logging.info("DFL FactSheet: Populating factsheet")
@@ -121,8 +127,9 @@ def populate_factsheet_dfl(
         )
 
         factsheet["fairness"]["underfitting"] = get_underfitting_score_local(scenario_name, participant_idx)
-        populate_model_quality_metrics(
+        populate_profile_metrics(
             factsheet,
+            data["federation"],
             model,
             train_loader,
             test_loader,
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index d2647691c..86454e4a1 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -19,16 +19,17 @@
 from nebula.addons.trustworthiness.factsheet_common import (
     cap_score,
     get_factsheet_path,
+    get_factsheet_template_name,
     get_trustworthiness_dir,
     load_or_create_factsheet,
     populate_common_pre_train_sections,
-    populate_model_quality_metrics,
     populate_participation,
     populate_reliability,
     populate_reputation,
     set_dp_configuration,
     write_factsheet,
 )
+from nebula.addons.trustworthiness.factsheet_populators import populate_profile_metrics
 from nebula.addons.trustworthiness.utils import read_csv, check_field_filled
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
@@ -40,7 +41,7 @@ def __init__(self):
         Manager class to populate the FactSheet
         """
         self.factsheet_file_nm = "factsheet.json"
-        self.factsheet_template_file_nm = "factsheet_template.json"
+        self.factsheet_template_file_nm = "factsheet_template_cfl.json"
 
     def populate_factsheet_cfl(
         self,
@@ -58,12 +59,17 @@ def populate_factsheet_cfl(
     ):
 
         factsheet_file = get_factsheet_path(scenario_name, self.factsheet_file_nm)
+        factsheet_template_file_nm = get_factsheet_template_name(
+            data["federation"],
+            model,
+            self.factsheet_template_file_nm,
+        )
 
         try:
             factsheet_file, factsheet = load_or_create_factsheet(
                 scenario_name,
                 self.factsheet_file_nm,
-                self.factsheet_template_file_nm,
+                factsheet_template_file_nm,
             )
 
             logging.info("FactSheet: Populating factsheet with pre training metrics")
@@ -114,8 +120,9 @@ def populate_factsheet_cfl(
             underfitting_score = get_underfitting_score(scenario_name, participant_idx)
 
             factsheet["fairness"]["underfitting"] = underfitting_score
-            populate_model_quality_metrics(
+            populate_profile_metrics(
                 factsheet,
+                data["federation"],
                 model,
                 train_loader,
                 test_loader,
diff --git a/nebula/addons/trustworthiness/factsheet_common.py b/nebula/addons/trustworthiness/factsheet_common.py
index 290882395..3aa972fa9 100644
--- a/nebula/addons/trustworthiness/factsheet_common.py
+++ b/nebula/addons/trustworthiness/factsheet_common.py
@@ -4,27 +4,44 @@
 import os
 import shutil
 
-from nebula.addons.trustworthiness.calculation import (
-    attack_success_rate,
-    compute_adversarial_accuracy_art,
-    get_clever_score,
-    get_coefficient_of_variation,
-    get_confidence_score,
-    get_empirical_robustness_score,
-    get_epsilon_star,
-    get_explainability_metrics_summary,
-    get_generalized_entropy_index,
-    get_loss_sensitivity_score,
-    get_macro_f1_score,
-    get_mia_auc,
-    get_overfitting_score,
-    get_theil_index,
-    get_well_calibration_error,
-)
-
 
 dirname = os.path.dirname(__file__)
 
+DATA_TYPE_IMAGES = "images"
+DATA_TYPE_TABULAR = "tabular"
+
+
+def get_model_data_type(model):
+    """Return the model's declared data type as a stripped string.
+
+    An empty string is returned when the model lacks ``get_data_type``, when
+    calling it raises ``AttributeError``, or when it returns ``None``.
+    """
+    if not hasattr(model, "get_data_type"):
+        return ""
+    try:
+        declared = model.get_data_type()
+    except AttributeError:
+        return ""
+    return "" if declared is None else str(declared).strip()
+
+
+def get_normalized_model_data_type(model):
+    return get_model_data_type(model).lower()
+
+
+def get_factsheet_template_name(federation, model, default_template_name):
+    """Return the federation/data-type specific template name, else the default."""
+    prefix = "dfl" if str(federation).upper() in {"DFL", "SDFL"} else "cfl"
+    kind = get_normalized_model_data_type(model)
+
+    # Only known data types have dedicated templates, and the file must exist.
+    if kind in {DATA_TYPE_IMAGES, DATA_TYPE_TABULAR}:
+        candidate = f"factsheet_template_{prefix}_{kind}.json"
+        if os.path.exists(get_factsheet_template_path(candidate)):
+            return candidate
+    return default_template_name
+
 
 def get_trustworthiness_dir(scenario_name):
     """Returns the trustworthiness output directory for a scenario."""
@@ -112,6 +129,7 @@ def populate_common_pre_train_sections(factsheet, data, model):
     factsheet["project"]["background"] = build_project_background(data)
 
     factsheet["data"]["provenance"] = data["dataset"]
+    factsheet["data"]["type"] = get_model_data_type(model)
     factsheet["data"]["preprocessing"] = data["topology"]
 
     factsheet["participants"]["client_num"] = data["n_nodes"] or ""
@@ -180,59 +198,3 @@ def populate_reputation(factsheet, reputation_summary, include_neighbor_num=Fals
     factsheet["participants"]["avg_neighbor_reputation"] = 0
     if include_neighbor_num:
         factsheet["participants"]["neighbor_num"] = 0
-
-
-def populate_model_quality_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    """Calculates common privacy, fairness, explainability and robustness metrics."""
-    test_sample = next(iter(test_loader))
-    explainability_metrics = get_explainability_metrics_summary(model, test_loader)
-
-    factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
-
-    factsheet["privacy"]["epsilon_star"] = get_epsilon_star(model, train_loader, test_loader)
-    factsheet["privacy"]["inverse_epsilon_star"] = inverse_score(factsheet["privacy"]["epsilon_star"])
-    factsheet["privacy"]["mia_auc"] = get_mia_auc(model, train_loader, test_loader)
-    factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
-
-    overfitting_value = get_overfitting_score(model, train_loader, test_accuracy)
-    factsheet["fairness"]["inverse_overfitting"] = inverse_score(overfitting_value)
-
-    well_calibration_error_value = get_well_calibration_error(model, test_loader)
-    factsheet["fairness"]["inverse_well_calibration_error"] = inverse_score(well_calibration_error_value)
-
-    generalized_entropy_index_value = get_generalized_entropy_index(model, test_loader)
-    factsheet["fairness"]["inverse_generalized_entropy_index"] = inverse_score(generalized_entropy_index_value)
-
-    theil_index_value = get_theil_index(model, test_loader)
-    factsheet["fairness"]["inverse_theil_index"] = inverse_score(theil_index_value)
-
-    coefficient_of_variation_value = get_coefficient_of_variation(model, test_loader)
-    factsheet["fairness"]["inverse_coefficient_of_variation"] = inverse_score(coefficient_of_variation_value)
-
-    factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
-    factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
-    factsheet["explainability"]["spread_divergence"] = explainability_metrics["spread_divergence"]
-
-    lr = factsheet["configuration"]["learning_rate"]
-    num_classes = model.get_num_classes()
-
-    value_clever = get_clever_score(model, test_sample, num_classes, lr)
-    factsheet["performance"]["clipped_test_clever"] = cap_score(value_clever)
-
-    value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes, lr)
-    factsheet["performance"]["inverse_test_loss_sensitivity"] = inverse_score(value_loss_sensitivity)
-
-    value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, num_classes, lr)
-    factsheet["performance"]["clipped_test_adv_accuracy"] = cap_score(value_adv_accuracy)
-
-    value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes, lr)
-    factsheet["performance"]["clipped_test_empirical_robustness"] = cap_score(value_empirical_robustness)
-
-    value_confidence_score = get_confidence_score(model, test_sample)
-    factsheet["performance"]["clipped_test_confidence_score"] = cap_score(value_confidence_score)
-
-    value_attack_success_rate = attack_success_rate(model, test_sample)
-    factsheet["performance"]["inverse_test_attack_success_rate"] = 1 - value_attack_success_rate
-
-    feature_importance = explainability_metrics["feature_importance_cv"]
-    factsheet["performance"]["clipped_test_feature_importance_cv"] = cap_score(feature_importance)
diff --git a/nebula/addons/trustworthiness/factsheet_populators.py b/nebula/addons/trustworthiness/factsheet_populators.py
new file mode 100644
index 000000000..3e44b8678
--- /dev/null
+++ b/nebula/addons/trustworthiness/factsheet_populators.py
@@ -0,0 +1,158 @@
+"""Profile-specific factsheet metric population."""
+
+from nebula.addons.trustworthiness.calculation import (
+    attack_success_rate,
+    compute_adversarial_accuracy_art,
+    get_clever_score,
+    get_coefficient_of_variation,
+    get_confidence_score,
+    get_empirical_robustness_score,
+    get_epsilon_star,
+    get_explainability_metrics_summary,
+    get_generalized_entropy_index,
+    get_loss_sensitivity_score,
+    get_macro_f1_score,
+    get_mia_auc,
+    get_overfitting_score,
+    get_theil_index,
+    get_well_calibration_error,
+)
+from nebula.addons.trustworthiness.factsheet_common import (
+    DATA_TYPE_IMAGES,
+    DATA_TYPE_TABULAR,
+    cap_score,
+    get_normalized_model_data_type,
+    inverse_score,
+)
+
+FEDERATION_CFL = "cfl"
+FEDERATION_DFL = "dfl"
+
+
+def get_federation_profile(federation):
+    return FEDERATION_DFL if str(federation).upper() in {"DFL", "SDFL"} else FEDERATION_CFL
+
+
+def populate_profile_metrics(
+    factsheet,
+    federation,
+    model,
+    train_loader,
+    test_loader,
+    test_accuracy,
+):
+    """Run the metric populator registered for this federation and data type."""
+    profile_key = (get_federation_profile(federation), get_normalized_model_data_type(model))
+    populate = PROFILE_POPULATORS.get(profile_key, populate_default_metrics)
+
+    populate(
+        factsheet=factsheet,
+        model=model,
+        train_loader=train_loader,
+        test_loader=test_loader,
+        test_accuracy=test_accuracy,
+    )
+
+
+def populate_cfl_images_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
+    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+
+
+def populate_cfl_tabular_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
+    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+
+
+def populate_dfl_images_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
+    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+
+
+def populate_dfl_tabular_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
+    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+
+
+def populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
+    """Populate the metric set currently shared by every factsheet profile.
+
+    The first item yielded by ``test_loader`` serves as the sample for the
+    sample-based metrics, and the explainability summary is computed before any
+    factsheet section is written.
+    """
+    first_item = next(iter(test_loader))
+    summary = get_explainability_metrics_summary(model, test_loader)
+
+    populate_common_model_quality_metrics(
+        factsheet, model, train_loader, test_loader, test_accuracy, first_item
+    )
+    populate_explainability_metrics(factsheet, summary)
+    populate_image_robustness_metrics(factsheet, model, test_loader, first_item)
+
+
+def populate_common_model_quality_metrics(factsheet, model, train_loader, test_loader, test_accuracy, test_sample):
+    """Write the macro F1, privacy, fairness and confidence entries into ``factsheet``.
+
+    Each ``inverse_*`` entry stores ``inverse_score`` of the raw metric, and the
+    confidence score (computed from ``test_sample``) is stored after ``cap_score``.
+    """
+    factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
+
+    epsilon_star = get_epsilon_star(model, train_loader, test_loader)
+    factsheet["privacy"]["epsilon_star"] = epsilon_star
+    factsheet["privacy"]["inverse_epsilon_star"] = inverse_score(epsilon_star)
+
+    mia_auc = get_mia_auc(model, train_loader, test_loader)
+    factsheet["privacy"]["mia_auc"] = mia_auc
+    # An AUC of 0.5 scores 1; the score falls linearly to 0 as the AUC nears 0 or 1.
+    factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(mia_auc - 0.5)
+
+    overfitting = get_overfitting_score(model, train_loader, test_accuracy)
+    factsheet["fairness"]["inverse_overfitting"] = inverse_score(overfitting)
+
+    # The remaining fairness metrics share one call shape, evaluated in this order.
+    test_set_fairness_metrics = (
+        ("inverse_well_calibration_error", get_well_calibration_error),
+        ("inverse_generalized_entropy_index", get_generalized_entropy_index),
+        ("inverse_theil_index", get_theil_index),
+        ("inverse_coefficient_of_variation", get_coefficient_of_variation),
+    )
+    for key, compute in test_set_fairness_metrics:
+        factsheet["fairness"][key] = inverse_score(compute(model, test_loader))
+
+    confidence = get_confidence_score(model, test_sample)
+    factsheet["performance"]["clipped_test_confidence_score"] = cap_score(confidence)
+
+
+def populate_explainability_metrics(factsheet, explainability_metrics):
+    """Copy the explainability summary into ``factsheet``, capping the feature-importance CV."""
+    for key in ("alpha_score", "spread_ratio", "spread_divergence"):
+        factsheet["explainability"][key] = explainability_metrics[key]
+
+    capped_cv = cap_score(explainability_metrics["feature_importance_cv"])
+    factsheet["performance"]["clipped_test_feature_importance_cv"] = capped_cv
+
+
+def populate_image_robustness_metrics(factsheet, model, test_loader, test_sample):
+    """Write the CLEVER, sensitivity, adversarial and attack entries under "performance"."""
+    learning_rate = factsheet["configuration"]["learning_rate"]
+    class_count = model.get_num_classes()
+
+    clever = get_clever_score(model, test_sample, class_count, learning_rate)
+    factsheet["performance"]["clipped_test_clever"] = cap_score(clever)
+
+    sensitivity = get_loss_sensitivity_score(model, test_sample, class_count, learning_rate)
+    factsheet["performance"]["inverse_test_loss_sensitivity"] = inverse_score(sensitivity)
+
+    adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, class_count, learning_rate)
+    factsheet["performance"]["clipped_test_adv_accuracy"] = cap_score(adv_accuracy)
+
+    robustness = get_empirical_robustness_score(model, test_sample, class_count, learning_rate)
+    factsheet["performance"]["clipped_test_empirical_robustness"] = cap_score(robustness)
+
+    factsheet["performance"]["inverse_test_attack_success_rate"] = 1 - attack_success_rate(model, test_sample)
+
+
+PROFILE_POPULATORS = {
+    (FEDERATION_CFL, DATA_TYPE_IMAGES): populate_cfl_images_metrics,
+    (FEDERATION_CFL, DATA_TYPE_TABULAR): populate_cfl_tabular_metrics,
+    (FEDERATION_DFL, DATA_TYPE_IMAGES): populate_dfl_images_metrics,
+    (FEDERATION_DFL, DATA_TYPE_TABULAR): populate_dfl_tabular_metrics,
+}
diff --git a/nebula/addons/trustworthiness/metric.py b/nebula/addons/trustworthiness/metric.py
index b5548c1c5..f9e24e72d 100755
--- a/nebula/addons/trustworthiness/metric.py
+++ b/nebula/addons/trustworthiness/metric.py
@@ -11,6 +11,18 @@
 logger = logging.getLogger(__name__)
 
 
+def get_eval_metrics_file(federation_prefix, factsheet, default_file_name):
+    """Return the eval-metrics config path for the factsheet's data type, else the default."""
+    configs_dir = os.path.join(dirname, "configs")
+    data_type = str(factsheet.get("data", {}).get("type", "")).strip().lower()
+
+    if data_type in {"images", "tabular"}:
+        specific = os.path.join(configs_dir, f"eval_metrics_{federation_prefix}_{data_type}.json")
+        if os.path.exists(specific):
+            return specific
+    return os.path.join(configs_dir, default_file_name)
+
+
 class TrustMetricManager:
     """
     Manager class to help store the output directory and handle calls from the FL framework.
@@ -18,13 +30,15 @@ class TrustMetricManager:
 
     def __init__(self, scenario_start_time, federation, participant=None):
         if federation == "DFL" or federation == "SDFL":
+            self.federation_prefix = "dfl"
             self.factsheet_file_nm = f"factsheet_participant_{participant}.json"
             self.eval_metrics_file_nm = "eval_metrics_dfl.json"
             self.nebula_trust_results_nm = f"nebula_trust_results_{participant}.json"
             self.scenario_start_time = scenario_start_time
         else:
+            self.federation_prefix = "cfl"
             self.factsheet_file_nm = "factsheet.json"
-            self.eval_metrics_file_nm = "eval_metrics.json"
+            self.eval_metrics_file_nm = "eval_metrics_cfl.json"
             self.nebula_trust_results_nm = "nebula_trust_results.json"
             self.scenario_start_time = scenario_start_time
 
@@ -40,19 +54,22 @@ def evaluate(self, experiment_name, weights, use_weights=False):
         # Get scenario name
         scenario_name = experiment_name
         factsheet_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.factsheet_file_nm)
-        metrics_cfg_file = os.path.join(dirname, "configs", self.eval_metrics_file_nm)
         results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.nebula_trust_results_nm)
 
         if not os.path.exists(factsheet_file):
             logger.error(f"{factsheet_file} is missing! Please check documentation.")
             return
 
+        with open(factsheet_file, "r") as f:
+            factsheet = json.load(f)
+
+        metrics_cfg_file = get_eval_metrics_file(self.federation_prefix, factsheet, self.eval_metrics_file_nm)
+
         if not os.path.exists(metrics_cfg_file):
             logger.error(f"{metrics_cfg_file} is missing! Please check documentation.")
             return
 
-        with open(factsheet_file, "r") as f, open(metrics_cfg_file, "r") as m:
-            factsheet = json.load(f)
+        with open(metrics_cfg_file, "r") as m:
             metrics_cfg = json.load(m)
             metrics = metrics_cfg.items()
             input_docs = {"factsheet": factsheet}
@@ -86,20 +103,22 @@ def evaluate_participant(self, experiment_name, weights, participant_id, use_wei
         # Get scenario name
         scenario_name = experiment_name
         factsheet_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.factsheet_file_nm)
-        metrics_cfg_file = os.path.join(dirname, "configs", self.eval_metrics_file_nm)
         results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", self.nebula_trust_results_nm)
 
         if not os.path.exists(factsheet_file):
             logger.error(f"{factsheet_file} is missing! Please check documentation.")
             return
 
+        with open(factsheet_file, "r") as f:
+            factsheet = json.load(f)
+
+        metrics_cfg_file = get_eval_metrics_file(self.federation_prefix, factsheet, self.eval_metrics_file_nm)
+
         if not os.path.exists(metrics_cfg_file):
             logger.error(f"{metrics_cfg_file} is missing! Please check documentation.")
             return
 
-        with open(factsheet_file, "r") as f, open(metrics_cfg_file, "r") as m:
-            factsheet = json.load(f)
-
+        with open(metrics_cfg_file, "r") as m:
             raw_metrics_cfg: str = m.read()
             raw_metrics_cfg = raw_metrics_cfg.replace("factsheet", f"factsheet_participant_{participant_id}")
             metrics_cfg = json.loads(raw_metrics_cfg)
diff --git a/nebula/addons/trustworthiness/per_round_metrics.py b/nebula/addons/trustworthiness/per_round_metrics.py
index efb1facf9..e8104befd 100644
--- a/nebula/addons/trustworthiness/per_round_metrics.py
+++ b/nebula/addons/trustworthiness/per_round_metrics.py
@@ -6,12 +6,10 @@
 import csv
 import os
 from dataclasses import dataclass, field
-from typing import Any, Optional, Tuple
+from typing import Optional
 
-import torch
 
 from nebula.addons.functions import print_msg_box
-from nebula.addons.trustworthiness.calculation import get_feature_importance_cv
 
 
 def _safe_get_round(engine) -> int:
@@ -25,69 +23,6 @@ def _safe_get_round(engine) -> int:
         return int(getattr(trainer, "round", -1))
 
 
-def _get_local_test_loader(engine):
-    trainer = getattr(engine, "trainer", None)
-    dm = getattr(trainer, "datamodule", None)
-    if dm is None:
-        return None
-
-    try:
-        dm.setup(stage="test")
-    except Exception:
-        pass
-
-    try:
-        tdl = dm.test_dataloader()
-        if isinstance(tdl, (list, tuple)) and len(tdl) > 0:
-            return tdl[0]
-        return tdl
-    except Exception:
-        return None
-
-
-def _build_test_sample_min_bs(test_loader, min_bs: int = 10) -> Optional[Tuple[Any, Any]]:
-    if test_loader is None:
-        return None
-
-    try:
-        it = iter(test_loader)
-        batch = next(it)
-    except Exception:
-        return None
-
-    if not (isinstance(batch, (tuple, list)) and len(batch) >= 2):
-        return None
-
-    x, y = batch[0], batch[1]
-    if not (isinstance(x, torch.Tensor) and isinstance(y, torch.Tensor)):
-        return None
-
-    if x.size(0) >= min_bs:
-        return (x, y)
-
-    xs = [x]
-    ys = [y]
-    cur = x.size(0)
-
-    while cur < min_bs:
-        try:
-            b2 = next(it)
-        except Exception:
-            break
-        if not (isinstance(b2, (tuple, list)) and len(b2) >= 2):
-            break
-        x2, y2 = b2[0], b2[1]
-        if not (isinstance(x2, torch.Tensor) and isinstance(y2, torch.Tensor)):
-            break
-        xs.append(x2)
-        ys.append(y2)
-        cur += x2.size(0)
-
-    x_cat = torch.cat(xs, dim=0)
-    y_cat = torch.cat(ys, dim=0)
-    return (x_cat, y_cat)
-
-
 @dataclass
 class PerRoundTrustMetrics:
     experiment_name: str
@@ -100,7 +35,6 @@ class PerRoundTrustMetrics:
 
     _csv_path: str = field(init=False)
     _prev_acc: Optional[float] = field(default=None, init=False)
-    _test_loader: Any = field(default=None, init=False)
     _lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False)
 
     async def setup(self, engine) -> None:
@@ -120,9 +54,6 @@ async def setup(self, engine) -> None:
                     "accuracy",
                     "tw_stability",
                 ])
-
-        self._test_loader = _get_local_test_loader(engine)
-
     async def on_test_metrics(self, engine, loss: float, acc: float) -> None:
         async with self._lock:
             round_id = _safe_get_round(engine)
@@ -134,8 +65,6 @@ async def on_test_metrics(self, engine, loss: float, acc: float) -> None:
                 tw_stability = max(0.0, min(1.0, tw_stability))
             self._prev_acc = acc
 
-            fi_cv: Optional[float] = None
-
             if self.enable_csv:
                 with open(self._csv_path, "a", newline="") as f:
                     w = csv.writer(f)
@@ -146,11 +75,9 @@ async def on_test_metrics(self, engine, loss: float, acc: float) -> None:
                         float(loss),
                         float(acc),
                         float(tw_stability),
-                        None if fi_cv is None else float(fi_cv),
                     ])
 
             if self.enable_print:
-                fi_txt = "NA" if fi_cv is None else f"{fi_cv:.4f}"
                 print_msg_box(
                     msg=(
                         f"Round: {round_id}\n"

From d2bd55371892fe5c821294f8124162cc277ad00f Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 21 May 2026 14:49:23 +0200
Subject: [PATCH 48/66] Eval_metrics updated and fixed. Weights updated

---
 .../configs/eval_metrics_cfl.json             | 80 ++++++++----------
 .../configs/eval_metrics_cfl_images.json      | 80 ++++++++----------
 .../configs/eval_metrics_cfl_tabular.json     | 80 ++++++++----------
 .../configs/eval_metrics_dfl.json             | 84 +++++++++----------
 .../configs/eval_metrics_dfl_images.json      | 84 +++++++++----------
 .../configs/eval_metrics_dfl_tabular.json     | 84 +++++++++----------
 6 files changed, 222 insertions(+), 270 deletions(-)

diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
index 889635d90..d51e58dcf 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
@@ -11,10 +11,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.4
+            "weight": 0.2
           },
           "inverse_loss_sensitivity": {
             "inputs": [
@@ -24,9 +23,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
           "clipped_adversarial_accuracy": {
@@ -37,10 +35,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.2
           },
           "clipped_empirical_robustness": {
             "inputs": [
@@ -50,10 +47,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "weight": 0.15
           },
           "clipped_confidence_score": {
             "inputs": [
@@ -63,9 +59,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -76,10 +71,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
+            "weight": 0.15
           }
         }
       },
@@ -94,10 +88,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_true_score",
             "type": "true_score",
             "description": "Average test accuracy of the global model on clients test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "macro_f1": {
             "inputs": [
@@ -109,7 +102,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Macro F1 score of the final model on test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "personalization": {
             "inputs": [
@@ -121,7 +114,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "reputation_enabled": {
             "inputs": [
@@ -133,7 +126,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of an active reputation-based defense mechanism.",
-            "weight": 0.2
+            "weight": 0.1
           }
         }
       },
@@ -152,7 +145,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.3
+            "weight": 0.1
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -177,7 +170,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of expected client updates that were not received across rounds.",
-            "weight": 0.2
+            "weight": 0.3
           },
           "timeout_rate": {
             "inputs": [
@@ -190,7 +183,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of aggregation rounds that finished with missing expected client updates.",
-            "weight": 0.2
+            "weight": 0.3
           }
         }
       }
@@ -330,7 +323,6 @@
             ],
             "operation": "get_value",
             "type": "true_score",
-            "direction": "desc",
             "description": "Variation of the sample size per class.",
             "weight": 1
           }
@@ -349,7 +341,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
-            "weight": 0.1667
+            "weight": 0.1
           },
           "inverse_overfitting": {
             "inputs": [
@@ -361,7 +353,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Generalization quality proxy transformed so higher is better.",
-            "weight": 0.1667
+            "weight": 0.15
           },
           "inverse_well_calibration_error": {
             "inputs": [
@@ -373,7 +365,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_generalized_entropy_index": {
             "inputs": [
@@ -385,7 +377,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_theil_index": {
             "inputs": [
@@ -397,7 +389,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_coefficient_of_variation": {
             "inputs": [
@@ -409,7 +401,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Coefficient-of-variation-based outcome fairness score.",
-            "weight": 0.1665
+            "weight": 0.15
           }
         }
       }
@@ -782,7 +774,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Privacy metrics and risk estimates documented in the factsheet.",
             "weight": 0.1
           },
           "privacy": {
@@ -810,7 +802,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Explainability metrics documented in the factsheet.",
             "weight": 0.1
           },
           "explainability": {
@@ -830,7 +822,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Sustainability and emissions metrics documented in the factsheet.",
             "weight": 0.1
           },
           "sustainability": {
@@ -895,7 +887,7 @@
     },
     "architectural_soundness": {
       "client_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "client_selector": {
             "inputs": [
@@ -939,7 +931,7 @@
         }
       },
       "federation_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "topology_type": {
             "inputs": [
@@ -1040,10 +1032,10 @@
             ],
             "operation": "comm_efficiency",
             "type": "ranges",
-            "direction": "low",
+            "direction": "desc",
             "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
-            "description": "Descripcion de la metrica",
-            "weight": 0.1
+            "description": "Communication cost per unit of final test accuracy; lower values indicate more efficient federation communication.",
+            "weight": 0.3
           },
           "number_of_training_rounds": {
             "inputs": [
@@ -1057,7 +1049,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The total number of training rounds",
-            "weight": 0.06666666
+            "weight": 0.15
           },
           "avg_model_size": {
             "inputs": [
@@ -1071,7 +1063,7 @@
             "direction": "desc",
             "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
             "description": "The size of the model",
-            "weight": 0.16666666
+            "weight": 0.15
           },
           "client_selection_rate": {
             "inputs": [
@@ -1087,7 +1079,7 @@
               0.1,1
             ],
             "description": "The selection rate of clients for each training round",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "number_of_clients": {
             "inputs": [
@@ -1101,7 +1093,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the federation.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "local_training_rounds": {
             "inputs": [
@@ -1115,7 +1107,7 @@
             "direction": "desc",
             "scale": [1, 100],
             "description": "The number of local training rounds.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "avg_dataset_size": {
             "inputs": [
@@ -1129,7 +1121,7 @@
             "direction": "desc",
             "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
             "description": "The average number of training samples",
-            "weight": 0.16666666
+            "weight": 0.1
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
index 889635d90..d51e58dcf 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
@@ -11,10 +11,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.4
+            "weight": 0.2
           },
           "inverse_loss_sensitivity": {
             "inputs": [
@@ -24,9 +23,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
           "clipped_adversarial_accuracy": {
@@ -37,10 +35,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.2
           },
           "clipped_empirical_robustness": {
             "inputs": [
@@ -50,10 +47,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "weight": 0.15
           },
           "clipped_confidence_score": {
             "inputs": [
@@ -63,9 +59,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -76,10 +71,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
+            "weight": 0.15
           }
         }
       },
@@ -94,10 +88,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_true_score",
             "type": "true_score",
             "description": "Average test accuracy of the global model on clients test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "macro_f1": {
             "inputs": [
@@ -109,7 +102,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Macro F1 score of the final model on test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "personalization": {
             "inputs": [
@@ -121,7 +114,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "reputation_enabled": {
             "inputs": [
@@ -133,7 +126,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of an active reputation-based defense mechanism.",
-            "weight": 0.2
+            "weight": 0.1
           }
         }
       },
@@ -152,7 +145,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.3
+            "weight": 0.1
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -177,7 +170,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of expected client updates that were not received across rounds.",
-            "weight": 0.2
+            "weight": 0.3
           },
           "timeout_rate": {
             "inputs": [
@@ -190,7 +183,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of aggregation rounds that finished with missing expected client updates.",
-            "weight": 0.2
+            "weight": 0.3
           }
         }
       }
@@ -330,7 +323,6 @@
             ],
             "operation": "get_value",
             "type": "true_score",
-            "direction": "desc",
             "description": "Variation of the sample size per class.",
             "weight": 1
           }
@@ -349,7 +341,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
-            "weight": 0.1667
+            "weight": 0.1
           },
           "inverse_overfitting": {
             "inputs": [
@@ -361,7 +353,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Generalization quality proxy transformed so higher is better.",
-            "weight": 0.1667
+            "weight": 0.15
           },
           "inverse_well_calibration_error": {
             "inputs": [
@@ -373,7 +365,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_generalized_entropy_index": {
             "inputs": [
@@ -385,7 +377,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_theil_index": {
             "inputs": [
@@ -397,7 +389,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_coefficient_of_variation": {
             "inputs": [
@@ -409,7 +401,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Coefficient-of-variation-based outcome fairness score.",
-            "weight": 0.1665
+            "weight": 0.15
           }
         }
       }
@@ -782,7 +774,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Privacy metrics and risk estimates documented in the factsheet.",
             "weight": 0.1
           },
           "privacy": {
@@ -810,7 +802,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Explainability metrics documented in the factsheet.",
             "weight": 0.1
           },
           "explainability": {
@@ -830,7 +822,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Sustainability and emissions metrics documented in the factsheet.",
             "weight": 0.1
           },
           "sustainability": {
@@ -895,7 +887,7 @@
     },
     "architectural_soundness": {
       "client_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "client_selector": {
             "inputs": [
@@ -939,7 +931,7 @@
         }
       },
       "federation_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "topology_type": {
             "inputs": [
@@ -1040,10 +1032,10 @@
             ],
             "operation": "comm_efficiency",
             "type": "ranges",
-            "direction": "low",
+            "direction": "desc",
             "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
-            "description": "Descripcion de la metrica",
-            "weight": 0.1
+            "description": "Communication cost per unit of final test accuracy; lower values indicate more efficient federation communication.",
+            "weight": 0.3
           },
           "number_of_training_rounds": {
             "inputs": [
@@ -1057,7 +1049,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The total number of training rounds",
-            "weight": 0.06666666
+            "weight": 0.15
           },
           "avg_model_size": {
             "inputs": [
@@ -1071,7 +1063,7 @@
             "direction": "desc",
             "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
             "description": "The size of the model",
-            "weight": 0.16666666
+            "weight": 0.15
           },
           "client_selection_rate": {
             "inputs": [
@@ -1087,7 +1079,7 @@
               0.1,1
             ],
             "description": "The selection rate of clients for each training round",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "number_of_clients": {
             "inputs": [
@@ -1101,7 +1093,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the federation.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "local_training_rounds": {
             "inputs": [
@@ -1115,7 +1107,7 @@
             "direction": "desc",
             "scale": [1, 100],
             "description": "The number of local training rounds.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "avg_dataset_size": {
             "inputs": [
@@ -1129,7 +1121,7 @@
             "direction": "desc",
             "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
             "description": "The average number of training samples",
-            "weight": 0.16666666
+            "weight": 0.1
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
index 889635d90..d51e58dcf 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
@@ -11,10 +11,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.4
+            "weight": 0.2
           },
           "inverse_loss_sensitivity": {
             "inputs": [
@@ -24,9 +23,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
           "clipped_adversarial_accuracy": {
@@ -37,10 +35,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.2
           },
           "clipped_empirical_robustness": {
             "inputs": [
@@ -50,10 +47,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "weight": 0.15
           },
           "clipped_confidence_score": {
             "inputs": [
@@ -63,9 +59,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -76,10 +71,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
+            "weight": 0.15
           }
         }
       },
@@ -94,10 +88,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_true_score",
             "type": "true_score",
             "description": "Average test accuracy of the global model on clients test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "macro_f1": {
             "inputs": [
@@ -109,7 +102,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Macro F1 score of the final model on test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "personalization": {
             "inputs": [
@@ -121,7 +114,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "reputation_enabled": {
             "inputs": [
@@ -133,7 +126,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of an active reputation-based defense mechanism.",
-            "weight": 0.2
+            "weight": 0.1
           }
         }
       },
@@ -152,7 +145,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.3
+            "weight": 0.1
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -177,7 +170,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of expected client updates that were not received across rounds.",
-            "weight": 0.2
+            "weight": 0.3
           },
           "timeout_rate": {
             "inputs": [
@@ -190,7 +183,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of aggregation rounds that finished with missing expected client updates.",
-            "weight": 0.2
+            "weight": 0.3
           }
         }
       }
@@ -330,7 +323,6 @@
             ],
             "operation": "get_value",
             "type": "true_score",
-            "direction": "desc",
             "description": "Variation of the sample size per class.",
             "weight": 1
           }
@@ -349,7 +341,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
-            "weight": 0.1667
+            "weight": 0.1
           },
           "inverse_overfitting": {
             "inputs": [
@@ -361,7 +353,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Generalization quality proxy transformed so higher is better.",
-            "weight": 0.1667
+            "weight": 0.15
           },
           "inverse_well_calibration_error": {
             "inputs": [
@@ -373,7 +365,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_generalized_entropy_index": {
             "inputs": [
@@ -385,7 +377,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_theil_index": {
             "inputs": [
@@ -397,7 +389,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_coefficient_of_variation": {
             "inputs": [
@@ -409,7 +401,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Coefficient-of-variation-based outcome fairness score.",
-            "weight": 0.1665
+            "weight": 0.15
           }
         }
       }
@@ -782,7 +774,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Privacy metrics and risk estimates documented in the factsheet.",
             "weight": 0.1
           },
           "privacy": {
@@ -810,7 +802,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Explainability metrics documented in the factsheet.",
             "weight": 0.1
           },
           "explainability": {
@@ -830,7 +822,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Sustainability and emissions metrics documented in the factsheet.",
             "weight": 0.1
           },
           "sustainability": {
@@ -895,7 +887,7 @@
     },
     "architectural_soundness": {
       "client_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "client_selector": {
             "inputs": [
@@ -939,7 +931,7 @@
         }
       },
       "federation_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "topology_type": {
             "inputs": [
@@ -1040,10 +1032,10 @@
             ],
             "operation": "comm_efficiency",
             "type": "ranges",
-            "direction": "low",
+            "direction": "desc",
             "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
-            "description": "Descripcion de la metrica",
-            "weight": 0.1
+            "description": "Communication cost per unit of final test accuracy; lower values indicate more efficient federation communication.",
+            "weight": 0.3
           },
           "number_of_training_rounds": {
             "inputs": [
@@ -1057,7 +1049,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The total number of training rounds",
-            "weight": 0.06666666
+            "weight": 0.15
           },
           "avg_model_size": {
             "inputs": [
@@ -1071,7 +1063,7 @@
             "direction": "desc",
             "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
             "description": "The size of the model",
-            "weight": 0.16666666
+            "weight": 0.15
           },
           "client_selection_rate": {
             "inputs": [
@@ -1087,7 +1079,7 @@
               0.1,1
             ],
             "description": "The selection rate of clients for each training round",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "number_of_clients": {
             "inputs": [
@@ -1101,7 +1093,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the federation.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "local_training_rounds": {
             "inputs": [
@@ -1115,7 +1107,7 @@
             "direction": "desc",
             "scale": [1, 100],
             "description": "The number of local training rounds.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "avg_dataset_size": {
             "inputs": [
@@ -1129,7 +1121,7 @@
             "direction": "desc",
             "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
             "description": "The average number of training samples",
-            "weight": 0.16666666
+            "weight": 0.1
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index c3d2f2755..e2d194187 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -11,10 +11,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.4
+            "weight": 0.2
           },
           "inverse_loss_sensitivity": {
             "inputs": [
@@ -24,9 +23,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
           "clipped_adversarial_accuracy": {
@@ -37,10 +35,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.2
           },
           "clipped_empirical_robustness": {
             "inputs": [
@@ -50,10 +47,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "weight": 0.15
           },
           "clipped_confidence_score": {
             "inputs": [
@@ -63,9 +59,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -76,10 +71,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
+            "weight": 0.15
           }
         }
       },
@@ -94,10 +88,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_true_score",
             "type": "true_score",
             "description": "Local clean test accuracy of the final model.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "macro_f1": {
             "inputs": [
@@ -109,7 +102,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Macro F1 score of the final local model on test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "personalization": {
             "inputs": [
@@ -121,7 +114,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "reputation_enabled": {
             "inputs": [
@@ -133,7 +126,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of an active reputation-based defense mechanism.",
-            "weight": 0.2
+            "weight": 0.1
           }
         }
       },
@@ -152,7 +145,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.3
+            "weight": 0.1
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -164,7 +157,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Average reputation score of the neighbors associated with the node.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "dropout_rate": {
             "inputs": [
@@ -177,7 +170,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of expected neighbor updates that were not received across rounds.",
-            "weight": 0.2
+            "weight": 0.25
           },
           "timeout_rate": {
             "inputs": [
@@ -190,7 +183,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of aggregation rounds that finished with missing expected neighbor updates.",
-            "weight": 0.2
+            "weight": 0.25
           }
         }
       }
@@ -306,7 +299,6 @@
             ],
             "operation": "get_value",
             "type": "true_score",
-            "direction": "desc",
             "description": "Variation of the sample size per class.",
             "weight": 0.5
           }
@@ -325,7 +317,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
-            "weight": 0.1667
+            "weight": 0.1
           },
           "inverse_overfitting": {
             "inputs": [
@@ -337,7 +329,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Generalization quality proxy transformed so higher is better.",
-            "weight": 0.1667
+            "weight": 0.15
           },
           "inverse_well_calibration_error": {
             "inputs": [
@@ -349,7 +341,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_generalized_entropy_index": {
             "inputs": [
@@ -361,7 +353,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_theil_index": {
             "inputs": [
@@ -373,7 +365,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_coefficient_of_variation": {
             "inputs": [
@@ -385,7 +377,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Coefficient-of-variation-based outcome fairness score.",
-            "weight": 0.1665
+            "weight": 0.15
           }
         }
       }
@@ -765,7 +757,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "privacy/inveres_epsilon_star"
+                "field_path": "privacy/inverse_epsilon_star"
               },
               {
                 "source": "factsheet",
@@ -778,7 +770,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Privacy metrics and risk estimates documented in the factsheet.",
             "weight": 0.1
           },
           "explainability": {
@@ -798,7 +790,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Explainability metrics documented in the factsheet.",
             "weight": 0.1
           },
           "sustainability": {
@@ -822,7 +814,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Sustainability and emissions metrics documented in the factsheet.",
             "weight": 0.1
           }
         }
@@ -847,7 +839,7 @@
     },
     "architectural_soundness": {
       "client_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "client_selector": {
             "inputs": [
@@ -891,7 +883,7 @@
         }
       },
       "federation_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "topology_type": {
             "inputs": [
@@ -945,10 +937,10 @@
             ],
             "operation": "comm_efficiency",
             "type": "ranges",
-            "direction": "low",
+            "direction": "desc",
             "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
-            "description": "Descripcion de la metrica",
-            "weight": 0.1
+            "description": "Communication cost per unit of local test accuracy; lower values indicate more efficient neighbor communication.",
+            "weight": 0.3
           },
           "number_of_training_rounds": {
             "inputs": [
@@ -962,7 +954,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The total number of training rounds",
-            "weight": 0.06666666
+            "weight": 0.15
           },
           "avg_model_size": {
             "inputs": [
@@ -976,7 +968,7 @@
             "direction": "desc",
             "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
             "description": "The size of the model",
-            "weight": 0.16666666
+            "weight": 0.15
           },
           "client_selection_rate": {
             "inputs": [
@@ -992,7 +984,7 @@
               0.1,1
             ],
             "description": "The selection rate of clients for each training round",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "number_of_clients": {
             "inputs": [
@@ -1006,7 +998,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the federation.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "local_training_rounds": {
             "inputs": [
@@ -1020,7 +1012,7 @@
             "direction": "desc",
             "scale": [1, 100],
             "description": "The number of local training rounds.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "avg_dataset_size": {
             "inputs": [
@@ -1034,7 +1026,7 @@
             "direction": "desc",
             "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
             "description": "The average number of training samples",
-            "weight": 0.16666666
+            "weight": 0.1
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
index c3d2f2755..e2d194187 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
@@ -11,10 +11,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.4
+            "weight": 0.2
           },
           "inverse_loss_sensitivity": {
             "inputs": [
@@ -24,9 +23,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
           "clipped_adversarial_accuracy": {
@@ -37,10 +35,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.2
           },
           "clipped_empirical_robustness": {
             "inputs": [
@@ -50,10 +47,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "weight": 0.15
           },
           "clipped_confidence_score": {
             "inputs": [
@@ -63,9 +59,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -76,10 +71,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
+            "weight": 0.15
           }
         }
       },
@@ -94,10 +88,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_true_score",
             "type": "true_score",
             "description": "Local clean test accuracy of the final model.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "macro_f1": {
             "inputs": [
@@ -109,7 +102,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Macro F1 score of the final local model on test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "personalization": {
             "inputs": [
@@ -121,7 +114,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "reputation_enabled": {
             "inputs": [
@@ -133,7 +126,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of an active reputation-based defense mechanism.",
-            "weight": 0.2
+            "weight": 0.1
           }
         }
       },
@@ -152,7 +145,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.3
+            "weight": 0.1
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -164,7 +157,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Average reputation score of the neighbors associated with the node.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "dropout_rate": {
             "inputs": [
@@ -177,7 +170,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of expected neighbor updates that were not received across rounds.",
-            "weight": 0.2
+            "weight": 0.25
           },
           "timeout_rate": {
             "inputs": [
@@ -190,7 +183,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of aggregation rounds that finished with missing expected neighbor updates.",
-            "weight": 0.2
+            "weight": 0.25
           }
         }
       }
@@ -306,7 +299,6 @@
             ],
             "operation": "get_value",
             "type": "true_score",
-            "direction": "desc",
             "description": "Variation of the sample size per class.",
             "weight": 0.5
           }
@@ -325,7 +317,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
-            "weight": 0.1667
+            "weight": 0.1
           },
           "inverse_overfitting": {
             "inputs": [
@@ -337,7 +329,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Generalization quality proxy transformed so higher is better.",
-            "weight": 0.1667
+            "weight": 0.15
           },
           "inverse_well_calibration_error": {
             "inputs": [
@@ -349,7 +341,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_generalized_entropy_index": {
             "inputs": [
@@ -361,7 +353,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_theil_index": {
             "inputs": [
@@ -373,7 +365,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_coefficient_of_variation": {
             "inputs": [
@@ -385,7 +377,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Coefficient-of-variation-based outcome fairness score.",
-            "weight": 0.1665
+            "weight": 0.15
           }
         }
       }
@@ -765,7 +757,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "privacy/inveres_epsilon_star"
+                "field_path": "privacy/inverse_epsilon_star"
               },
               {
                 "source": "factsheet",
@@ -778,7 +770,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Privacy metrics and risk estimates documented in the factsheet.",
             "weight": 0.1
           },
           "explainability": {
@@ -798,7 +790,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Explainability metrics documented in the factsheet.",
             "weight": 0.1
           },
           "sustainability": {
@@ -822,7 +814,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Sustainability and emissions metrics documented in the factsheet.",
             "weight": 0.1
           }
         }
@@ -847,7 +839,7 @@
     },
     "architectural_soundness": {
       "client_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "client_selector": {
             "inputs": [
@@ -891,7 +883,7 @@
         }
       },
       "federation_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "topology_type": {
             "inputs": [
@@ -945,10 +937,10 @@
             ],
             "operation": "comm_efficiency",
             "type": "ranges",
-            "direction": "low",
+            "direction": "desc",
             "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
-            "description": "Descripcion de la metrica",
-            "weight": 0.1
+            "description": "Communication cost per unit of local test accuracy; lower values indicate more efficient neighbor communication.",
+            "weight": 0.3
           },
           "number_of_training_rounds": {
             "inputs": [
@@ -962,7 +954,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The total number of training rounds",
-            "weight": 0.06666666
+            "weight": 0.15
           },
           "avg_model_size": {
             "inputs": [
@@ -976,7 +968,7 @@
             "direction": "desc",
             "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
             "description": "The size of the model",
-            "weight": 0.16666666
+            "weight": 0.15
           },
           "client_selection_rate": {
             "inputs": [
@@ -992,7 +984,7 @@
               0.1,1
             ],
             "description": "The selection rate of clients for each training round",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "number_of_clients": {
             "inputs": [
@@ -1006,7 +998,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the federation.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "local_training_rounds": {
             "inputs": [
@@ -1020,7 +1012,7 @@
             "direction": "desc",
             "scale": [1, 100],
             "description": "The number of local training rounds.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "avg_dataset_size": {
             "inputs": [
@@ -1034,7 +1026,7 @@
             "direction": "desc",
             "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
             "description": "The average number of training samples",
-            "weight": 0.16666666
+            "weight": 0.1
           }
         }
       }
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
index c3d2f2755..e2d194187 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
@@ -11,10 +11,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
             "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.4
+            "weight": 0.2
           },
           "inverse_loss_sensitivity": {
             "inputs": [
@@ -24,9 +23,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
           "clipped_adversarial_accuracy": {
@@ -37,10 +35,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.2
           },
           "clipped_empirical_robustness": {
             "inputs": [
@@ -50,10 +47,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "weight": 0.15
           },
           "clipped_confidence_score": {
             "inputs": [
@@ -63,9 +59,8 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
+            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -76,10 +71,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_range_score",
             "type": "true_score",
-            "description": "",
-            "weight": 0.1
+            "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
+            "weight": 0.15
           }
         }
       },
@@ -94,10 +88,9 @@
               }
             ],
             "operation": "get_value",
-            "score_function": "get_true_score",
             "type": "true_score",
             "description": "Local clean test accuracy of the final model.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "macro_f1": {
             "inputs": [
@@ -109,7 +102,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Macro F1 score of the final local model on test data.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "personalization": {
             "inputs": [
@@ -121,7 +114,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of personalized FL algorithm.",
-            "weight": 0.2
+            "weight": 0.1
           },
           "reputation_enabled": {
             "inputs": [
@@ -133,7 +126,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "The use of an active reputation-based defense mechanism.",
-            "weight": 0.2
+            "weight": 0.1
           }
         }
       },
@@ -152,7 +145,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the model.",
-            "weight": 0.3
+            "weight": 0.1
           },
           "average_neighbor_reputation": {
             "inputs": [
@@ -164,7 +157,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Average reputation score of the neighbors associated with the node.",
-            "weight": 0.3
+            "weight": 0.4
           },
           "dropout_rate": {
             "inputs": [
@@ -177,7 +170,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of expected neighbor updates that were not received across rounds.",
-            "weight": 0.2
+            "weight": 0.25
           },
           "timeout_rate": {
             "inputs": [
@@ -190,7 +183,7 @@
             "type": "true_score",
             "direction": "desc",
             "description": "Fraction of aggregation rounds that finished with missing expected neighbor updates.",
-            "weight": 0.2
+            "weight": 0.25
           }
         }
       }
@@ -306,7 +299,6 @@
             ],
             "operation": "get_value",
             "type": "true_score",
-            "direction": "desc",
             "description": "Variation of the sample size per class.",
             "weight": 0.5
           }
@@ -325,7 +317,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Held-out performance proxy used as an outcome-level fairness signal.",
-            "weight": 0.1667
+            "weight": 0.1
           },
           "inverse_overfitting": {
             "inputs": [
@@ -337,7 +329,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Generalization quality proxy transformed so higher is better.",
-            "weight": 0.1667
+            "weight": 0.15
           },
           "inverse_well_calibration_error": {
             "inputs": [
@@ -349,7 +341,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Calibration quality of the predictive outputs represented as a trust-oriented score.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_generalized_entropy_index": {
             "inputs": [
@@ -361,7 +353,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_theil_index": {
             "inputs": [
@@ -373,7 +365,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Theil-based outcome inequality score transformed so higher values indicate better fairness.",
-            "weight": 0.1667
+            "weight": 0.2
           },
           "inverse_coefficient_of_variation": {
             "inputs": [
@@ -385,7 +377,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Coefficient-of-variation-based outcome fairness score.",
-            "weight": 0.1665
+            "weight": 0.15
           }
         }
       }
@@ -765,7 +757,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "privacy/inveres_epsilon_star"
+                "field_path": "privacy/inverse_epsilon_star"
               },
               {
                 "source": "factsheet",
@@ -778,7 +770,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Privacy metrics and risk estimates documented in the factsheet.",
             "weight": 0.1
           },
           "explainability": {
@@ -798,7 +790,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Explainability metrics documented in the factsheet.",
             "weight": 0.1
           },
           "sustainability": {
@@ -822,7 +814,7 @@
             ],
             "operation": "check_properties",
             "type": "property_check",
-            "description": "System usage information.",
+            "description": "Sustainability and emissions metrics documented in the factsheet.",
             "weight": 0.1
           }
         }
@@ -847,7 +839,7 @@
     },
     "architectural_soundness": {
       "client_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "client_selector": {
             "inputs": [
@@ -891,7 +883,7 @@
         }
       },
       "federation_management": {
-        "weight": 0.5,
+        "weight": 0.25,
         "metrics": {
           "topology_type": {
             "inputs": [
@@ -945,10 +937,10 @@
             ],
             "operation": "comm_efficiency",
             "type": "ranges",
-            "direction": "low",
+            "direction": "desc",
             "ranges":[0.1, 10e2, 10e3,10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
-            "description": "Descripcion de la metrica",
-            "weight": 0.1
+            "description": "Communication cost per unit of local test accuracy; lower values indicate more efficient neighbor communication.",
+            "weight": 0.3
           },
           "number_of_training_rounds": {
             "inputs": [
@@ -962,7 +954,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The total number of training rounds",
-            "weight": 0.06666666
+            "weight": 0.15
           },
           "avg_model_size": {
             "inputs": [
@@ -976,7 +968,7 @@
             "direction": "desc",
             "ranges":[10e4, 10e5, 10e6,10e7,10e8,10e9,10e10,10e11],
             "description": "The size of the model",
-            "weight": 0.16666666
+            "weight": 0.15
           },
           "client_selection_rate": {
             "inputs": [
@@ -992,7 +984,7 @@
               0.1,1
             ],
             "description": "The selection rate of clients for each training round",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "number_of_clients": {
             "inputs": [
@@ -1006,7 +998,7 @@
             "direction": "desc",
             "ranges": [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
             "description": "The number of clients in the federation.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "local_training_rounds": {
             "inputs": [
@@ -1020,7 +1012,7 @@
             "direction": "desc",
             "scale": [1, 100],
             "description": "The number of local training rounds.",
-            "weight": 0.16666666
+            "weight": 0.1
           },
           "avg_dataset_size": {
             "inputs": [
@@ -1034,7 +1026,7 @@
             "direction": "desc",
             "ranges": [10e1, 10e2, 10e3, 10e4, 10e5],
             "description": "The average number of training samples",
-            "weight": 0.16666666
+            "weight": 0.1
           }
         }
       }

From 08409dcd3f8a55bfc901b6ba26a41626c1184f4d Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 25 May 2026 13:20:25 +0200
Subject: [PATCH 49/66] Adversarial training for tabular data (Adult Census),
 frontend updated, trustworthiness updated and fixed, eval_metrics changed,
 trustworthiness split into tabular and images

---
 .../addons/defenses/adversarial_training.py   | 214 ++++++++++++++++--
 .../configs/eval_metrics_cfl.json             |   4 +-
 .../configs/eval_metrics_cfl_images.json      |   4 +-
 .../configs/eval_metrics_cfl_tabular.json     |   4 +-
 .../configs/eval_metrics_dfl.json             |   4 +-
 .../configs/eval_metrics_dfl_images.json      |   4 +-
 .../configs/eval_metrics_dfl_tabular.json     |   4 +-
 .../trustworthiness/factsheet_populators.py   |  16 +-
 .../core/datasets/adultcensus/adultcensus.py  | 149 ++++++++++--
 nebula/core/datasets/nebuladataset.py         |  18 ++
 nebula/core/datasets/tabular_metadata.py      |  45 ++++
 nebula/core/models/adultcensus/mlp.py         |   2 +-
 nebula/core/node.py                           |   2 +-
 .../js/deployment/adversarial-training.js     |  10 +-
 nebula/frontend/templates/deployment.html     |   4 +-
 15 files changed, 424 insertions(+), 60 deletions(-)
 create mode 100644 nebula/core/datasets/tabular_metadata.py

diff --git a/nebula/addons/defenses/adversarial_training.py b/nebula/addons/defenses/adversarial_training.py
index 0de881e4f..558a5fe90 100644
--- a/nebula/addons/defenses/adversarial_training.py
+++ b/nebula/addons/defenses/adversarial_training.py
@@ -6,6 +6,7 @@
 import torch
 
 from nebula.config.config import TRAINING_LOGGER
+from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, TabularAdversarialMetadata
 
 logging_training = logging.getLogger(TRAINING_LOGGER)
 
@@ -39,10 +40,26 @@ class AdversarialTrainingConfig:
 class AdversarialExampleGenerator(ABC):
     """Base interface for domain-specific adversarial example generators."""
 
+    last_epsilon: float | None = None
+
     @abstractmethod
     def generate(self, model, x, y, criterion):
         raise NotImplementedError
 
+    def _sample_epsilon(self, device: torch.device) -> float:
+        epsilon_max = float(self.config.epsilon)
+        if epsilon_max <= 0.0:
+            self.last_epsilon = 0.0
+            return 0.0
+
+        epsilon_min = epsilon_max / 4.0
+        epsilon_step = epsilon_max / 8.0
+        num_values = max(int(round((epsilon_max - epsilon_min) / epsilon_step)) + 1, 1)
+        index = int(torch.randint(num_values, (), device=device).item())
+        epsilon = min(epsilon_min + index * epsilon_step, epsilon_max)
+        self.last_epsilon = epsilon
+        return epsilon
+
 
 class ImageAdversarialExampleGenerator(AdversarialExampleGenerator):
     def __init__(self, config: AdversarialTrainingConfig, mean: tuple[float, ...], std: tuple[float, ...]):
@@ -54,14 +71,14 @@ def _channel_tensor(self, values: tuple[float, ...], x: torch.Tensor) -> torch.T
         shape = [1, len(values)] + [1] * max(x.dim() - 2, 0)
         return torch.tensor(values, dtype=x.dtype, device=x.device).view(*shape)
 
-    def _epsilon(self, x: torch.Tensor) -> torch.Tensor:
+    def _epsilon(self, x: torch.Tensor, epsilon: float) -> torch.Tensor:
         std = self._channel_tensor(self.std, x)
-        return float(self.config.epsilon) / std
+        return float(epsilon) / std
 
-    def _alpha(self, x: torch.Tensor) -> torch.Tensor:
+    def _alpha(self, x: torch.Tensor, epsilon: float) -> torch.Tensor:
         alpha = self.config.alpha
         if alpha is None:
-            alpha = self.config.epsilon / max(int(self.config.steps), 1)
+            alpha = epsilon / max(int(self.config.steps), 1)
         std = self._channel_tensor(self.std, x)
         return float(alpha) / std
 
@@ -77,8 +94,8 @@ def denormalize(self, x: torch.Tensor) -> torch.Tensor:
         std = self._channel_tensor(self.std, x)
         return (x * std + mean).clamp(float(self.config.clip_min), float(self.config.clip_max))
 
-    def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor) -> torch.Tensor:
-        epsilon = self._epsilon(x_clean)
+    def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
+        epsilon = self._epsilon(x_clean, epsilon)
         lower, upper = self._bounds(x_clean)
         x_adv = torch.max(torch.min(x_adv, x_clean + epsilon), x_clean - epsilon)
         return torch.max(torch.min(x_adv, upper), lower)
@@ -86,16 +103,18 @@ def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor) -> torch.Tensor:
 
 class ImageFGSMGenerator(ImageAdversarialExampleGenerator):
     def generate(self, model, x, y, criterion):
+        epsilon = self._sample_epsilon(x.device)
         x_adv = x.detach().clone().requires_grad_(True)
         logits = model(x_adv)
         loss = criterion(logits, y)
         grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]
-        x_adv = x_adv + self._epsilon(x_adv) * grad.sign()
-        return self._project(x_adv.detach(), x.detach())
+        x_adv = x_adv + self._epsilon(x_adv, epsilon) * grad.sign()
+        return self._project(x_adv.detach(), x.detach(), epsilon)
 
 
 class ImagePGDGenerator(ImageAdversarialExampleGenerator):
     def generate(self, model, x, y, criterion):
+        epsilon = self._sample_epsilon(x.device)
         x_clean = x.detach()
         x_adv = x_clean.clone()
         steps = max(int(self.config.steps), 1)
@@ -105,8 +124,101 @@ def generate(self, model, x, y, criterion):
             logits = model(x_adv)
             loss = criterion(logits, y)
             grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]
-            x_adv = x_adv + self._alpha(x_adv) * grad.sign()
-            x_adv = self._project(x_adv.detach(), x_clean)
+            x_adv = x_adv + self._alpha(x_adv, epsilon) * grad.sign()
+            x_adv = self._project(x_adv.detach(), x_clean, epsilon)
+
+        return x_adv.detach()
+
+
+class TabularAdversarialExampleGenerator(AdversarialExampleGenerator):
+    """Adversarial generator for perturbable continuous and integer tabular features."""
+
+    def __init__(self, config: AdversarialTrainingConfig, metadata: TabularAdversarialMetadata):
+        self.config = config
+        self.metadata = metadata
+        self._tensor_cache: dict[tuple[torch.device, torch.dtype], dict[str, torch.Tensor]] = {}
+
+    def _alpha(self, epsilon: float) -> float:
+        if self.config.alpha is not None:
+            return float(self.config.alpha)
+        return float(epsilon) / max(int(self.config.steps), 1)
+
+    def _tensors(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
+        key = (x.device, x.dtype)
+        cached = self._tensor_cache.get(key)
+        if cached is not None:
+            return cached
+
+        cached = {
+            "continuous": torch.tensor(
+                [feature_type == CONTINUOUS for feature_type in self.metadata.feature_types],
+                dtype=torch.bool,
+                device=x.device,
+            ).view(1, -1),
+            "integer": torch.tensor(
+                [feature_type == INTEGER for feature_type in self.metadata.feature_types],
+                dtype=torch.bool,
+                device=x.device,
+            ).view(1, -1),
+            "min": torch.tensor(self.metadata.feature_min_norm, dtype=x.dtype, device=x.device).view(1, -1),
+            "max": torch.tensor(self.metadata.feature_max_norm, dtype=x.dtype, device=x.device).view(1, -1),
+        }
+        cached["perturbable"] = cached["continuous"] | cached["integer"]
+        integer_steps = torch.ones_like(cached["min"])
+        for idx, step in (self.metadata.integer_step_norm or {}).items():
+            integer_steps[0, int(idx)] = float(step)
+        cached["integer_step"] = integer_steps
+        self._tensor_cache[key] = cached
+        return cached
+
+    def _gradient(self, model, x, y, criterion):
+        x_grad = x.detach().clone().requires_grad_(True)
+        logits = model(x_grad)
+        loss = criterion(logits, y)
+        return torch.autograd.grad(loss, x_grad, only_inputs=True)[0]
+
+    def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
+        tensors = self._tensors(x_clean)
+        lower = torch.maximum(tensors["min"], x_clean - float(epsilon))
+        upper = torch.minimum(tensors["max"], x_clean + float(epsilon))
+        x_adv = torch.max(torch.min(x_adv, upper), lower)
+
+        integer_mask = tensors["integer"]
+        if integer_mask.any():
+            step = torch.clamp(tensors["integer_step"], min=torch.finfo(x_adv.dtype).eps)
+            projected_integer = torch.round((x_adv - tensors["min"]) / step) * step + tensors["min"]
+            grid_lower = torch.ceil((lower - tensors["min"]) / step) * step + tensors["min"]
+            grid_upper = torch.floor((upper - tensors["min"]) / step) * step + tensors["min"]
+            projected_integer = torch.max(torch.min(projected_integer, grid_upper), grid_lower)
+            has_valid_grid = grid_lower <= grid_upper
+            projected_integer = torch.where(has_valid_grid, projected_integer, x_clean)
+            x_adv = torch.where(integer_mask, projected_integer, x_adv)
+
+        return torch.where(tensors["perturbable"], x_adv, x_clean)
+
+
+class TabularFGSMGenerator(TabularAdversarialExampleGenerator):
+    def generate(self, model, x, y, criterion):
+        epsilon = self._sample_epsilon(x.device)
+        grad = self._gradient(model, x, y, criterion)
+        x_clean = x.detach()
+        perturbable_mask = self._tensors(x_clean)["perturbable"]
+        x_adv = x_clean + float(epsilon) * grad.sign() * perturbable_mask
+        return self._project(x_adv.detach(), x_clean, epsilon)
+
+
+class TabularPGDGenerator(TabularAdversarialExampleGenerator):
+    def generate(self, model, x, y, criterion):
+        epsilon = self._sample_epsilon(x.device)
+        x_clean = x.detach()
+        x_adv = x_clean.clone()
+        steps = max(int(self.config.steps), 1)
+
+        for _ in range(steps):
+            grad = self._gradient(model, x_adv, y, criterion)
+            perturbable_mask = self._tensors(x_clean)["perturbable"]
+            x_adv = x_adv.detach() + self._alpha(epsilon) * grad.sign() * perturbable_mask
+            x_adv = self._project(x_adv.detach(), x_clean, epsilon)
 
         return x_adv.detach()
 
@@ -122,7 +234,11 @@ def __init__(self, config: AdversarialTrainingConfig, generator: AdversarialExam
         self._logged_adversarial_samples_by_round: dict[int, int] = {}
 
     @classmethod
-    def from_participant_config(cls, participant_config: dict[str, Any]) -> "AdversarialTrainingDefense | None":
+    def from_participant_config(
+        cls,
+        participant_config: dict[str, Any],
+        partition=None,
+    ) -> "AdversarialTrainingDefense | None":
         raw = participant_config.get("defense_args", {}).get("adversarial_training", {})
         if not raw or not raw.get("enabled", False):
             return None
@@ -146,6 +262,17 @@ def from_participant_config(cls, participant_config: dict[str, Any]) -> "Adversa
         )
         cls._validate_config(config)
 
+        if config.domain == "tabular":
+            if dataset_name != "AdultCensus":
+                logging.warning(
+                    "[AdversarialTrainingDefense] Skipping tabular adversarial training: dataset '%s' is not supported yet",
+                    dataset_name,
+                )
+                return None
+            metadata = cls._get_tabular_metadata(partition)
+            generator = cls._build_tabular_generator(config, metadata)
+            return cls(config=config, generator=generator)
+
         if config.domain != "image":
             logging.warning(
                 "[AdversarialTrainingDefense] Skipping adversarial training: domain '%s' is not implemented yet",
@@ -194,6 +321,52 @@ def _build_generator(config, normalization):
             return ImagePGDGenerator(config, mean, std)
         raise ValueError(f"Unsupported adversarial training attack: {config.attack}")
 
+    @staticmethod
+    def _build_tabular_generator(config, metadata: TabularAdversarialMetadata):
+        if config.attack == "fgsm":
+            return TabularFGSMGenerator(config, metadata)
+        if config.attack == "pgd":
+            return TabularPGDGenerator(config, metadata)
+        raise ValueError(f"Unsupported adversarial training attack: {config.attack}")
+
+    @staticmethod
+    def _get_tabular_metadata(partition) -> TabularAdversarialMetadata:
+        train_set = getattr(partition, "train_set", None) if partition is not None else None
+        metadata = getattr(train_set, "tabular_metadata", None)
+        if metadata is None:
+            raise ValueError("AdultCensus tabular adversarial training requires tabular_metadata")
+        if isinstance(metadata, TabularAdversarialMetadata):
+            tabular_metadata = metadata
+        else:
+            tabular_metadata = TabularAdversarialMetadata.from_dict(metadata)
+
+        integer_features = [
+            name
+            for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types)
+            if feature_type == INTEGER
+        ]
+        continuous_features = [
+            name
+            for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types)
+            if feature_type == CONTINUOUS
+        ]
+        non_perturbable_features = [
+            name
+            for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types)
+            if feature_type not in {CONTINUOUS, INTEGER}
+        ]
+        logging.info(
+            "[AdversarialTrainingDefense] Tabular feature mask loaded | integer=%s | continuous=%s | "
+            "non_perturbable=%s | integer_features=%s | continuous_features=%s | non_perturbable_preview=%s",
+            len(integer_features),
+            len(continuous_features),
+            len(non_perturbable_features),
+            integer_features,
+            continuous_features,
+            non_perturbable_features[:20],
+        )
+        return tabular_metadata
+
     def should_apply(self, x: torch.Tensor) -> bool:
         if self.config.apply_probability >= 1.0:
             return True
@@ -272,13 +445,15 @@ def _log_adversarial_samples(self, model, x_clean: torch.Tensor, x_adv: torch.Te
 
                 logging_training.info(
                     "[AdversarialTrainingDefense] Round %s | Sample %s/%s before/after distortion | "
-                    "dataset=%s | attack=%s | label=%s | clean[min=%.6f max=%.6f mean=%.6f] | "
+                    "dataset=%s | attack=%s | epsilon_effective=%.6f | label=%s | "
+                    "clean[min=%.6f max=%.6f mean=%.6f] | "
                     "adv[min=%.6f max=%.6f mean=%.6f] | delta_linf=%.6f | delta_l2=%.6f",
                     current_round,
                     already_logged + sample_idx + 1,
                     self.LOGGED_SAMPLES_PER_ROUND,
                     self.config.dataset_name,
                     self.config.attack,
+                    float(getattr(self.generator, "last_epsilon", self.config.epsilon) or 0.0),
                     int(y[sample_idx].detach().cpu().item()) if y.numel() > sample_idx else None,
                     sample_clean.min().item(),
                     sample_clean.max().item(),
@@ -318,17 +493,24 @@ def _format_patch(sample: torch.Tensor, patch_size: int = 4) -> str:
             patch = sample[:patch_size, :patch_size]
         else:
             patch = sample[:patch_size]
-        return str([[round(float(value), 6) for value in row] for row in patch.tolist()])
+        values = patch.tolist()
+        if sample.dim() < 2:
+            return str([round(float(value), 6) for value in values])
+        return str([[round(float(value), 6) for value in row] for row in values])
 
 
-def apply_adversarial_training_if_enabled(model, participant_config: dict[str, Any]) -> None:
-    defense = AdversarialTrainingDefense.from_participant_config(participant_config)
+def apply_adversarial_training_if_enabled(model, participant_config: dict[str, Any], partition=None) -> None:
+    defense = AdversarialTrainingDefense.from_participant_config(participant_config, partition=partition)
     if defense is not None:
         model.set_adversarial_training(defense)
         logging.info(
-            "[AdversarialTrainingDefense] Enabled | dataset=%s | attack=%s | epsilon=%s | mode=%s",
+            "[AdversarialTrainingDefense] Enabled | dataset=%s | attack=%s | epsilon_max=%s | "
+            "epsilon_range=[%.6f, %.6f] | epsilon_step=%.6f | mode=%s",
             defense.config.dataset_name,
             defense.config.attack,
             defense.config.epsilon,
+            defense.config.epsilon / 4.0,
+            defense.config.epsilon,
+            defense.config.epsilon / 8.0,
             defense.config.mode,
         )
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
index d51e58dcf..635d2e9a0 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
@@ -159,7 +159,7 @@
             "description": "Average reputation score of the neighbors associated with the node or federation.",
             "weight": 0.3
           },
-          "dropout_rate": {
+          "inverse_dropout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -172,7 +172,7 @@
             "description": "Fraction of expected client updates that were not received across rounds.",
             "weight": 0.3
           },
-          "timeout_rate": {
+          "inverse_timeout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
index d51e58dcf..635d2e9a0 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
@@ -159,7 +159,7 @@
             "description": "Average reputation score of the neighbors associated with the node or federation.",
             "weight": 0.3
           },
-          "dropout_rate": {
+          "inverse_dropout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -172,7 +172,7 @@
             "description": "Fraction of expected client updates that were not received across rounds.",
             "weight": 0.3
           },
-          "timeout_rate": {
+          "inverse_timeout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
index d51e58dcf..635d2e9a0 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
@@ -159,7 +159,7 @@
             "description": "Average reputation score of the neighbors associated with the node or federation.",
             "weight": 0.3
           },
-          "dropout_rate": {
+          "inverse_dropout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -172,7 +172,7 @@
             "description": "Fraction of expected client updates that were not received across rounds.",
             "weight": 0.3
           },
-          "timeout_rate": {
+          "inverse_timeout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index e2d194187..80cb9486e 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -159,7 +159,7 @@
             "description": "Average reputation score of the neighbors associated with the node.",
             "weight": 0.4
           },
-          "dropout_rate": {
+          "inverse_dropout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -172,7 +172,7 @@
             "description": "Fraction of expected neighbor updates that were not received across rounds.",
             "weight": 0.25
           },
-          "timeout_rate": {
+          "inverse_timeout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
index e2d194187..80cb9486e 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
@@ -159,7 +159,7 @@
             "description": "Average reputation score of the neighbors associated with the node.",
             "weight": 0.4
           },
-          "dropout_rate": {
+          "inverse_dropout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -172,7 +172,7 @@
             "description": "Fraction of expected neighbor updates that were not received across rounds.",
             "weight": 0.25
           },
-          "timeout_rate": {
+          "inverse_timeout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
index e2d194187..80cb9486e 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
@@ -159,7 +159,7 @@
             "description": "Average reputation score of the neighbors associated with the node.",
             "weight": 0.4
           },
-          "dropout_rate": {
+          "inverse_dropout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
@@ -172,7 +172,7 @@
             "description": "Fraction of expected neighbor updates that were not received across rounds.",
             "weight": 0.25
           },
-          "timeout_rate": {
+          "inverse_timeout_rate": {
             "inputs": [
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/factsheet_populators.py b/nebula/addons/trustworthiness/factsheet_populators.py
index 3e44b8678..1fa8ea6f8 100644
--- a/nebula/addons/trustworthiness/factsheet_populators.py
+++ b/nebula/addons/trustworthiness/factsheet_populators.py
@@ -1,5 +1,7 @@
 """Profile-specific factsheet metric population."""
 
+import logging
+
 from nebula.addons.trustworthiness.calculation import (
     attack_success_rate,
     compute_adversarial_accuracy_art,
@@ -17,6 +19,8 @@
     get_theil_index,
     get_well_calibration_error,
 )
+
+logger = logging.getLogger(__name__)
 from nebula.addons.trustworthiness.factsheet_common import (
     DATA_TYPE_IMAGES,
     DATA_TYPE_TABULAR,
@@ -143,10 +147,18 @@ def populate_image_robustness_metrics(factsheet, model, test_loader, test_sample
     value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, num_classes, lr)
     factsheet["performance"]["clipped_test_adv_accuracy"] = cap_score(value_adv_accuracy)
 
-    value_empirical_robustness = get_empirical_robustness_score(model, test_sample, num_classes, lr)
+    value_empirical_robustness = get_empirical_robustness_score(
+        model,
+        test_sample,
+        num_classes,
+        lr,
+    )
     factsheet["performance"]["clipped_test_empirical_robustness"] = cap_score(value_empirical_robustness)
 
-    value_attack_success_rate = attack_success_rate(model, test_sample)
+    value_attack_success_rate = attack_success_rate(
+        model,
+        test_sample,
+    )
     factsheet["performance"]["inverse_test_attack_success_rate"] = 1 - value_attack_success_rate
 
 
diff --git a/nebula/core/datasets/adultcensus/adultcensus.py b/nebula/core/datasets/adultcensus/adultcensus.py
index 4c3c4584f..5be603bfd 100644
--- a/nebula/core/datasets/adultcensus/adultcensus.py
+++ b/nebula/core/datasets/adultcensus/adultcensus.py
@@ -8,6 +8,7 @@
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
 
 
 class AdultCensusTorchDataset(Dataset):
@@ -22,7 +23,9 @@ def __init__(
         y: np.ndarray,
         feature_names: list[str] | None = None,
         continuous_features: list[int] | None = None,
-        binary_features: list[int] | None = None,
+        integer_features: list[int] | None = None,
+        non_perturbable_features: list[int] | None = None,
+        tabular_metadata: dict | None = None,
     ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
@@ -43,7 +46,9 @@ def __init__(
         self.classes: list[str] = ["<=50K", ">50K"]
         self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
         self.continuous_features = continuous_features or []
-        self.binary_features = binary_features or []
+        self.integer_features = integer_features or []
+        self.non_perturbable_features = non_perturbable_features or []
+        self.tabular_metadata = tabular_metadata
         self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
@@ -91,9 +96,29 @@ class AdultCensusDataset(NebulaDataset):
     Adult Census Income dataset integration for Nebula.
 
     - 2 classes: <=50K vs >50K
-    - mixed categorical + numerical -> numeric via preprocessing (impute + OHE + scale)
+    - mixed tabular data -> numeric model input via preprocessing
     - deterministic stratified train/test split
     """
+    PERTURBABLE_CONTINUOUS_COLUMNS = []
+    PERTURBABLE_INTEGER_COLUMNS = [
+        "age",
+        "fnlwgt",
+        "education-num",
+        "capital-gain",
+        "capital-loss",
+        "hours-per-week",
+    ]
+    NON_PERTURBABLE_COLUMNS = [
+        "workclass",
+        "education",
+        "marital-status",
+        "occupation",
+        "relationship",
+        "race",
+        "sex",
+        "native-country",
+    ]
+
     def __init__(
         self,
         num_classes: int = 2,
@@ -140,6 +165,28 @@ def _make_ohe_dense():
         except TypeError:
             return OneHotEncoder(handle_unknown="ignore", sparse=False)
 
+    @classmethod
+    def _validate_manual_schema(cls, columns) -> None:
+        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
+        integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
+        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
+        overlapping_columns = sorted(
+            (continuous_columns & integer_columns)
+            | (continuous_columns & non_perturbable_columns)
+            | (integer_columns & non_perturbable_columns)
+        )
+        if overlapping_columns:
+            raise ValueError(f"AdultCensusDataset columns configured twice: {overlapping_columns}")
+
+        configured_columns = continuous_columns | integer_columns | non_perturbable_columns
+        dataset_columns = set(columns)
+        missing_columns = sorted(configured_columns - dataset_columns)
+        if missing_columns:
+            raise ValueError(f"AdultCensusDataset is missing configured columns: {missing_columns}")
+        unconfigured_columns = sorted(dataset_columns - configured_columns)
+        if unconfigured_columns:
+            raise ValueError(f"AdultCensusDataset has unconfigured columns: {unconfigured_columns}")
+
     def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensusTorchDataset]:
         """
         Loads Adult dataset from OpenML and preprocesses to all-numeric features.
@@ -149,8 +196,9 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
           2) y = (target == '>50K').astype(int)
           3) replace '?' with NA for missing values
           4) ColumnTransformer:
-              - numeric: median impute + StandardScaler
-              - categorical: most_frequent impute + OneHotEncoder(dense)
+              - continuous: median impute + StandardScaler
+              - integer: median impute + StandardScaler
+              - non_perturbable: most_frequent impute + OneHotEncoder(dense)
           5) train/test split (stratified), fit preprocessing only on train (avoid leakage)
         """
         data_dir: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
@@ -160,7 +208,7 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
             import pandas as pd
             from sklearn.datasets import fetch_openml
             from sklearn.model_selection import train_test_split
-            from sklearn.compose import ColumnTransformer, make_column_selector as selector
+            from sklearn.compose import ColumnTransformer
             from sklearn.pipeline import Pipeline
             from sklearn.impute import SimpleImputer
             from sklearn.preprocessing import StandardScaler
@@ -179,13 +227,26 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
         y_str = y_raw.astype(str).str.strip()
         y: np.ndarray = (y_str == ">50K").astype(np.int64).to_numpy()
 
-        # 3) Replace '?' markers with NA (UCI Adult uses '?' for missing categorical values)
-        X_df = X_df.replace(r"^\s*\?\s*$", pd.NA, regex=True)
+        # 3) Replace '?' markers with np.nan and drop rows with missing configured features.
+        X_df = X_df.replace(r"^\s*\?\s*$", np.nan, regex=True)
+        self._validate_manual_schema(X_df.columns)
+
+        numeric_columns = self.PERTURBABLE_CONTINUOUS_COLUMNS + self.PERTURBABLE_INTEGER_COLUMNS
+        for column in numeric_columns:
+            X_df[column] = pd.to_numeric(X_df[column], errors="coerce")
+        for column in self.NON_PERTURBABLE_COLUMNS:
+            X_df[column] = X_df[column].astype(object)
+
+        configured_columns = numeric_columns + self.NON_PERTURBABLE_COLUMNS
+        valid_rows = ~X_df[configured_columns].isna().any(axis=1)
+        import logging  # must be unconditional: a function-local import binds `logging` for the whole body, incl. the later metadata log
+        removed_rows = int((~valid_rows).sum())
+        if removed_rows:
+            logging.getLogger().info("[AdultCensus] Dropping %s rows with NA values", removed_rows)
+        X_df = X_df.loc[valid_rows].copy()
+        y = y[valid_rows.to_numpy()]
 
         # 4) Preprocess
-        numeric_selector = selector(dtype_exclude=["object", "category", "string"])
-        categorical_selector = selector(dtype_include=["object", "category", "string"])
-
         numeric_transformer = Pipeline(
             steps=[
                 ("impute", SimpleImputer(strategy="median")),
@@ -193,20 +254,21 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
             ]
         )
 
-        categorical_transformer = Pipeline(
+        non_perturbable_transformer = Pipeline(
             steps=[
                 ("impute", SimpleImputer(strategy="most_frequent")),
                 ("ohe", self._make_ohe_dense()),
             ]
         )
 
-        preprocessor = ColumnTransformer(
-            transformers=[
-                ("num", numeric_transformer, numeric_selector),
-                ("cat", categorical_transformer, categorical_selector),
-            ],
-            remainder="drop",
-        )
+        transformers = []
+        if self.PERTURBABLE_CONTINUOUS_COLUMNS:
+            transformers.append(("continuous", numeric_transformer, self.PERTURBABLE_CONTINUOUS_COLUMNS))
+        if self.PERTURBABLE_INTEGER_COLUMNS:
+            transformers.append(("integer", numeric_transformer, self.PERTURBABLE_INTEGER_COLUMNS))
+        transformers.append(("non_perturbable", non_perturbable_transformer, self.NON_PERTURBABLE_COLUMNS))
+
+        preprocessor = ColumnTransformer(transformers=transformers, remainder="drop")
 
         # 5) Split then fit on train
         X_train_df, X_test_df, y_train, y_test = train_test_split(
@@ -238,26 +300,65 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
         X_test_np: np.ndarray = np.asarray(X_test, dtype=np.float32)
         continuous_features = [
             idx for idx, name in enumerate(feature_names)
-            if name.startswith("num__")
+            if name.startswith("continuous__")
+        ]
+        integer_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name.startswith("integer__")
         ]
-        binary_features = [
+        non_perturbable_features = [
             idx for idx, name in enumerate(feature_names)
-            if name.startswith("cat__")
+            if name.startswith("non_perturbable__")
         ]
+        continuous_feature_set = set(continuous_features)
+        integer_feature_set = set(integer_features)
+        integer_step_norm = {}
+        if integer_features:
+            integer_scaler = preprocessor.named_transformers_["integer"].named_steps["scaler"]
+            integer_step_norm = {
+                idx: float(1.0 / scale)
+                for idx, scale in zip(integer_features, integer_scaler.scale_, strict=False)
+            }
+        tabular_metadata = TabularAdversarialMetadata(
+            feature_names=feature_names,
+            feature_types=[
+                CONTINUOUS if idx in continuous_feature_set
+                else INTEGER if idx in integer_feature_set
+                else NON_PERTURBABLE
+                for idx in range(len(feature_names))
+            ],
+            feature_min_norm=np.min(X_train_np, axis=0).astype(float).tolist(),
+            feature_max_norm=np.max(X_train_np, axis=0).astype(float).tolist(),
+            integer_step_norm=integer_step_norm,
+        ).to_dict()
+        logging.getLogger().info(
+            "[AdultCensus] Tabular adversarial feature mask | continuous=%s | integer=%s | "
+            "non_perturbable=%s | continuous_features=%s | integer_features=%s | integer_step_norm=%s",
+            len(continuous_features),
+            len(integer_features),
+            len(non_perturbable_features),
+            [feature_names[idx] for idx in continuous_features],
+            [feature_names[idx] for idx in integer_features],
+            integer_step_norm,
+        )
 
         train_ds = AdultCensusTorchDataset(
             X_train_np,
             np.asarray(y_train, dtype=np.int64),
             feature_names=feature_names,
             continuous_features=continuous_features,
-            binary_features=binary_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
+            tabular_metadata=tabular_metadata,
         )
         test_ds = AdultCensusTorchDataset(
             X_test_np,
             np.asarray(y_test, dtype=np.int64),
             feature_names=feature_names,
             continuous_features=continuous_features,
-            binary_features=binary_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
+            tabular_metadata=tabular_metadata,
         )
 
         return train_ds, test_ds
diff --git a/nebula/core/datasets/nebuladataset.py b/nebula/core/datasets/nebuladataset.py
index a4468ded8..28565c798 100755
--- a/nebula/core/datasets/nebuladataset.py
+++ b/nebula/core/datasets/nebuladataset.py
@@ -1,4 +1,5 @@
 import copy
+import json
 import os
 import pickle
 from abc import ABC, abstractmethod
@@ -74,6 +75,11 @@ def load_data(self):
             self.data = self.load_partition(f, f"{prefix}_data")
             self.targets = np.array(f[f"{prefix}_targets"])
             self.num_classes = f[f"{prefix}_data"].attrs.get("num_classes", 0)
+            raw_tabular_metadata = f[f"{prefix}_data"].attrs.get("tabular_metadata", None)
+            if raw_tabular_metadata is not None:
+                if isinstance(raw_tabular_metadata, bytes):
+                    raw_tabular_metadata = raw_tabular_metadata.decode("utf-8")
+                self.tabular_metadata = json.loads(raw_tabular_metadata)
             self.length = len(self.data)
         logging_training.info(
             f"[NebulaPartitionHandler] [{self.prefix}] Loaded {self.length} samples from {self.file_path} and {self.num_classes} classes."
@@ -289,6 +295,8 @@ def load_partition(self):
 
             self.local_test_set = self.handler(test_partition_file, "local_test", config=self.config, empty=True)
             self.local_test_set.set_data(self.test_set.data, self.test_set.targets)
+            if hasattr(self.test_set, "tabular_metadata"):
+                self.local_test_set.tabular_metadata = self.test_set.tabular_metadata
             self.local_test_indices = self.set_local_test_indices()
 
             logging_training.info(f"Successfully loaded partition data for participant {p}.")
@@ -484,6 +492,7 @@ def save_partitions(self):
                 test_data = [self.test_set[i] for i in indices]
                 self.save_partition(test_data, f, "test_data")
                 f["test_data"].attrs["num_classes"] = self.num_classes
+                self._save_tabular_metadata_attr(self.test_set, f["test_data"])
                 test_targets = np.array(self.test_set.targets)
                 f.create_dataset("test_targets", data=test_targets, compression="gzip")
 
@@ -495,6 +504,7 @@ def save_partitions(self):
                     train_data = [self.train_set[i] for i in indices]
                     self.save_partition(train_data, f, "train_data")
                     f["train_data"].attrs["num_classes"] = self.num_classes
+                    self._save_tabular_metadata_attr(self.train_set, f["train_data"])
                     train_targets = np.array([self.train_set.targets[i] for i in indices])
                     f.create_dataset("train_targets", data=train_targets, compression="gzip")
                     logging.info(f"Partition saved for participant {participant}.")
@@ -508,6 +518,14 @@ def save_partitions(self):
             self.clear()
             logging.info("Cleared dataset after saving partitions.")
 
+    def _save_tabular_metadata_attr(self, dataset, h5_dataset):
+        metadata = getattr(dataset, "tabular_metadata", None)
+        if metadata is None:
+            return
+        if hasattr(metadata, "to_dict"):
+            metadata = metadata.to_dict()
+        h5_dataset.attrs["tabular_metadata"] = json.dumps(metadata)
+
     @abstractmethod
     def generate_non_iid_map(self, dataset, partition="dirichlet", plot=False):
         """
diff --git a/nebula/core/datasets/tabular_metadata.py b/nebula/core/datasets/tabular_metadata.py
new file mode 100644
index 000000000..eeb277989
--- /dev/null
+++ b/nebula/core/datasets/tabular_metadata.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Any
+
+
+CONTINUOUS = "continuous"
+INTEGER = "integer"
+NON_PERTURBABLE = "non_perturbable"
+
+
+@dataclass(frozen=True)
+class TabularAdversarialMetadata:
+    """Minimal metadata for tabular adversarial training."""
+
+    feature_names: list[str]
+    feature_types: list[str]
+    feature_min_norm: list[float]
+    feature_max_norm: list[float]
+    integer_step_norm: dict[int, float] | None = None
+
+    def __post_init__(self):
+        n_features = len(self.feature_names)
+        if len(self.feature_types) != n_features:
+            raise ValueError("feature_types length must match feature_names length")
+        if len(self.feature_min_norm) != n_features:
+            raise ValueError("feature_min_norm length must match feature_names length")
+        if len(self.feature_max_norm) != n_features:
+            raise ValueError("feature_max_norm length must match feature_names length")
+        invalid_types = set(self.feature_types) - {CONTINUOUS, INTEGER, NON_PERTURBABLE}
+        if invalid_types:
+            raise ValueError(f"Unsupported tabular feature types: {sorted(invalid_types)}")
+
+    def to_dict(self) -> dict[str, Any]:
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "TabularAdversarialMetadata":
+        return cls(  # rebuilds from to_dict()/JSON output: step-map keys may arrive as str, and an unset map as None
+            feature_names=[str(value) for value in data["feature_names"]],
+            feature_types=[str(value) for value in data["feature_types"]],
+            feature_min_norm=[float(value) for value in data["feature_min_norm"]],
+            feature_max_norm=[float(value) for value in data["feature_max_norm"]],
+            integer_step_norm={int(k): float(v) for k, v in (data.get("integer_step_norm") or {}).items()},
+        )
diff --git a/nebula/core/models/adultcensus/mlp.py b/nebula/core/models/adultcensus/mlp.py
index 68f1d77d8..d3fff9e94 100644
--- a/nebula/core/models/adultcensus/mlp.py
+++ b/nebula/core/models/adultcensus/mlp.py
@@ -13,7 +13,7 @@ class AdultCensusModelMLP(NebulaModel):
     """
     def __init__(
         self,
-        input_dim: int = 105,
+        input_dim: int = 104,
         num_classes: int = 2,
         learning_rate: float = 1e-3,
         metrics=None,
diff --git a/nebula/core/node.py b/nebula/core/node.py
index 395752cc5..a5fa22f1d 100755
--- a/nebula/core/node.py
+++ b/nebula/core/node.py
@@ -190,7 +190,7 @@ async def main(config: Config):
     dataset = NebulaPartition(handler=handler, config=config)
     dataset.load_partition()
     apply_feature_squeezing_if_enabled(dataset, config.participant)
-    apply_adversarial_training_if_enabled(model, config.participant)
+    apply_adversarial_training_if_enabled(model, config.participant, dataset)
     dataset.log_partition()
     samples_per_label = Counter(dataset.get_train_labels())
 
diff --git a/nebula/frontend/static/js/deployment/adversarial-training.js b/nebula/frontend/static/js/deployment/adversarial-training.js
index 72bc6a47d..91cecd96e 100644
--- a/nebula/frontend/static/js/deployment/adversarial-training.js
+++ b/nebula/frontend/static/js/deployment/adversarial-training.js
@@ -17,6 +17,7 @@ const AdversarialTrainingManager = (function() {
     };
 
     const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
+    const TABULAR_DATASETS = new Set(["AdultCensus"]);
 
     function initializeAdversarialTraining() {
         setupAdversarialTrainingSwitch();
@@ -71,13 +72,18 @@ const AdversarialTrainingManager = (function() {
 
     function updateDatasetAvailability() {
         const dataset = document.getElementById("datasetSelect")?.value;
-        const enabledForDataset = IMAGE_DATASETS.has(dataset);
+        const enabledForDataset = IMAGE_DATASETS.has(dataset) || TABULAR_DATASETS.has(dataset);
+        const domain = TABULAR_DATASETS.has(dataset) ? "tabular" : "image";
         const adversarialTrainingSwitch = document.getElementById("adversarialTrainingSwitch");
         const datasetNote = document.getElementById("adversarial-training-dataset-note");
+        const domainInput = document.getElementById("adversarialTrainingDomain");
 
         if (datasetNote) {
             datasetNote.style.display = enabledForDataset ? "none" : "block";
         }
+        if (domainInput) {
+            domainInput.value = domain;
+        }
 
         if (!adversarialTrainingSwitch) return;
         adversarialTrainingSwitch.disabled = !enabledForDataset;
@@ -192,7 +198,7 @@ const AdversarialTrainingManager = (function() {
             return "[Adversarial Training] Apply probability must be between 0 and 1.";
         }
         if (config.clip_min >= config.clip_max) {
-            return "[Adversarial Training] Pixel min bound must be smaller than max bound.";
+            return "[Adversarial Training] Min bound must be smaller than max bound.";
         }
         return null;
     }
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 567bedc57..6d40655c0 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -588,7 +588,7 @@ <h5 class="step-title">Enable/Disable Adversarial Training</h5>
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <small id="adversarial-training-dataset-note" class="form-text text-muted" style="display: none;">
-                        Adversarial Training is currently available only for image datasets.
+                        Adversarial Training is available for image datasets and Adult Census.
                     </small>
                     <div id="adversarial-training-settings" style="margin-top: 10px; display: none;">
                         <input type="hidden" id="adversarialTrainingDomain" value="image">
@@ -657,7 +657,7 @@ <h5 class="step-title">Log adversarial metrics</h5>
                             </div>
                         </details>
                         <small class="form-text text-muted">
-                            Epsilon and bounds use pixel scale; Nebula converts them to the normalized tensor space.
+                            Epsilon and bounds use the dataset input scale; image datasets convert pixel scale to normalized tensors.
                         </small>
                     </div>
                 </div>

From 3e1fd2b8fe87671a662cbd94bafd4b5cb8bd5564 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 25 May 2026 15:01:37 +0200
Subject: [PATCH 50/66] Adversarial training implemented for BreastCancer,
 KDDCUP99 and Covtype, frontend updated: Adversarial training dataset
 limitation

---
 .../addons/defenses/adversarial_training.py   |   5 +-
 .../datasets/breast_cancer/breast_cancer.py   | 124 +++++++++++-
 nebula/core/datasets/covtype/covtype.py       | 144 +++++++++++++-
 nebula/core/datasets/kddcup99/kddcup99.py     | 188 +++++++++++++++++-
 .../js/deployment/adversarial-training.js     |  12 +-
 nebula/frontend/templates/deployment.html     |   2 +-
 6 files changed, 446 insertions(+), 29 deletions(-)

diff --git a/nebula/addons/defenses/adversarial_training.py b/nebula/addons/defenses/adversarial_training.py
index 558a5fe90..340836d17 100644
--- a/nebula/addons/defenses/adversarial_training.py
+++ b/nebula/addons/defenses/adversarial_training.py
@@ -263,7 +263,8 @@ def from_participant_config(
         cls._validate_config(config)
 
         if config.domain == "tabular":
-            if dataset_name != "AdultCensus":
+            supported_tabular_datasets = {"AdultCensus", "BreastCancer", "Covtype", "KDDCUP99"}
+            if dataset_name not in supported_tabular_datasets:
                 logging.warning(
                     "[AdversarialTrainingDefense] Skipping tabular adversarial training: dataset '%s' is not supported yet",
                     dataset_name,
@@ -334,7 +335,7 @@ def _get_tabular_metadata(partition) -> TabularAdversarialMetadata:
         train_set = getattr(partition, "train_set", None) if partition is not None else None
         metadata = getattr(train_set, "tabular_metadata", None)
         if metadata is None:
-            raise ValueError("AdultCensus tabular adversarial training requires tabular_metadata")
+            raise ValueError("Tabular adversarial training requires tabular_metadata")
         if isinstance(metadata, TabularAdversarialMetadata):
             tabular_metadata = metadata
         else:
diff --git a/nebula/core/datasets/breast_cancer/breast_cancer.py b/nebula/core/datasets/breast_cancer/breast_cancer.py
index b951a8113..ac7446770 100644
--- a/nebula/core/datasets/breast_cancer/breast_cancer.py
+++ b/nebula/core/datasets/breast_cancer/breast_cancer.py
@@ -6,6 +6,7 @@
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
 
 
 class BreastCancerTorchDataset(Dataset):
@@ -14,7 +15,16 @@ class BreastCancerTorchDataset(Dataset):
     x: float32 tensor (n_features,)
     y: long scalar {0,1}
     """
-    def __init__(self, x: np.ndarray, y: np.ndarray, feature_names: list[str] | None = None):
+    def __init__(
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        feature_names: list[str] | None = None,
+        continuous_features: list[int] | None = None,
+        integer_features: list[int] | None = None,
+        non_perturbable_features: list[int] | None = None,
+        tabular_metadata: dict | None = None,
+    ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
 
@@ -33,8 +43,11 @@ def __init__(self, x: np.ndarray, y: np.ndarray, feature_names: list[str] | None
         self.targets = self.y
         self.classes = ["0", "1"]
         self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
-        self.continuous_features = list(range(self.x.shape[1]))
+        self.continuous_features = continuous_features or list(range(self.x.shape[1]))
+        self.integer_features = integer_features or []
+        self.non_perturbable_features = non_perturbable_features or []
         self.binary_features = []
+        self.tabular_metadata = tabular_metadata
         self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
@@ -84,6 +97,41 @@ class BreastCancerDataset(NebulaDataset):
     - tabular features (30)
     - deterministic stratified train/test split
     """
+    PERTURBABLE_CONTINUOUS_COLUMNS = [
+        "mean radius",
+        "mean texture",
+        "mean perimeter",
+        "mean area",
+        "mean smoothness",
+        "mean compactness",
+        "mean concavity",
+        "mean concave points",
+        "mean symmetry",
+        "mean fractal dimension",
+        "radius error",
+        "texture error",
+        "perimeter error",
+        "area error",
+        "smoothness error",
+        "compactness error",
+        "concavity error",
+        "concave points error",
+        "symmetry error",
+        "fractal dimension error",
+        "worst radius",
+        "worst texture",
+        "worst perimeter",
+        "worst area",
+        "worst smoothness",
+        "worst compactness",
+        "worst concavity",
+        "worst concave points",
+        "worst symmetry",
+        "worst fractal dimension",
+    ]
+    PERTURBABLE_INTEGER_COLUMNS = []
+    NON_PERTURBABLE_COLUMNS = []
+
     def __init__(
         self,
         num_classes: int = 2,
@@ -116,6 +164,28 @@ def initialize_dataset(self):
 
         self.data_partitioning(plot=True)
 
+    @classmethod
+    def _validate_manual_schema(cls, columns) -> None:
+        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
+        integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
+        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
+        overlapping_columns = sorted(
+            (continuous_columns & integer_columns)
+            | (continuous_columns & non_perturbable_columns)
+            | (integer_columns & non_perturbable_columns)
+        )
+        if overlapping_columns:
+            raise ValueError(f"BreastCancerDataset columns configured twice: {overlapping_columns}")
+
+        configured_columns = continuous_columns | integer_columns | non_perturbable_columns
+        dataset_columns = set(columns)
+        missing_columns = sorted(configured_columns - dataset_columns)
+        if missing_columns:
+            raise ValueError(f"BreastCancerDataset is missing configured columns: {missing_columns}")
+        unconfigured_columns = sorted(dataset_columns - configured_columns)
+        if unconfigured_columns:
+            raise ValueError(f"BreastCancerDataset has unconfigured columns: {unconfigured_columns}")
+
     def load_breast_cancer_dataset(self):
         # Local cache directory (aunque load_breast_cancer no descarga, seguimos el patrón)
         data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
@@ -134,6 +204,7 @@ def load_breast_cancer_dataset(self):
         x = np.asarray(ds.data)
         y = np.asarray(ds.target).reshape(-1)  # already 0/1
         feature_names = [str(name) for name in ds.feature_names]
+        self._validate_manual_schema(feature_names)
 
         x_train, x_test, y_train, y_test = train_test_split(
             x,
@@ -148,8 +219,53 @@ def load_breast_cancer_dataset(self):
         x_train = scaler.fit_transform(x_train)
         x_test = scaler.transform(x_test)
 
-        train_ds = BreastCancerTorchDataset(x_train, y_train, feature_names=feature_names)
-        test_ds = BreastCancerTorchDataset(x_test, y_test, feature_names=feature_names)
+        x_train_np = np.asarray(x_train, dtype=np.float32)
+        x_test_np = np.asarray(x_test, dtype=np.float32)
+        continuous_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name in self.PERTURBABLE_CONTINUOUS_COLUMNS
+        ]
+        integer_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name in self.PERTURBABLE_INTEGER_COLUMNS
+        ]
+        non_perturbable_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name in self.NON_PERTURBABLE_COLUMNS
+        ]
+        continuous_feature_set = set(continuous_features)
+        integer_feature_set = set(integer_features)
+        tabular_metadata = TabularAdversarialMetadata(
+            feature_names=feature_names,
+            feature_types=[
+                CONTINUOUS if idx in continuous_feature_set
+                else INTEGER if idx in integer_feature_set
+                else NON_PERTURBABLE
+                for idx in range(len(feature_names))
+            ],
+            feature_min_norm=np.min(x_train_np, axis=0).astype(float).tolist(),
+            feature_max_norm=np.max(x_train_np, axis=0).astype(float).tolist(),
+            integer_step_norm={},
+        ).to_dict()
+
+        train_ds = BreastCancerTorchDataset(
+            x_train_np,
+            y_train,
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
+            tabular_metadata=tabular_metadata,
+        )
+        test_ds = BreastCancerTorchDataset(
+            x_test_np,
+            y_test,
+            feature_names=feature_names,
+            continuous_features=continuous_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
+            tabular_metadata=tabular_metadata,
+        )
 
         return train_ds, test_ds
 
diff --git a/nebula/core/datasets/covtype/covtype.py b/nebula/core/datasets/covtype/covtype.py
index 4c0d28cfc..22a24c682 100644
--- a/nebula/core/datasets/covtype/covtype.py
+++ b/nebula/core/datasets/covtype/covtype.py
@@ -8,6 +8,7 @@
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
 
 
 class CovtypeTorchDataset(Dataset):
@@ -24,7 +25,10 @@ def __init__(
         y: np.ndarray,
         feature_names: list[str] | None = None,
         continuous_features: list[int] | None = None,
+        integer_features: list[int] | None = None,
+        non_perturbable_features: list[int] | None = None,
         binary_features: list[int] | None = None,
+        tabular_metadata: dict | None = None,
     ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
@@ -47,7 +51,10 @@ def __init__(
         self.classes = [str(i) for i in range(n_classes)]
         self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
         self.continuous_features = continuous_features or []
+        self.integer_features = integer_features or []
+        self.non_perturbable_features = non_perturbable_features or []
         self.binary_features = binary_features or []
+        self.tabular_metadata = tabular_metadata
         self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
@@ -113,6 +120,66 @@ class CovtypeDataset(NebulaDataset):
     Requirements:
     - scikit-learn must be installed (for fetch_covtype + train_test_split).
     """
+    PERTURBABLE_CONTINUOUS_COLUMNS = [
+        "Elevation",
+        "Aspect",
+        "Slope",
+        "Horizontal_Distance_To_Hydrology",
+        "Vertical_Distance_To_Hydrology",
+        "Horizontal_Distance_To_Roadways",
+        "Hillshade_9am",
+        "Hillshade_Noon",
+        "Hillshade_3pm",
+        "Horizontal_Distance_To_Fire_Points",
+    ]
+    PERTURBABLE_INTEGER_COLUMNS = []
+    NON_PERTURBABLE_COLUMNS = [
+        "Wilderness_Area_0",
+        "Wilderness_Area_1",
+        "Wilderness_Area_2",
+        "Wilderness_Area_3",
+        "Soil_Type_0",
+        "Soil_Type_1",
+        "Soil_Type_2",
+        "Soil_Type_3",
+        "Soil_Type_4",
+        "Soil_Type_5",
+        "Soil_Type_6",
+        "Soil_Type_7",
+        "Soil_Type_8",
+        "Soil_Type_9",
+        "Soil_Type_10",
+        "Soil_Type_11",
+        "Soil_Type_12",
+        "Soil_Type_13",
+        "Soil_Type_14",
+        "Soil_Type_15",
+        "Soil_Type_16",
+        "Soil_Type_17",
+        "Soil_Type_18",
+        "Soil_Type_19",
+        "Soil_Type_20",
+        "Soil_Type_21",
+        "Soil_Type_22",
+        "Soil_Type_23",
+        "Soil_Type_24",
+        "Soil_Type_25",
+        "Soil_Type_26",
+        "Soil_Type_27",
+        "Soil_Type_28",
+        "Soil_Type_29",
+        "Soil_Type_30",
+        "Soil_Type_31",
+        "Soil_Type_32",
+        "Soil_Type_33",
+        "Soil_Type_34",
+        "Soil_Type_35",
+        "Soil_Type_36",
+        "Soil_Type_37",
+        "Soil_Type_38",
+        "Soil_Type_39",
+    ]
+
     def __init__(
         self,
         num_classes: int = 7,
@@ -149,6 +216,35 @@ def initialize_dataset(self):
 
         self.data_partitioning(plot=True)
 
+    @classmethod
+    def _default_feature_names(cls, n_features: int) -> list[str]:
+        configured_columns = cls.PERTURBABLE_CONTINUOUS_COLUMNS + cls.NON_PERTURBABLE_COLUMNS
+        if n_features == len(configured_columns):
+            return configured_columns
+        return [f"feature_{i}" for i in range(n_features)]
+
+    @classmethod
+    def _validate_manual_schema(cls, columns) -> None:
+        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
+        integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
+        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
+        overlapping_columns = sorted(
+            (continuous_columns & integer_columns)
+            | (continuous_columns & non_perturbable_columns)
+            | (integer_columns & non_perturbable_columns)
+        )
+        if overlapping_columns:
+            raise ValueError(f"CovtypeDataset columns configured twice: {overlapping_columns}")
+
+        configured_columns = continuous_columns | integer_columns | non_perturbable_columns
+        dataset_columns = set(columns)
+        missing_columns = sorted(configured_columns - dataset_columns)
+        if missing_columns:
+            raise ValueError(f"CovtypeDataset is missing configured columns: {missing_columns}")
+        unconfigured_columns = sorted(dataset_columns - configured_columns)
+        if unconfigured_columns:
+            raise ValueError(f"CovtypeDataset has unconfigured columns: {unconfigured_columns}")
+
     def load_covtype_dataset(self):
         """
         Loads Covtype via sklearn, performs a deterministic train/test split,
@@ -173,10 +269,32 @@ def load_covtype_dataset(self):
         y = cov.target  # commonly 1..7 in sklearn
         feature_names = getattr(cov, "feature_names", None)
         if feature_names is None:
-            feature_names = [f"feature_{i}" for i in range(x.shape[1])]
+            feature_names = self._default_feature_names(x.shape[1])
         feature_names = [str(name) for name in feature_names]
-        continuous_features = list(range(min(10, x.shape[1])))
-        binary_features = [i for i in range(x.shape[1]) if i not in continuous_features]
+        try:
+            self._validate_manual_schema(feature_names)
+        except ValueError:
+            if x.shape[1] != len(self.PERTURBABLE_CONTINUOUS_COLUMNS) + len(self.NON_PERTURBABLE_COLUMNS):
+                raise
+            import logging
+            logging.getLogger().info(
+                "[Covtype] Replacing sklearn feature names with canonical Covtype names for adversarial metadata"
+            )
+            feature_names = self._default_feature_names(x.shape[1])
+            self._validate_manual_schema(feature_names)
+        continuous_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name in self.PERTURBABLE_CONTINUOUS_COLUMNS
+        ]
+        integer_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name in self.PERTURBABLE_INTEGER_COLUMNS
+        ]
+        non_perturbable_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name in self.NON_PERTURBABLE_COLUMNS
+        ]
+        binary_features = non_perturbable_features
 
         # Map labels to 0..6 (CrossEntropyLoss convention)
         # If already 0..6, this is harmless for 1..7 only if we detect min.
@@ -219,20 +337,40 @@ def load_covtype_dataset(self):
         x_test = np.asarray(x_test, dtype=np.float32).copy()
         x_train[:, continuous_features] = scaler.fit_transform(x_train[:, continuous_features])
         x_test[:, continuous_features] = scaler.transform(x_test[:, continuous_features])
+        continuous_feature_set = set(continuous_features)
+        integer_feature_set = set(integer_features)
+        tabular_metadata = TabularAdversarialMetadata(
+            feature_names=feature_names,
+            feature_types=[
+                CONTINUOUS if idx in continuous_feature_set
+                else INTEGER if idx in integer_feature_set
+                else NON_PERTURBABLE
+                for idx in range(len(feature_names))
+            ],
+            feature_min_norm=np.min(x_train, axis=0).astype(float).tolist(),
+            feature_max_norm=np.max(x_train, axis=0).astype(float).tolist(),
+            integer_step_norm={},
+        ).to_dict()
 
         train_ds = CovtypeTorchDataset(
             x_train,
             y_train,
             feature_names=feature_names,
             continuous_features=continuous_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
             binary_features=binary_features,
+            tabular_metadata=tabular_metadata,
         )
         test_ds = CovtypeTorchDataset(
             x_test,
             y_test,
             feature_names=feature_names,
             continuous_features=continuous_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
             binary_features=binary_features,
+            tabular_metadata=tabular_metadata,
         )
 
         return train_ds, test_ds
diff --git a/nebula/core/datasets/kddcup99/kddcup99.py b/nebula/core/datasets/kddcup99/kddcup99.py
index d24ca3469..ef4403deb 100644
--- a/nebula/core/datasets/kddcup99/kddcup99.py
+++ b/nebula/core/datasets/kddcup99/kddcup99.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from typing import Tuple, Any
 
@@ -6,6 +7,7 @@
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
+from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
 
 
 class KDDCUP99TorchDataset(Dataset):
@@ -22,7 +24,10 @@ def __init__(
         y: np.ndarray,
         feature_names: list[str] | None = None,
         continuous_features: list[int] | None = None,
+        integer_features: list[int] | None = None,
+        non_perturbable_features: list[int] | None = None,
         binary_features: list[int] | None = None,
+        tabular_metadata: dict | None = None,
     ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
             raise ValueError("x and y must be numpy arrays")
@@ -45,7 +50,10 @@ def __init__(
         self.classes = [str(i) for i in range(n_classes)]
         self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
         self.continuous_features = continuous_features or []
+        self.integer_features = integer_features or []
+        self.non_perturbable_features = non_perturbable_features or []
         self.binary_features = binary_features or []
+        self.tabular_metadata = tabular_metadata
         self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
@@ -110,6 +118,97 @@ class KDDCUP99Dataset(NebulaDataset):
     - scikit-learn must be installed
     - pandas must be installed
     """
+    RAW_FEATURE_COLUMNS = [
+        "duration",
+        "protocol_type",
+        "service",
+        "flag",
+        "src_bytes",
+        "dst_bytes",
+        "land",
+        "wrong_fragment",
+        "urgent",
+        "hot",
+        "num_failed_logins",
+        "logged_in",
+        "num_compromised",
+        "root_shell",
+        "su_attempted",
+        "num_root",
+        "num_file_creations",
+        "num_shells",
+        "num_access_files",
+        "num_outbound_cmds",
+        "is_host_login",
+        "is_guest_login",
+        "count",
+        "srv_count",
+        "serror_rate",
+        "srv_serror_rate",
+        "rerror_rate",
+        "srv_rerror_rate",
+        "same_srv_rate",
+        "diff_srv_rate",
+        "srv_diff_host_rate",
+        "dst_host_count",
+        "dst_host_srv_count",
+        "dst_host_same_srv_rate",
+        "dst_host_diff_srv_rate",
+        "dst_host_same_src_port_rate",
+        "dst_host_srv_diff_host_rate",
+        "dst_host_serror_rate",
+        "dst_host_srv_serror_rate",
+        "dst_host_rerror_rate",
+        "dst_host_srv_rerror_rate",
+    ]
+    PERTURBABLE_CONTINUOUS_COLUMNS = [
+        "serror_rate",
+        "srv_serror_rate",
+        "rerror_rate",
+        "srv_rerror_rate",
+        "same_srv_rate",
+        "diff_srv_rate",
+        "srv_diff_host_rate",
+        "dst_host_same_srv_rate",
+        "dst_host_diff_srv_rate",
+        "dst_host_same_src_port_rate",
+        "dst_host_srv_diff_host_rate",
+        "dst_host_serror_rate",
+        "dst_host_srv_serror_rate",
+        "dst_host_rerror_rate",
+        "dst_host_srv_rerror_rate",
+    ]
+    PERTURBABLE_INTEGER_COLUMNS = [
+        "duration",
+        "src_bytes",
+        "dst_bytes",
+        "wrong_fragment",
+        "urgent",
+        "hot",
+        "num_failed_logins",
+        "num_compromised",
+        "num_root",
+        "num_file_creations",
+        "num_shells",
+        "num_access_files",
+        "num_outbound_cmds",
+        "count",
+        "srv_count",
+        "dst_host_count",
+        "dst_host_srv_count",
+    ]
+    NON_PERTURBABLE_RAW_COLUMNS = [
+        "protocol_type",
+        "service",
+        "flag",
+        "land",
+        "logged_in",
+        "root_shell",
+        "su_attempted",
+        "is_host_login",
+        "is_guest_login",
+    ]
+
     def __init__(
         self,
         num_classes: int = 23,
@@ -122,8 +221,8 @@ def __init__(
         seed: int = 42,
         config_dir: str | None = None,
         test_size: float = 0.2,
-        train_limit: int | None = None,
-        test_limit: int | None = None,
+        train_limit: int | None = 60000,
+        test_limit: int | None = 10000,
         subset: str | None = None,
         percent10: bool = True,
     ):
@@ -150,6 +249,35 @@ def initialize_dataset(self):
 
         self.data_partitioning(plot=True)
 
+    @classmethod
+    def _ensure_raw_feature_names(cls, x):  # give positional columns their RAW_FEATURE_COLUMNS names
+        if list(x.columns) == list(range(len(cls.RAW_FEATURE_COLUMNS))):  # columns are exactly 0..N-1
+            x = x.copy()  # rename on a copy so the caller's object keeps its original columns
+            x.columns = cls.RAW_FEATURE_COLUMNS
+        return x  # any other column layout is returned untouched
+
+    @classmethod
+    def _validate_manual_schema(cls, columns) -> None:  # raises ValueError on any schema mismatch
+        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
+        integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
+        non_perturbable_columns = set(cls.NON_PERTURBABLE_RAW_COLUMNS)
+        overlapping_columns = sorted(  # names that appear in more than one of the three lists
+            (continuous_columns & integer_columns)
+            | (continuous_columns & non_perturbable_columns)
+            | (integer_columns & non_perturbable_columns)
+        )
+        if overlapping_columns:
+            raise ValueError(f"KDDCUP99Dataset columns configured twice: {overlapping_columns}")
+
+        configured_columns = continuous_columns | integer_columns | non_perturbable_columns
+        dataset_columns = set(columns)
+        missing_columns = sorted(configured_columns - dataset_columns)  # configured but absent from `columns`
+        if missing_columns:
+            raise ValueError(f"KDDCUP99Dataset is missing configured columns: {missing_columns}")
+        unconfigured_columns = sorted(dataset_columns - configured_columns)  # present but in none of the lists
+        if unconfigured_columns:
+            raise ValueError(f"KDDCUP99Dataset has unconfigured columns: {unconfigured_columns}")
+
     def load_kddcup99_dataset(self):
         """
         Loads KDDCUP99 via sklearn, performs deterministic preprocessing
@@ -188,6 +316,8 @@ def load_kddcup99_dataset(self):
             x = pd.DataFrame(x)
         if not hasattr(y, "astype"):
             y = pd.Series(y)
+        x = self._ensure_raw_feature_names(x)
+        self._validate_manual_schema(x.columns)
 
         # Decode bytes -> str where needed
         def _decode_if_bytes(v):
@@ -202,14 +332,23 @@ def _decode_if_bytes(v):
                 x[col] = x[col].map(_decode_if_bytes)
 
         y = y.map(_decode_if_bytes)
-        numeric_columns = x.select_dtypes(exclude=["object", "category"]).columns.tolist()
 
         # One-hot encode categorical columns, keep numeric ones as-is.
         x = pd.get_dummies(x, drop_first=False)
         feature_names = [str(col) for col in x.columns]
-        numeric_columns = [col for col in numeric_columns if col in x.columns]
-        continuous_features = [x.columns.get_loc(col) for col in numeric_columns]
-        binary_features = [i for i in range(len(feature_names)) if i not in continuous_features]
+        continuous_features = [
+            x.columns.get_loc(col)
+            for col in self.PERTURBABLE_CONTINUOUS_COLUMNS
+            if col in x.columns
+        ]
+        integer_features = [
+            x.columns.get_loc(col)
+            for col in self.PERTURBABLE_INTEGER_COLUMNS
+            if col in x.columns
+        ]
+        perturbable_features = set(continuous_features) | set(integer_features)
+        non_perturbable_features = [i for i in range(len(feature_names)) if i not in perturbable_features]
+        binary_features = non_perturbable_features
 
         # Map labels to 0..num_classes-1 deterministically
         y = pd.Series(y).astype(str)
@@ -238,6 +377,7 @@ def _decode_if_bytes(v):
                 shuffle=True,
                 stratify=y_train,
             )
+            logging.getLogger().info("[KDDCUP99] Limited train split to %s samples", len(y_train))
 
         if self.test_limit is not None and len(y_test) > self.test_limit:
             x_test, _, y_test, _ = train_test_split(
@@ -247,29 +387,57 @@ def _decode_if_bytes(v):
                 shuffle=True,
                 stratify=y_test,
             )
+            logging.getLogger().info("[KDDCUP99] Limited test split to %s samples", len(y_test))
 
         x_train_np = x_train.astype(np.float32).to_numpy(copy=True)
         x_test_np = x_test.astype(np.float32).to_numpy(copy=True)
 
-        # Scale the original numeric columns after splitting. One-hot columns stay binary.
-        if continuous_features:
+        # Scale perturbable numeric columns after splitting. One-hot and binary flags stay unchanged.
+        scaled_features = continuous_features + integer_features
+        if scaled_features:
             scaler = StandardScaler()
-            x_train_np[:, continuous_features] = scaler.fit_transform(x_train_np[:, continuous_features])
-            x_test_np[:, continuous_features] = scaler.transform(x_test_np[:, continuous_features])
+            x_train_np[:, scaled_features] = scaler.fit_transform(x_train_np[:, scaled_features])
+            x_test_np[:, scaled_features] = scaler.transform(x_test_np[:, scaled_features])
+        integer_step_norm = {}
+        if integer_features:
+            integer_step_norm = {
+                idx: float(1.0 / scale)
+                for idx, scale in zip(integer_features, scaler.scale_[len(continuous_features):], strict=False)
+            }
+        continuous_feature_set = set(continuous_features)
+        integer_feature_set = set(integer_features)
+        tabular_metadata = TabularAdversarialMetadata(
+            feature_names=feature_names,
+            feature_types=[
+                CONTINUOUS if idx in continuous_feature_set
+                else INTEGER if idx in integer_feature_set
+                else NON_PERTURBABLE
+                for idx in range(len(feature_names))
+            ],
+            feature_min_norm=np.min(x_train_np, axis=0).astype(float).tolist(),
+            feature_max_norm=np.max(x_train_np, axis=0).astype(float).tolist(),
+            integer_step_norm=integer_step_norm,
+        ).to_dict()
 
         train_ds = KDDCUP99TorchDataset(
             x_train_np,
             y_train,
             feature_names=feature_names,
             continuous_features=continuous_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
             binary_features=binary_features,
+            tabular_metadata=tabular_metadata,
         )
         test_ds = KDDCUP99TorchDataset(
             x_test_np,
             y_test,
             feature_names=feature_names,
             continuous_features=continuous_features,
+            integer_features=integer_features,
+            non_perturbable_features=non_perturbable_features,
             binary_features=binary_features,
+            tabular_metadata=tabular_metadata,
         )
 
         # Optional: preserve original class names for inspection/debugging
diff --git a/nebula/frontend/static/js/deployment/adversarial-training.js b/nebula/frontend/static/js/deployment/adversarial-training.js
index 91cecd96e..c22a5b320 100644
--- a/nebula/frontend/static/js/deployment/adversarial-training.js
+++ b/nebula/frontend/static/js/deployment/adversarial-training.js
@@ -17,7 +17,6 @@ const AdversarialTrainingManager = (function() {
     };
 
     const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
-    const TABULAR_DATASETS = new Set(["AdultCensus"]);
 
     function initializeAdversarialTraining() {
         setupAdversarialTrainingSwitch();
@@ -72,25 +71,20 @@ const AdversarialTrainingManager = (function() {
 
     function updateDatasetAvailability() {
         const dataset = document.getElementById("datasetSelect")?.value;
-        const enabledForDataset = IMAGE_DATASETS.has(dataset) || TABULAR_DATASETS.has(dataset);
-        const domain = TABULAR_DATASETS.has(dataset) ? "tabular" : "image";
+        const domain = IMAGE_DATASETS.has(dataset) ? "image" : "tabular"; // every non-image dataset is treated as tabular
         const adversarialTrainingSwitch = document.getElementById("adversarialTrainingSwitch");
         const datasetNote = document.getElementById("adversarial-training-dataset-note");
         const domainInput = document.getElementById("adversarialTrainingDomain");
 
         if (datasetNote) {
-            datasetNote.style.display = enabledForDataset ? "none" : "block";
+            datasetNote.style.display = "none"; // the note is always hidden: no dataset is excluded here any more
         }
         if (domainInput) {
             domainInput.value = domain;
         }
 
         if (!adversarialTrainingSwitch) return;
-        adversarialTrainingSwitch.disabled = !enabledForDataset;
-        if (!enabledForDataset) {
-            adversarialTrainingSwitch.checked = false;
-            toggleAdversarialTrainingSettings(false);
-        }
+        adversarialTrainingSwitch.disabled = false; // the switch is never disabled based on the selected dataset
     }
 
     function numberValue(id, fallback) {
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 6d40655c0..7a21c8162 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -588,7 +588,7 @@ <h5 class="step-title">Enable/Disable Adversarial Training</h5>
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <small id="adversarial-training-dataset-note" class="form-text text-muted" style="display: none;">
-                        Adversarial Training is available for image datasets and Adult Census.
+                        Adversarial Training is available for image and tabular datasets.
                     </small>
                     <div id="adversarial-training-settings" style="margin-top: 10px; display: none;">
                         <input type="hidden" id="adversarialTrainingDomain" value="image">

From 98f0195c4b2de965e5bdae43cfb058aca25c93e4 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Tue, 26 May 2026 10:35:35 +0200
Subject: [PATCH 51/66] KDDCUP fixed, adversarial training working for KDD

---
 nebula/controller/scenarios.py            |  2 +-
 nebula/core/datasets/kddcup99/kddcup99.py | 23 ++++++++++++-----------
 nebula/core/datasets/nebuladataset.py     | 21 +++++++++++++++++----
 nebula/core/models/kddcup99/mlp.py        |  2 +-
 4 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index 70639462b..f9fa18a17 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -1109,7 +1109,7 @@ async def load_configurations_and_start_nodes(
             )
         elif dataset_name == "KDDCUP99":
             dataset = KDDCUP99Dataset(
-                num_classes=23,
+                num_classes=2,
                 partitions_number=self.n_nodes,
                 iid=self.scenario.iid,
                 partition=self.scenario.partition_selection,
diff --git a/nebula/core/datasets/kddcup99/kddcup99.py b/nebula/core/datasets/kddcup99/kddcup99.py
index ef4403deb..644af9e3b 100644
--- a/nebula/core/datasets/kddcup99/kddcup99.py
+++ b/nebula/core/datasets/kddcup99/kddcup99.py
@@ -110,9 +110,10 @@ class KDDCUP99Dataset(NebulaDataset):
 
     Notes:
     - KDDCUP99 is a tabular intrusion-detection dataset.
-    - sklearn fetch_kddcup99 exposes 41 features and 23 classes.
+    - sklearn fetch_kddcup99 exposes 41 features.
+    - Targets are mapped to a binary task: normal vs attack.
     - Some columns are categorical/string-like, so we one-hot encode them.
-    - Targets may come as bytes/strings, so we map them to 0..num_classes-1.
+    - Targets may come as bytes/strings, so we decode before mapping labels.
 
     Requirements:
     - scikit-learn must be installed
@@ -211,7 +212,7 @@ class KDDCUP99Dataset(NebulaDataset):
 
     def __init__(
         self,
-        num_classes: int = 23,
+        num_classes: int = 2,
         partitions_number: int = 1,
         batch_size: int = 32,
         num_workers: int = 4,
@@ -221,8 +222,8 @@ def __init__(
         seed: int = 42,
         config_dir: str | None = None,
         test_size: float = 0.2,
-        train_limit: int | None = 60000,
-        test_limit: int | None = 10000,
+        train_limit: int | None = 12000,
+        test_limit: int | None = 2000,
         subset: str | None = None,
         percent10: bool = True,
     ):
@@ -336,6 +337,7 @@ def _decode_if_bytes(v):
         # One-hot encode categorical columns, keep numeric ones as-is.
         x = pd.get_dummies(x, drop_first=False)
         feature_names = [str(col) for col in x.columns]
+        logging.getLogger().info("[KDDCUP99] Encoded feature dimension: %s", len(feature_names))
         continuous_features = [
             x.columns.get_loc(col)
             for col in self.PERTURBABLE_CONTINUOUS_COLUMNS
@@ -350,14 +352,13 @@ def _decode_if_bytes(v):
         non_perturbable_features = [i for i in range(len(feature_names)) if i not in perturbable_features]
         binary_features = non_perturbable_features
 
-        # Map labels to 0..num_classes-1 deterministically
+        # Map labels to a binary task: 0 = normal, 1 = attack.
         y = pd.Series(y).astype(str)
-        classes = sorted(y.unique().tolist())
-        class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}
-        y = y.map(class_to_idx).to_numpy(dtype=np.int64, copy=False)
+        y = y.str.strip()
+        y = (y != "normal.").astype(np.int64).to_numpy(copy=False)
 
-        # Keep self.num_classes aligned with actual loaded subset
-        self.num_classes = len(classes)
+        classes = ["normal", "attack"]
+        self.num_classes = 2
 
         # Split "grande"
         x_train, x_test, y_train, y_test = train_test_split(
diff --git a/nebula/core/datasets/nebuladataset.py b/nebula/core/datasets/nebuladataset.py
index 28565c798..4e5e6c903 100755
--- a/nebula/core/datasets/nebuladataset.py
+++ b/nebula/core/datasets/nebuladataset.py
@@ -162,6 +162,9 @@ def load_partition(self, file, name):
             elif typ == "pickle_bytes":
                 logging_training.info(f"Loading compressed pickled bytes object from {name}")
                 return pickle.loads(item[()])
+            elif typ == "array":
+                logging_training.info(f"Loading array object from {name}")
+                return item[()]
             else:
                 logging_training.warning(f"[NebulaPartitionHandler] Unknown type encountered: {typ} for item {name}")
                 return item[()]
@@ -466,6 +469,18 @@ def save_partition(self, obj, file, name):
             logging.exception(f"Error saving object to HDF5: {e}")
             raise
 
+    def save_dataset_partition(self, dataset, indices, file, name):  # write the samples at `indices` to `file[name]`
+        if hasattr(dataset, "x") and isinstance(dataset.x, np.ndarray):  # ndarray-backed dataset: store one array
+            logging.info(f"Saving array partition {name} with {len(indices)} samples")
+            data = dataset.x[indices].astype(np.float32, copy=False)  # only dataset.x is written here, as float32
+            ds = file.create_dataset(name, data=data, compression="lzf", shuffle=True)
+            ds.attrs["__type__"] = "array"  # presumably the tag load_partition dispatches on -- TODO confirm
+            logging.info(f"Saved array partition {name} with shape {data.shape}")
+            return
+
+        partition_data = [dataset[i] for i in indices]  # fallback: materialise items and delegate to save_partition
+        self.save_partition(partition_data, file, name)
+
     def save_partitions(self):
         """
         Save each partition data (train, test, and local test) to separate pickle files.
@@ -489,8 +504,7 @@ def save_partitions(self):
             file_name = os.path.join(path, "global_test.h5")
             with h5py.File(file_name, "w") as f:
                 indices = list(range(len(self.test_set)))
-                test_data = [self.test_set[i] for i in indices]
-                self.save_partition(test_data, f, "test_data")
+                self.save_dataset_partition(self.test_set, indices, f, "test_data")
                 f["test_data"].attrs["num_classes"] = self.num_classes
                 self._save_tabular_metadata_attr(self.test_set, f["test_data"])
                 test_targets = np.array(self.test_set.targets)
@@ -501,8 +515,7 @@ def save_partitions(self):
                 with h5py.File(file_name, "w") as f:
                     logging.info(f"Saving training data for participant {participant} in {file_name}")
                     indices = self.train_indices_map[participant]
-                    train_data = [self.train_set[i] for i in indices]
-                    self.save_partition(train_data, f, "train_data")
+                    self.save_dataset_partition(self.train_set, indices, f, "train_data")
                     f["train_data"].attrs["num_classes"] = self.num_classes
                     self._save_tabular_metadata_attr(self.train_set, f["train_data"])
                     train_targets = np.array([self.train_set.targets[i] for i in indices])
diff --git a/nebula/core/models/kddcup99/mlp.py b/nebula/core/models/kddcup99/mlp.py
index d8c57d9d2..539f3ad98 100644
--- a/nebula/core/models/kddcup99/mlp.py
+++ b/nebula/core/models/kddcup99/mlp.py
@@ -7,7 +7,7 @@ class KDDCUP99ModelMLP(NebulaModel):
     def __init__(
         self,
         input_channels=1,
-        num_classes=23,
+        num_classes=2,
         learning_rate=1e-3,
         metrics=None,
         confusion_matrix=None,

From 2153bcdeafc3b8bfec23dc6d56c7e78b230a42ed Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 27 May 2026 12:52:18 +0200
Subject: [PATCH 52/66] Global privacy risk fixed. Important SDFL refactoring.

---
 nebula/addons/trustworthiness/calculation.py  |  12 +
 .../addons/trustworthiness/dfl_factsheet.py   |   6 +
 nebula/addons/trustworthiness/factsheet.py    |   6 +
 nebula/core/aggregation/aggregator.py         |  24 +-
 .../updatehandlers/sdflupdatehandler.py       |  24 ++
 .../updatehandlers/updatehandler.py           |   9 +
 nebula/core/engine.py                         | 123 +------
 nebula/core/noderole.py                       | 309 ++++++++++++------
 8 files changed, 276 insertions(+), 237 deletions(-)

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
index 33853a731..6fcb60ea2 100755
--- a/nebula/addons/trustworthiness/calculation.py
+++ b/nebula/addons/trustworthiness/calculation.py
@@ -385,6 +385,12 @@ def get_global_privacy_risk(dp, epsilon, n):
         float: The global privacy risk.
     """
 
+    try:
+        epsilon = float(epsilon)
+        n = float(n)
+    except (TypeError, ValueError):
+        return 1
+
     if dp is True and isinstance(epsilon, numbers.Number):
         return 1 / (1 + (n - 1) * math.pow(e, -epsilon))
     else:
@@ -403,6 +409,12 @@ def get_global_privacy_risk_dfl(dp, epsilon, n):
         float: The global privacy risk.
     """
 
+    try:
+        epsilon = float(epsilon)
+        n = float(n)
+    except (TypeError, ValueError):
+        return 1
+
     if dp is True and isinstance(epsilon, numbers.Number):
         return 1 / (1 + (n + 1) * math.pow(e, -epsilon))
     else:
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index c346fe1db..f4c78d4aa 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -6,6 +6,7 @@
     get_bytes_model,
     get_dp_local,
     get_elapsed_time,
+    get_global_privacy_risk_dfl,
     get_local_class_imbalance_score,
     get_local_normalized_entropy,
     get_underfitting_score_local,
@@ -120,6 +121,11 @@ def populate_factsheet_dfl(
         factsheet["participants"]["local_dataset_size"] = sample_size
 
         populate_reputation(factsheet, reputation_summary, include_neighbor_num=True)
+        factsheet["privacy"]["privacy_risk"] = get_global_privacy_risk_dfl(
+            dp_enabled,
+            dp_epsilon,
+            factsheet["participants"]["neighbor_num"],
+        )
 
         factsheet["sustainability"]["emissions_communication_local"] = (
             (bytes_sent * 2.24e-10 * carbon_intensity_local)
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 86454e4a1..7c23f20c2 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -13,6 +13,7 @@
     get_dp_global,
     get_elapsed_time,
     get_entropy_list,
+    get_global_privacy_risk,
     get_participant_loss_accuracy,
     get_underfitting_score,
 )
@@ -101,6 +102,11 @@ def populate_factsheet_cfl(
 
             dp_enabled, dp_epsilon = get_dp_global(scenario_name)
             set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
+            factsheet["privacy"]["privacy_risk"] = get_global_privacy_risk(
+                dp_enabled,
+                dp_epsilon,
+                factsheet["participants"]["client_num"],
+            )
 
             factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
             factsheet["system"]["avg_model_size"] = avg_model_size
diff --git a/nebula/core/aggregation/aggregator.py b/nebula/core/aggregation/aggregator.py
index 4338a7647..b9ab0d2fd 100755
--- a/nebula/core/aggregation/aggregator.py
+++ b/nebula/core/aggregation/aggregator.py
@@ -169,35 +169,13 @@ async def get_aggregation(self):
         else:
             logging.info("🔄  get_aggregation | All models accounted for, proceeding with aggregation.")
 
-        await self._calculate_sdfl_indirect_reputation_before_aggregation(updates)
+        await self.us.before_aggregation(updates, self._federation_nodes)
 
         agg_event = AggregationEvent(updates, self._federation_nodes, missing_nodes)
         await EventManager.get_instance().publish_node_event(agg_event)
         aggregated_result = self.run_aggregation(updates)
         return aggregated_result
 
-    async def _calculate_sdfl_indirect_reputation_before_aggregation(self, updates):
-        if self.config.participant["scenario_args"].get("federation") != "SDFL":
-            return
-        if not hasattr(self.engine, "_reputation") or self.engine._reputation is None:
-            return
-
-        round_num = await self.engine.get_round()
-        expected_table_nodes = self.engine.get_sdfl_expected_trainers()
-        target_nodes = set(self._federation_nodes) | set(updates.keys())
-        timeout = float(
-            self.config.participant["defense_args"]
-            .get("reputation", {})
-            .get("table_aggregation_timeout", 10)
-        )
-
-        await self.engine._reputation.calculate_indirect_reputation_for_non_neighbors(
-            target_nodes=target_nodes,
-            expected_table_nodes=expected_table_nodes,
-            round_num=round_num,
-            timeout=timeout,
-        )
-
     def print_model_size(self, model):
         total_memory = 0
 
diff --git a/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py b/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
index 956abb011..4ebd15ba3 100644
--- a/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
+++ b/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
@@ -209,6 +209,30 @@ async def get_round_updates(self):
         await self._updates_storage_lock.release_async()
         return updates
 
+    async def before_aggregation(self, updates: dict[str, tuple[object, float]], federation_nodes: set):
+        """
+        Calculate indirect SDFL reputation for non-neighbor nodes before aggregating trainer updates.
+        """
+        engine = self.agg.engine
+        if not hasattr(engine, "_reputation") or engine._reputation is None:  # no reputation component: nothing to do
+            return
+
+        round_num = await engine.get_round()
+        expected_table_nodes = engine.get_sdfl_expected_trainers()
+        target_nodes = set(federation_nodes) | set(updates.keys())  # federation members plus every update sender
+        timeout = float(  # falls back to 10 when "table_aggregation_timeout" is not configured
+            self.agg.config.participant["defense_args"]
+            .get("reputation", {})
+            .get("table_aggregation_timeout", 10)
+        )
+
+        await engine._reputation.calculate_indirect_reputation_for_non_neighbors(
+            target_nodes=target_nodes,
+            expected_table_nodes=expected_table_nodes,
+            round_num=round_num,
+            timeout=timeout,
+        )
+
     async def notify_federation_update(self, updt_nei_event: UpdateNeighborEvent):
         """
         Handle federation node join/leave events.
diff --git a/nebula/core/aggregation/updatehandlers/updatehandler.py b/nebula/core/aggregation/updatehandlers/updatehandler.py
index f34849237..d6ac8367b 100644
--- a/nebula/core/aggregation/updatehandlers/updatehandler.py
+++ b/nebula/core/aggregation/updatehandlers/updatehandler.py
@@ -105,6 +105,15 @@ async def stop_notifying_updates(self):
         """
         raise NotImplementedError
 
+    async def before_aggregation(self, updates: dict[str, tuple[object, float]], federation_nodes: set):
+        """
+        Hook for federation-specific processing just before aggregation.
+
+        DFL/CFL do not need extra work here. Federation-specific handlers can override this
+        without making the base aggregator know about a concrete federation type.
+        """
+        return None  # default implementation is an intentional no-op; both arguments are ignored
+
 
 def factory_update_handler(updt_handler, aggregator, addr) -> UpdateHandler:
     from nebula.core.aggregation.updatehandlers.cflupdatehandler import CFLUpdateHandler
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 702b473ac..7fde4164e 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -237,6 +237,8 @@ async def mark_leadership_transfer_pending(self, successor: str):
 
     async def confirm_leadership_transfer_ack(self, source: str) -> bool:
         async with self._leadership_transfer_lock:
+            if self._leadership_transfer_pending is None:
+                return False
             if self._leadership_transfer_pending != source:
                 logging.info(
                     f"SDFL leadership | Ignoring ACK from {source}; "
@@ -249,9 +251,6 @@ async def confirm_leadership_transfer_ack(self, source: str) -> bool:
             return True
 
     async def wait_pending_leadership_ack(self):
-        if self.config.participant["scenario_args"].get("federation") != "SDFL":
-            return
-
         async with self._leadership_transfer_lock:
             successor = self._leadership_transfer_pending
 
@@ -416,37 +415,6 @@ async def model_update_callback(self, source, message):
         updt_received_event = UpdateReceivedEvent(decoded_model, message.weight, source, message.round)
         await EventManager.get_instance().publish_node_event(updt_received_event)
 
-    async def send_sdfl_reputation_model_update(self):
-        if self.config.participant["scenario_args"].get("federation") != "SDFL":
-            return
-
-        model_params = self.trainer.get_model_parameters()
-        serialized_model = (
-            model_params
-            if isinstance(model_params, bytes)
-            else self.trainer.serialize_model(model_params)
-        )
-
-        message = self.cm.create_message(
-            "model",
-            round=self.round,
-            parameters=serialized_model,
-            weight=self.trainer.get_model_weight(),
-        )
-
-        neighbors = await self.cm.get_addrs_current_connections(only_direct=True, myself=False)
-        if not neighbors:
-            logging.info("SDFL reputation | No direct neighbors to send model/update")
-            return
-
-        logging.info(f"SDFL reputation | Broadcasting model/update to direct neighbors: {neighbors}")
-        await asyncio.gather(
-            *[
-                asyncio.create_task(self.cm.send_message(neighbor, message, "model"))
-                for neighbor in neighbors
-            ]
-        )
-
     """                                                     ##############################
                                                             #      General callbacks     #
                                                             ##############################
@@ -509,8 +477,7 @@ async def _control_leadership_transfer_callback(self, source, message):
     async def _control_leadership_transfer_ack_callback(self, source, message):
         logging.info(f"🔧  handle_control_message | Trigger | Received leadership transfer ack message from {source}")
         # No concurrence of difference ack received treated, be aware of that.
-        if self.config.participant["scenario_args"].get("federation") == "SDFL":
-            await self.confirm_leadership_transfer_ack(source)
+        if await self.confirm_leadership_transfer_ack(source):
             return
 
         if await self._round_in_process_lock.locked_async():
@@ -686,7 +653,7 @@ async def _trustscores_share_callback(self, source, message):
         except Exception as e:
             logging.exception(f"Error handling trustscores message: {e}")
 
-    async def sdfl_trainer_update_callback(self, source, message):
+    async def _sdflmodel_trainer_update_callback(self, source, message):
         try:
             logging.info(
                 f"SDFL | TRAINER_UPDATE callback triggered | "
@@ -737,7 +704,7 @@ async def sdfl_trainer_update_callback(self, source, message):
         except Exception as e:
             logging.exception(f"Error handling SDFL TRAINER_UPDATE message: {e}")
 
-    async def sdfl_global_model_callback(self, source, message):
+    async def _sdflmodel_global_model_callback(self, source, message):
         role = self.rb.get_role_name(True)
         logging.info(
             f"SDFL | GLOBAL_MODEL callback triggered | "
@@ -784,10 +751,6 @@ async def init_message_callbacks(self):
         await self.register_message_callback(("model", "initialization"), "model_initialization_callback")
         await self.register_message_callback(("model", "update"), "model_update_callback")
 
-        # SDFL model callbacks
-        await self.register_message_callback(("sdflmodel", "trainer_update"), "sdfl_trainer_update_callback")
-        await self.register_message_callback(("sdflmodel", "global_model"), "sdfl_global_model_callback")
-
     async def register_message_events_callbacks(self):
         me_dict = self.cm.get_messages_events()
         message_events = [
@@ -1109,80 +1072,6 @@ async def _waiting_model_updates(self):
         else:
             logging.error("Aggregation finished with no parameters")
 
-    async def send_sdfl_global_model(self) -> None:
-        model_params = self.trainer.get_model_parameters()
-        serialized_model = (
-            model_params
-            if isinstance(model_params, bytes)
-            else self.trainer.serialize_model(model_params)
-        )
-
-        message = self.cm.create_message(
-            "sdflmodel",
-            "global_model",
-            target="trainer",
-            parameters=serialized_model,
-            weight=self.trainer.get_model_weight(),
-            round=self.round,
-            node_id=self.addr,
-        )
-
-        neighbors = await self.cm.get_addrs_current_connections(
-            only_direct=True,
-            myself=False,
-        )
-
-        logging.info(
-            f"SDFL aggregator | Broadcasting GLOBAL_MODEL to neighbors: {neighbors}"
-        )
-
-        tasks = []
-
-        for neighbor in neighbors:
-            tasks.append(
-                asyncio.create_task(
-                    self.cm.send_message(
-                        neighbor,
-                        message,
-                        "sdflmodel",
-                        allow_after_learning_finished=True,
-                    )
-                )
-            )
-
-        if tasks:
-            await asyncio.gather(*tasks)
-        else:
-            logging.warning(
-                "SDFL aggregator | No neighbors available to send GLOBAL_MODEL"
-            )
-
-    def _is_sdfl_trainer(self):
-        federation = self.config.participant["scenario_args"].get("federation")
-        return federation == "SDFL" and self.rb.get_role_name(effective=True) == "trainer"
-
-    def prepare_waiting_global_model(self):
-        self._global_model_source = None
-        self._global_model_received.clear()
-
-    async def _waiting_global_model(self):
-        """
-        Wait for a global model sent by the current SDFL aggregator.
-
-        SDFL trainers must not aggregate locally. They train, send their update,
-        and block here until a model update is received and applied by
-        ``model_update_callback``.
-        """
-        timeout = self.config.participant["aggregator_args"]["aggregation_timeout"]
-        logging.info(f"💤  Waiting global SDFL model in round {self.round}.")
-        try:
-            await asyncio.wait_for(self._global_model_received.wait(), timeout=timeout)
-            logging.info(
-                f"🤖  SDFL trainer | Global model received from {self._global_model_source} in round {self.round}"
-            )
-        except TimeoutError:
-            logging.error(f"🤖  SDFL trainer | Timeout waiting global model in round {self.round}")
-
     def print_round_information(self):
         print_msg_box(
             msg=f"Round {self.round} of {self.total_rounds} started.",
@@ -1271,7 +1160,7 @@ async def _learning_cycle(self):
                     title="Round information",
                 )
 
-                await self.wait_pending_leadership_ack()
+                await self.rb.before_round_start()
                 await self.update_self_role()
 
                 logging.info(f"Federation nodes: {self.federation_nodes}")
diff --git a/nebula/core/noderole.py b/nebula/core/noderole.py
index 48991994f..ce02f1cd4 100644
--- a/nebula/core/noderole.py
+++ b/nebula/core/noderole.py
@@ -170,6 +170,10 @@ async def update_role_needed(self):
             updt_needed = self._next_role != None
         return updt_needed
 
+    async def before_round_start(self):
+        """Hook for role-specific work before a round starts; this default does nothing and returns None."""
+        return None
+
 """                                                         ##############################
                                                             #     MALICIOUS BEHAVIOR     #
                                                             ##############################
@@ -210,6 +214,9 @@ async def extended_learning_cycle(self):
 
         await self._fake_role_behavior.extended_learning_cycle()
 
+    async def before_round_start(self):  # forward the pre-round hook to the wrapped fake-role behavior
+        await self._fake_role_behavior.before_round_start()
+
     async def select_nodes_to_wait(self):
         nodes = await self._fake_role_behavior.select_nodes_to_wait()
         return nodes
@@ -280,21 +287,8 @@ def get_role_name(self, effective=False):
     async def extended_learning_cycle(self):
         await self._engine.trainer.test()
 
-        if self._config.participant["scenario_args"].get("federation") == "SDFL":
-            await self._engine.send_sdfl_reputation_model_update()
-
         await self._engine._waiting_model_updates()
 
-        federation = self._config.participant["scenario_args"].get("federation")
-
-        if federation == "SDFL":
-
-            await self._engine.send_sdfl_global_model()
-            await self._transfer_leadership()
-
-            return
-
-
         mpe = ModelPropagationEvent(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False), "stable")
         await EventManager.get_instance().publish_node_event(mpe)
 
@@ -315,24 +309,114 @@ async def _transfer_leadership(self):
                 return
             lt_message = self._engine.cm.create_message("control", "leadership_transfer")
             logging.info(f"Sending transfer leadership to: {successor}")
-            if self._config.participant["scenario_args"].get("federation") == "SDFL":
-                await self._engine.mark_leadership_transfer_pending(successor)
+            await self._before_leadership_transfer(successor)
             asyncio.create_task(self._engine.cm.send_message(successor, lt_message))
             await self._engine.register_leadership_transfer(successor)
             self._transfer_send = True
 
-    async def select_nodes_to_wait(self):
-        if self._config.participant["scenario_args"].get("federation") == "SDFL":
-            nodes = self._engine.get_sdfl_expected_trainers()
-            if nodes:
-                return nodes
+    async def _before_leadership_transfer(self, successor):  # no-op hook run before the transfer message is sent
+        return None
 
+    async def select_nodes_to_wait(self):
         nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
         return nodes
 
     async def resolve_missing_updates(self):
         return (self._engine.trainer.get_model_parameters(), self._engine.trainer.BYPASS_MODEL_WEIGHT)
 
+
+class SDFLRoleMixin:
+    """Helper shared by the SDFL role behaviors; the host class must provide ``self._engine``."""
+
+    async def _send_reputation_model_update(self):
+        """Send the trainer's current model as a "model" message to every direct neighbor."""
+        engine = self._engine
+        payload = engine.trainer.get_model_parameters()
+        if not isinstance(payload, bytes):
+            # Parameters that are not already bytes are serialized by the trainer first.
+            payload = engine.trainer.serialize_model(payload)
+
+        message = engine.cm.create_message(
+            "model",
+            round=engine.round,
+            parameters=payload,
+            weight=engine.trainer.get_model_weight(),
+        )
+
+        neighbors = await engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
+        if not neighbors:
+            logging.info("SDFL reputation | No direct neighbors to send model/update")
+            return
+
+        logging.info(f"SDFL reputation | Broadcasting model/update to direct neighbors: {neighbors}")
+        send_tasks = [
+            asyncio.create_task(engine.cm.send_message(neighbor, message, "model")) for neighbor in neighbors
+        ]
+        await asyncio.gather(*send_tasks)
+
+
+class SDFLAggregatorRoleBehavior(SDFLRoleMixin, AggregatorRoleBehavior):
+    """Aggregator role behavior that the role factory selects for SDFL federations."""
+
+    async def before_round_start(self):
+        """Delegate to the engine's ``wait_pending_leadership_ack`` before the round starts."""
+        await self._engine.wait_pending_leadership_ack()
+
+    async def extended_learning_cycle(self):
+        """Test, share the model for reputation, wait for updates, broadcast and transfer leadership."""
+        engine = self._engine
+        await engine.trainer.test()
+        await self._send_reputation_model_update()
+        await engine._waiting_model_updates()
+        await self._send_global_model()
+        await self._transfer_leadership()
+
+    async def _before_leadership_transfer(self, successor):
+        """Ask the engine to mark ``successor`` as the pending leadership transfer."""
+        await self._engine.mark_leadership_transfer_pending(successor)
+
+    async def select_nodes_to_wait(self):
+        """Return the engine's expected SDFL trainers, or the parent's selection when there are none."""
+        expected_trainers = self._engine.get_sdfl_expected_trainers()
+        if not expected_trainers:
+            return await super().select_nodes_to_wait()
+        return expected_trainers
+
+    async def _send_global_model(self) -> None:
+        """Send the trainer's model to every direct neighbor as an "sdflmodel"/"global_model" message."""
+        engine = self._engine
+        payload = engine.trainer.get_model_parameters()
+        if not isinstance(payload, bytes):
+            # Parameters that are not already bytes are serialized by the trainer first.
+            payload = engine.trainer.serialize_model(payload)
+
+        message = engine.cm.create_message(
+            "sdflmodel",
+            "global_model",
+            target="trainer",
+            parameters=payload,
+            weight=engine.trainer.get_model_weight(),
+            round=engine.round,
+            node_id=engine.addr,
+        )
+
+        neighbors = await engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
+        logging.info(f"SDFL aggregator | Broadcasting GLOBAL_MODEL to neighbors: {neighbors}")
+
+        send_tasks = [
+            asyncio.create_task(
+                engine.cm.send_message(neighbor, message, "sdflmodel", allow_after_learning_finished=True)
+            )
+            for neighbor in neighbors
+        ]
+
+        if not send_tasks:
+            logging.warning("SDFL aggregator | No neighbors available to send GLOBAL_MODEL")
+            return
+
+        # The sends are already scheduled as tasks; gather waits until all of them complete.
+        await asyncio.gather(*send_tasks)
+
 """                                                         ##############################
                                                             #       SERVER BEHAVIOR      #
                                                             ##############################
@@ -397,98 +481,120 @@ async def extended_learning_cycle(self):
         finally:
             await self._engine.trainning_in_progress_lock.release_async()
 
-        federation = self._config.participant["scenario_args"].get("federation")
-
-        if federation == "SDFL":
-            self._engine.prepare_waiting_global_model()
+        mpe = ModelPropagationEvent(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False), "stable")
+        await EventManager.get_instance().publish_node_event(mpe)
 
-            if self._engine._reputation is not None:
-                await self._engine._reputation.process_pending_sdfl_reputation_updates(self._engine.round)
+        await self._engine._waiting_model_updates()
 
-            await self._engine.send_sdfl_reputation_model_update()
+    async def select_nodes_to_wait(self):
+        nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
+        return nodes
 
-            if self._engine._reputation is not None:
-                expected_reputation_neighbors = await self._engine.cm.get_addrs_current_connections(
-                    only_direct=True,
-                    myself=False,
-                )
-                reputation_timeout = float(
-                    self._config.participant["defense_args"]
-                    .get("reputation", {})
-                    .get(
-                        "model_update_timeout",
-                        self._config.participant["defense_args"]
-                        .get("reputation", {})
-                        .get("table_aggregation_timeout", 30),
-                    )
-                )
-                await self._engine._reputation.wait_sdfl_reputation_updates(
-                    expected_reputation_neighbors,
-                    self._engine.round,
-                    reputation_timeout,
-                )
-                await self._engine._reputation.calculate_and_send_sdfl_reputation_table()
+    async def resolve_missing_updates(self):
+        return (self._engine.trainer.get_model_parameters(), self._engine.trainer.get_model_weight())
 
-            model_params = self._engine.trainer.get_model_parameters()
-            serialized_model = (
-                model_params
-                if isinstance(model_params, bytes)
-                else self._engine.trainer.serialize_model(model_params)
-            )
 
-            message = self._engine.cm.create_message(
-                "sdflmodel",
-                "trainer_update",
-                target="aggregator",
-                parameters=serialized_model,
-                weight=self._engine.trainer.get_model_weight(),
-                round=self._engine.round,
-                node_id=self._engine.addr,
-            )
+class SDFLTrainerRoleBehavior(SDFLRoleMixin, TrainerRoleBehavior):
+    async def extended_learning_cycle(self):
+        logging.info("Waiting global update | Assign _waiting_global_update = True")
 
-            neighbors = await self._engine.cm.get_addrs_current_connections(
-                only_direct=True,
-                myself=False,
-            )
+        await self._engine.trainer.test()
+        self._prepare_waiting_global_model()
+        await self._engine.trainning_in_progress_lock.acquire_async()
+        try:
+            await self._engine.trainer.train()
+        finally:
+            await self._engine.trainning_in_progress_lock.release_async()
 
-            logging.info(
-                f"SDFL trainer | Broadcasting TRAINER_UPDATE to neighbors: {neighbors}"
-            )
+        if self._engine._reputation is not None:
+            await self._engine._reputation.process_pending_sdfl_reputation_updates(self._engine.round)
 
-            tasks = []
-
-            for neighbor in neighbors:
-                tasks.append(
-                    asyncio.create_task(
-                        self._engine.cm.send_message(
-                            neighbor,
-                            message,
-                            "sdflmodel",
-                        )
-                    )
-                )
+        await self._send_reputation_model_update()
+        await self._calculate_and_send_reputation_table()
+        await self._send_trainer_update()
+        await self._waiting_global_model()
 
-            if tasks:
-                await asyncio.gather(*tasks)
-            else:
-                logging.warning(
-                    "SDFL trainer | No neighbors available to send TRAINER_UPDATE"
-                )
+    def _prepare_waiting_global_model(self):  # reset the engine's global-model source and event for a new wait
+        self._engine._global_model_source = None
+        self._engine._global_model_received.clear()
 
-            await self._engine._waiting_global_model()
+    async def _calculate_and_send_reputation_table(self):
+        if self._engine._reputation is None:
             return
 
-        mpe = ModelPropagationEvent(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False), "stable")
-        await EventManager.get_instance().publish_node_event(mpe)
+        expected_reputation_neighbors = await self._engine.cm.get_addrs_current_connections(
+            only_direct=True,
+            myself=False,
+        )
+        reputation_timeout = float(
+            self._config.participant["defense_args"]
+            .get("reputation", {})
+            .get(
+                "model_update_timeout",
+                self._config.participant["defense_args"]
+                .get("reputation", {})
+                .get("table_aggregation_timeout", 30),
+            )
+        )
+        await self._engine._reputation.wait_sdfl_reputation_updates(
+            expected_reputation_neighbors,
+            self._engine.round,
+            reputation_timeout,
+        )
+        await self._engine._reputation.calculate_and_send_sdfl_reputation_table()
+
+    async def _send_trainer_update(self):
+        model_params = self._engine.trainer.get_model_parameters()
+        serialized_model = (
+            model_params
+            if isinstance(model_params, bytes)
+            else self._engine.trainer.serialize_model(model_params)
+        )
 
-        await self._engine._waiting_model_updates()
+        message = self._engine.cm.create_message(
+            "sdflmodel",
+            "trainer_update",
+            target="aggregator",
+            parameters=serialized_model,
+            weight=self._engine.trainer.get_model_weight(),
+            round=self._engine.round,
+            node_id=self._engine.addr,
+        )
 
-    async def select_nodes_to_wait(self):
-        nodes = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
-        return nodes
+        neighbors = await self._engine.cm.get_addrs_current_connections(
+            only_direct=True,
+            myself=False,
+        )
 
-    async def resolve_missing_updates(self):
-        return (self._engine.trainer.get_model_parameters(), self._engine.trainer.get_model_weight())
+        logging.info(f"SDFL trainer | Broadcasting TRAINER_UPDATE to neighbors: {neighbors}")
+
+        tasks = [
+            asyncio.create_task(
+                self._engine.cm.send_message(
+                    neighbor,
+                    message,
+                    "sdflmodel",
+                )
+            )
+            for neighbor in neighbors
+        ]
+
+        if tasks:
+            await asyncio.gather(*tasks)
+        else:
+            logging.warning("SDFL trainer | No neighbors available to send TRAINER_UPDATE")
+
+    async def _waiting_global_model(self):
+        """Wait for the engine's global-model event; log an error if ``aggregation_timeout`` expires first."""
+        timeout = self._config.participant["aggregator_args"]["aggregation_timeout"]
+        logging.info(f"💤  Waiting global SDFL model in round {self._engine.round}.")
+        try:
+            await asyncio.wait_for(self._engine._global_model_received.wait(), timeout=timeout)
+            logging.info(
+                f"🤖  SDFL trainer | Global model received from {self._engine._global_model_source} in round {self._engine.round}"
+            )
+        except asyncio.TimeoutError:  # alias of TimeoutError on 3.11+, a distinct class before that
+            logging.error(f"🤖  SDFL trainer | Timeout waiting global model in round {self._engine.round}")
 
 """                                                         ##############################
                                                             #       IDLE BEHAVIOR        #
@@ -557,6 +663,15 @@ class roleBehaviorException(Exception):
     pass
 
 def factory_role_behavior(role: str, engine: Engine, config: Config) -> RoleBehavior | None:
+    federation = config.participant["scenario_args"].get("federation")
+    if federation == "SDFL":
+        sdfl_role_behaviors = {
+            "trainer": SDFLTrainerRoleBehavior,
+            "aggregator": SDFLAggregatorRoleBehavior,
+        }
+        node_role = sdfl_role_behaviors.get(role)
+        if node_role:
+            return node_role(engine, config)
 
     role_behaviors = {
         "malicious": MaliciousRoleBehavior,

From c0f5212c0af08aba1d47d740cc7c335c99054490 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 28 May 2026 16:11:08 +0200
Subject: [PATCH 53/66] Adversarial training updated for tabular data: CAA V1
 implemented

---
 .../addons/defenses/adversarial_training.py   | 548 +++++-------------
 .../defenses/adversarial_training_base.py     |  30 +
 .../defenses/adversarial_training_config.py   |  94 +++
 .../defenses/adversarial_training_image.py    |  84 +++
 .../defenses/adversarial_training_logging.py  | 225 +++++++
 .../defenses/adversarial_training_tabular.py  | 323 +++++++++++
 nebula/addons/defenses/feature_squeezing.py   |   2 +-
 .../core/datasets/adultcensus/adultcensus.py  | 109 ++--
 nebula/core/datasets/tabular_metadata.py      |  53 +-
 .../js/deployment/adversarial-training.js     |  84 ++-
 nebula/frontend/templates/deployment.html     |   6 +-
 11 files changed, 1098 insertions(+), 460 deletions(-)
 create mode 100644 nebula/addons/defenses/adversarial_training_base.py
 create mode 100644 nebula/addons/defenses/adversarial_training_config.py
 create mode 100644 nebula/addons/defenses/adversarial_training_image.py
 create mode 100644 nebula/addons/defenses/adversarial_training_logging.py
 create mode 100644 nebula/addons/defenses/adversarial_training_tabular.py

diff --git a/nebula/addons/defenses/adversarial_training.py b/nebula/addons/defenses/adversarial_training.py
index 340836d17..6dbc7aea0 100644
--- a/nebula/addons/defenses/adversarial_training.py
+++ b/nebula/addons/defenses/adversarial_training.py
@@ -1,237 +1,53 @@
 import logging
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import Any
 
 import torch
 
-from nebula.config.config import TRAINING_LOGGER
-from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, TabularAdversarialMetadata
-
-logging_training = logging.getLogger(TRAINING_LOGGER)
-
-IMAGE_DATASET_NORMALIZATION = {
-    "MNIST": ((0.5,), (0.5,)),
-    "FashionMNIST": ((0.5,), (0.5,)),
-    "EMNIST": ((0.5,), (0.5,)),
-    "CIFAR10": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
-    "CIFAR100": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
-}
-
-
-@dataclass(frozen=True)
-class AdversarialTrainingConfig:
-    enabled: bool = False
-    dataset_name: str | None = None
-    domain: str = "image"
-    attack: str = "fgsm"
-    epsilon: float = 8.0 / 255.0
-    alpha: float | None = None
-    steps: int = 1
-    clean_weight: float = 0.5
-    adversarial_weight: float = 0.5
-    mode: str = "mixed"
-    apply_probability: float = 1.0
-    clip_min: float = 0.0
-    clip_max: float = 1.0
-    log_adversarial_metrics: bool = True
-
-
-class AdversarialExampleGenerator(ABC):
-    """Base interface for domain-specific adversarial example generators."""
-
-    last_epsilon: float | None = None
-
-    @abstractmethod
-    def generate(self, model, x, y, criterion):
-        raise NotImplementedError
-
-    def _sample_epsilon(self, device: torch.device) -> float:
-        epsilon_max = float(self.config.epsilon)
-        if epsilon_max <= 0.0:
-            self.last_epsilon = 0.0
-            return 0.0
-
-        epsilon_min = epsilon_max / 4.0
-        epsilon_step = epsilon_max / 8.0
-        num_values = max(int(round((epsilon_max - epsilon_min) / epsilon_step)) + 1, 1)
-        index = int(torch.randint(num_values, (), device=device).item())
-        epsilon = min(epsilon_min + index * epsilon_step, epsilon_max)
-        self.last_epsilon = epsilon
-        return epsilon
-
-
-class ImageAdversarialExampleGenerator(AdversarialExampleGenerator):
-    def __init__(self, config: AdversarialTrainingConfig, mean: tuple[float, ...], std: tuple[float, ...]):
-        self.config = config
-        self.mean = mean
-        self.std = std
-
-    def _channel_tensor(self, values: tuple[float, ...], x: torch.Tensor) -> torch.Tensor:
-        shape = [1, len(values)] + [1] * max(x.dim() - 2, 0)
-        return torch.tensor(values, dtype=x.dtype, device=x.device).view(*shape)
-
-    def _epsilon(self, x: torch.Tensor, epsilon: float) -> torch.Tensor:
-        std = self._channel_tensor(self.std, x)
-        return float(epsilon) / std
-
-    def _alpha(self, x: torch.Tensor, epsilon: float) -> torch.Tensor:
-        alpha = self.config.alpha
-        if alpha is None:
-            alpha = epsilon / max(int(self.config.steps), 1)
-        std = self._channel_tensor(self.std, x)
-        return float(alpha) / std
-
-    def _bounds(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-        mean = self._channel_tensor(self.mean, x)
-        std = self._channel_tensor(self.std, x)
-        lower = (float(self.config.clip_min) - mean) / std
-        upper = (float(self.config.clip_max) - mean) / std
-        return lower, upper
-
-    def denormalize(self, x: torch.Tensor) -> torch.Tensor:
-        mean = self._channel_tensor(self.mean, x)
-        std = self._channel_tensor(self.std, x)
-        return (x * std + mean).clamp(float(self.config.clip_min), float(self.config.clip_max))
-
-    def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
-        epsilon = self._epsilon(x_clean, epsilon)
-        lower, upper = self._bounds(x_clean)
-        x_adv = torch.max(torch.min(x_adv, x_clean + epsilon), x_clean - epsilon)
-        return torch.max(torch.min(x_adv, upper), lower)
-
-
-class ImageFGSMGenerator(ImageAdversarialExampleGenerator):
-    def generate(self, model, x, y, criterion):
-        epsilon = self._sample_epsilon(x.device)
-        x_adv = x.detach().clone().requires_grad_(True)
-        logits = model(x_adv)
-        loss = criterion(logits, y)
-        grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]
-        x_adv = x_adv + self._epsilon(x_adv, epsilon) * grad.sign()
-        return self._project(x_adv.detach(), x.detach(), epsilon)
-
-
-class ImagePGDGenerator(ImageAdversarialExampleGenerator):
-    def generate(self, model, x, y, criterion):
-        epsilon = self._sample_epsilon(x.device)
-        x_clean = x.detach()
-        x_adv = x_clean.clone()
-        steps = max(int(self.config.steps), 1)
-
-        for _ in range(steps):
-            x_adv = x_adv.detach().requires_grad_(True)
-            logits = model(x_adv)
-            loss = criterion(logits, y)
-            grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]
-            x_adv = x_adv + self._alpha(x_adv, epsilon) * grad.sign()
-            x_adv = self._project(x_adv.detach(), x_clean, epsilon)
-
-        return x_adv.detach()
-
-
-class TabularAdversarialExampleGenerator(AdversarialExampleGenerator):
-    """Adversarial generator for perturbable continuous and integer tabular features."""
-
-    def __init__(self, config: AdversarialTrainingConfig, metadata: TabularAdversarialMetadata):
-        self.config = config
-        self.metadata = metadata
-        self._tensor_cache: dict[tuple[torch.device, torch.dtype], dict[str, torch.Tensor]] = {}
-
-    def _alpha(self, epsilon: float) -> float:
-        if self.config.alpha is not None:
-            return float(self.config.alpha)
-        return float(epsilon) / max(int(self.config.steps), 1)
-
-    def _tensors(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
-        key = (x.device, x.dtype)
-        cached = self._tensor_cache.get(key)
-        if cached is not None:
-            return cached
-
-        cached = {
-            "continuous": torch.tensor(
-                [feature_type == CONTINUOUS for feature_type in self.metadata.feature_types],
-                dtype=torch.bool,
-                device=x.device,
-            ).view(1, -1),
-            "integer": torch.tensor(
-                [feature_type == INTEGER for feature_type in self.metadata.feature_types],
-                dtype=torch.bool,
-                device=x.device,
-            ).view(1, -1),
-            "min": torch.tensor(self.metadata.feature_min_norm, dtype=x.dtype, device=x.device).view(1, -1),
-            "max": torch.tensor(self.metadata.feature_max_norm, dtype=x.dtype, device=x.device).view(1, -1),
-        }
-        cached["perturbable"] = cached["continuous"] | cached["integer"]
-        integer_steps = torch.ones_like(cached["min"])
-        for idx, step in (self.metadata.integer_step_norm or {}).items():
-            integer_steps[0, int(idx)] = float(step)
-        cached["integer_step"] = integer_steps
-        self._tensor_cache[key] = cached
-        return cached
-
-    def _gradient(self, model, x, y, criterion):
-        x_grad = x.detach().clone().requires_grad_(True)
-        logits = model(x_grad)
-        loss = criterion(logits, y)
-        return torch.autograd.grad(loss, x_grad, only_inputs=True)[0]
-
-    def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
-        tensors = self._tensors(x_clean)
-        lower = torch.maximum(tensors["min"], x_clean - float(epsilon))
-        upper = torch.minimum(tensors["max"], x_clean + float(epsilon))
-        x_adv = torch.max(torch.min(x_adv, upper), lower)
-
-        integer_mask = tensors["integer"]
-        if integer_mask.any():
-            step = torch.clamp(tensors["integer_step"], min=torch.finfo(x_adv.dtype).eps)
-            projected_integer = torch.round((x_adv - tensors["min"]) / step) * step + tensors["min"]
-            grid_lower = torch.ceil((lower - tensors["min"]) / step) * step + tensors["min"]
-            grid_upper = torch.floor((upper - tensors["min"]) / step) * step + tensors["min"]
-            projected_integer = torch.max(torch.min(projected_integer, grid_upper), grid_lower)
-            has_valid_grid = grid_lower <= grid_upper
-            projected_integer = torch.where(has_valid_grid, projected_integer, x_clean)
-            x_adv = torch.where(integer_mask, projected_integer, x_adv)
-
-        return torch.where(tensors["perturbable"], x_adv, x_clean)
-
-
-class TabularFGSMGenerator(TabularAdversarialExampleGenerator):
-    def generate(self, model, x, y, criterion):
-        epsilon = self._sample_epsilon(x.device)
-        grad = self._gradient(model, x, y, criterion)
-        x_clean = x.detach()
-        perturbable_mask = self._tensors(x_clean)["perturbable"]
-        x_adv = x_clean + float(epsilon) * grad.sign() * perturbable_mask
-        return self._project(x_adv.detach(), x_clean, epsilon)
-
-
-class TabularPGDGenerator(TabularAdversarialExampleGenerator):
-    def generate(self, model, x, y, criterion):
-        epsilon = self._sample_epsilon(x.device)
-        x_clean = x.detach()
-        x_adv = x_clean.clone()
-        steps = max(int(self.config.steps), 1)
-
-        for _ in range(steps):
-            grad = self._gradient(model, x_adv, y, criterion)
-            perturbable_mask = self._tensors(x_clean)["perturbable"]
-            x_adv = x_adv.detach() + self._alpha(epsilon) * grad.sign() * perturbable_mask
-            x_adv = self._project(x_adv.detach(), x_clean, epsilon)
-
-        return x_adv.detach()
+from nebula.addons.defenses.adversarial_training_base import AdversarialExampleGenerator
+from nebula.addons.defenses.adversarial_training_config import (
+    CAA_TABULAR_DATASETS,
+    ERR_ALPHA,
+    ERR_APPLY_PROBABILITY,
+    ERR_CLIP_BOUNDS,
+    ERR_EPSILON,
+    ERR_IMAGE_ATTACK,
+    ERR_LOSS_WEIGHTS,
+    ERR_MIXED_WEIGHTS,
+    ERR_MODE,
+    ERR_STEPS,
+    ERR_TABULAR_METADATA,
+    ERR_UNSUPPORTED_ATTACK,
+    IMAGE_ADVERSARIAL_ATTACKS,
+    IMAGE_DATASET_NORMALIZATION,
+    AdversarialTrainingConfig,
+    config_from_participant,
+    validate_config,
+)
+from nebula.addons.defenses.adversarial_training_image import (
+    ImageAdversarialExampleGenerator,
+    ImageFGSMGenerator,
+    ImagePGDGenerator,
+)
+from nebula.addons.defenses.adversarial_training_logging import AdversarialTrainingSampleLogger
+from nebula.addons.defenses.adversarial_training_tabular import (
+    TabularAdversarialExampleGenerator,
+    TabularCAAGenerator,
+    TabularConstraintSet,
+)
+from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
 
 
 class AdversarialTrainingDefense:
     """Batch-level adversarial training defense for Nebula models."""
 
-    LOGGED_SAMPLES_PER_ROUND = 3
+    LOGGED_SAMPLES_PER_ROUND = AdversarialTrainingSampleLogger.LOGGED_SAMPLES_PER_ROUND
 
     def __init__(self, config: AdversarialTrainingConfig, generator: AdversarialExampleGenerator):
+        # Keep the selected generator and logger together for each participant model.
         self.config = config
         self.generator = generator
-        self._logged_adversarial_samples_by_round: dict[int, int] = {}
+        self.sample_logger = AdversarialTrainingSampleLogger(config, generator)
+        self._logged_adversarial_samples_by_round = self.sample_logger._logged_samples_by_round
 
     @classmethod
     def from_participant_config(
@@ -239,136 +55,72 @@ def from_participant_config(
         participant_config: dict[str, Any],
         partition=None,
     ) -> "AdversarialTrainingDefense | None":
-        raw = participant_config.get("defense_args", {}).get("adversarial_training", {})
-        if not raw or not raw.get("enabled", False):
+        # This is the only entry point used by Nebula's node setup.
+        config = config_from_participant(participant_config)
+        if config is None:
             return None
-
-        dataset_name = participant_config.get("data_args", {}).get("dataset")
-        config = AdversarialTrainingConfig(
-            enabled=True,
-            dataset_name=dataset_name,
-            domain=str(raw.get("domain", "image")).lower(),
-            attack=str(raw.get("attack", "fgsm")).lower(),
-            epsilon=float(raw.get("epsilon", 8.0 / 255.0)),
-            alpha=float(raw["alpha"]) if raw.get("alpha") is not None else None,
-            steps=int(raw.get("steps", 1)),
-            clean_weight=float(raw.get("clean_weight", 0.5)),
-            adversarial_weight=float(raw.get("adversarial_weight", 0.5)),
-            mode=str(raw.get("mode", "mixed")).lower(),
-            apply_probability=float(raw.get("apply_probability", 1.0)),
-            clip_min=float(raw.get("clip_min", 0.0)),
-            clip_max=float(raw.get("clip_max", 1.0)),
-            log_adversarial_metrics=bool(raw.get("log_adversarial_metrics", True)),
-        )
-        cls._validate_config(config)
+        validate_config(config)
 
         if config.domain == "tabular":
-            supported_tabular_datasets = {"AdultCensus", "BreastCancer", "Covtype", "KDDCUP99"}
-            if dataset_name not in supported_tabular_datasets:
+            # CAA needs dataset metadata. Keep the allow-list explicit while more tabular datasets are added.
+            if config.dataset_name not in CAA_TABULAR_DATASETS:
                 logging.warning(
-                    "[AdversarialTrainingDefense] Skipping tabular adversarial training: dataset '%s' is not supported yet",
-                    dataset_name,
+                    "[AdversarialTrainingDefense] Skipping CAA tabular adversarial training: "
+                    "dataset '%s' is not supported yet",
+                    config.dataset_name,
                 )
                 return None
+
             metadata = cls._get_tabular_metadata(partition)
-            generator = cls._build_tabular_generator(config, metadata)
-            return cls(config=config, generator=generator)
-
-        if config.domain != "image":
-            logging.warning(
-                "[AdversarialTrainingDefense] Skipping adversarial training: domain '%s' is not implemented yet",
-                config.domain,
-            )
-            return None
+            # For tabular data, the only valid adversarial-training generator is CAA.
+            return cls(config=config, generator=TabularCAAGenerator(config, metadata))
 
-        normalization = IMAGE_DATASET_NORMALIZATION.get(dataset_name)
-        if normalization is None:
-            logging.warning(
-                "[AdversarialTrainingDefense] Skipping adversarial training: dataset '%s' has no image bounds",
-                dataset_name,
-            )
-            return None
+        if config.domain == "image":
+            # Image attacks run in normalized model space, so each dataset must provide mean/std.
+            normalization = IMAGE_DATASET_NORMALIZATION.get(config.dataset_name)
+            if normalization is None:
+                logging.warning(
+                    "[AdversarialTrainingDefense] Skipping adversarial training: dataset '%s' has no image bounds",
+                    config.dataset_name,
+                )
+                return None
 
-        generator = cls._build_generator(config, normalization)
-        return cls(config=config, generator=generator)
+            return cls(config=config, generator=cls._build_image_generator(config, normalization))
 
-    @staticmethod
-    def _validate_config(config: AdversarialTrainingConfig) -> None:
-        if config.mode not in {"clean", "adversarial", "mixed"}:
-            raise ValueError("adversarial_training.mode must be one of: clean, adversarial, mixed")
-        if config.attack not in {"fgsm", "pgd"}:
-            raise ValueError("adversarial_training.attack must be one of: fgsm, pgd")
-        if config.epsilon < 0:
-            raise ValueError("adversarial_training.epsilon must be >= 0")
-        if config.alpha is not None and config.alpha < 0:
-            raise ValueError("adversarial_training.alpha must be >= 0")
-        if config.steps < 1:
-            raise ValueError("adversarial_training.steps must be >= 1")
-        if not 0.0 <= config.apply_probability <= 1.0:
-            raise ValueError("adversarial_training.apply_probability must be in [0, 1]")
-        if config.clean_weight < 0 or config.adversarial_weight < 0:
-            raise ValueError("adversarial_training loss weights must be >= 0")
-        if config.mode == "mixed" and config.clean_weight + config.adversarial_weight == 0:
-            raise ValueError("adversarial_training mixed mode requires at least one positive loss weight")
-        if config.clip_min >= config.clip_max:
-            raise ValueError("adversarial_training.clip_min must be smaller than clip_max")
+        logging.warning(
+            "[AdversarialTrainingDefense] Skipping adversarial training: domain '%s' is not implemented yet",
+            config.domain,
+        )
+        return None
 
     @staticmethod
-    def _build_generator(config, normalization):
+    def _build_image_generator(config, normalization):
+        # Choose the image attack implementation requested by the participant config.
         mean, std = normalization
         if config.attack == "fgsm":
             return ImageFGSMGenerator(config, mean, std)
         if config.attack == "pgd":
             return ImagePGDGenerator(config, mean, std)
-        raise ValueError(f"Unsupported adversarial training attack: {config.attack}")
-
-    @staticmethod
-    def _build_tabular_generator(config, metadata: TabularAdversarialMetadata):
-        if config.attack == "fgsm":
-            return TabularFGSMGenerator(config, metadata)
-        if config.attack == "pgd":
-            return TabularPGDGenerator(config, metadata)
-        raise ValueError(f"Unsupported adversarial training attack: {config.attack}")
+        raise ValueError(ERR_UNSUPPORTED_ATTACK.format(attack=config.attack))
 
     @staticmethod
     def _get_tabular_metadata(partition) -> TabularAdversarialMetadata:
+        # Load the tabular constraints from the local training partition.
         train_set = getattr(partition, "train_set", None) if partition is not None else None
         metadata = getattr(train_set, "tabular_metadata", None)
         if metadata is None:
-            raise ValueError("Tabular adversarial training requires tabular_metadata")
+            raise ValueError(ERR_TABULAR_METADATA)
+        # Metadata can come from an in-memory dataset object or from a serialized config.
         if isinstance(metadata, TabularAdversarialMetadata):
             tabular_metadata = metadata
         else:
             tabular_metadata = TabularAdversarialMetadata.from_dict(metadata)
 
-        integer_features = [
-            name
-            for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types)
-            if feature_type == INTEGER
-        ]
-        continuous_features = [
-            name
-            for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types)
-            if feature_type == CONTINUOUS
-        ]
-        non_perturbable_features = [
-            name
-            for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types)
-            if feature_type not in {CONTINUOUS, INTEGER}
-        ]
-        logging.info(
-            "[AdversarialTrainingDefense] Tabular feature mask loaded | integer=%s | continuous=%s | "
-            "non_perturbable=%s | integer_features=%s | continuous_features=%s | non_perturbable_preview=%s",
-            len(integer_features),
-            len(continuous_features),
-            len(non_perturbable_features),
-            integer_features,
-            continuous_features,
-            non_perturbable_features[:20],
-        )
+        _log_tabular_metadata(tabular_metadata)
         return tabular_metadata
 
     def should_apply(self, x: torch.Tensor) -> bool:
+        # Allows adversarial training to be applied to only a fraction of batches.
         if self.config.apply_probability >= 1.0:
             return True
         if self.config.apply_probability <= 0.0:
@@ -381,16 +133,19 @@ def compute_training_step(self, model, x, y, criterion):
             loss = criterion(logits, y)
             return loss, logits, {}
 
+        # "clean" mode keeps the normal training step but still goes through the defense hook.
         if self.config.mode == "clean":
             logits = model(x)
             loss = criterion(logits, y)
             return loss, logits, {}
 
+        # Generate x_adv once and reuse it for logging, adversarial loss and metrics.
         x_adv = self.generator.generate(model, x, y, criterion)
         self._log_adversarial_samples(model, x, x_adv, y)
         adv_logits = model(x_adv)
         adv_loss = criterion(adv_logits, y)
 
+        # "adversarial" replaces the clean batch loss completely.
         if self.config.mode == "adversarial":
             return adv_loss, adv_logits, self._extra_metrics({
                 "Adversarial Loss": adv_loss,
@@ -400,6 +155,7 @@ def compute_training_step(self, model, x, y, criterion):
         clean_logits = model(x)
         clean_loss = criterion(clean_logits, y)
         total_weight = self.config.clean_weight + self.config.adversarial_weight
+        # "mixed" combines clean and adversarial losses with user-provided weights.
         loss = (
             self.config.clean_weight * clean_loss + self.config.adversarial_weight * adv_loss
         ) / total_weight
@@ -410,97 +166,67 @@ def compute_training_step(self, model, x, y, criterion):
             "Adversarial Accuracy": self._accuracy(adv_logits, y),
         })
 
+    def _log_adversarial_samples(self, model, x_clean: torch.Tensor, x_adv: torch.Tensor, y: torch.Tensor) -> None:
+        # Delegate logging so the training step stays focused on loss computation.
+        self.sample_logger.log(model, x_clean, x_adv, y)
+
     def _accuracy(self, logits, y):
+        # Compute batch accuracy from model logits.
         predictions = torch.argmax(logits, dim=1)
         return torch.mean((predictions == y).float())
 
     def _extra_metrics(self, metrics):
+        # Allow users to disable adversarial metrics without changing the training loss.
         if not self.config.log_adversarial_metrics:
             return {}
         return metrics
 
-    def _log_adversarial_samples(self, model, x_clean: torch.Tensor, x_adv: torch.Tensor, y: torch.Tensor) -> None:
-        if not self.config.log_adversarial_metrics:
-            return
-
-        current_round = int(getattr(model, "round", 0))
-        already_logged = self._logged_adversarial_samples_by_round.get(current_round, 0)
-        remaining = self.LOGGED_SAMPLES_PER_ROUND - already_logged
-        if remaining <= 0:
-            return
-
-        with torch.no_grad():
-            clean_view = x_clean.detach()
-            adv_view = x_adv.detach()
-            if hasattr(self.generator, "denormalize"):
-                clean_view = self.generator.denormalize(clean_view)
-                adv_view = self.generator.denormalize(adv_view)
-
-            delta = adv_view - clean_view
-            samples_to_log = min(remaining, int(clean_view.size(0)))
-
-            for sample_idx in range(samples_to_log):
-                sample_clean = clean_view[sample_idx].detach().float().cpu()
-                sample_adv = adv_view[sample_idx].detach().float().cpu()
-                sample_delta = delta[sample_idx].detach().float().cpu()
-
-                logging_training.info(
-                    "[AdversarialTrainingDefense] Round %s | Sample %s/%s before/after distortion | "
-                    "dataset=%s | attack=%s | epsilon_effective=%.6f | label=%s | "
-                    "clean[min=%.6f max=%.6f mean=%.6f] | "
-                    "adv[min=%.6f max=%.6f mean=%.6f] | delta_linf=%.6f | delta_l2=%.6f",
-                    current_round,
-                    already_logged + sample_idx + 1,
-                    self.LOGGED_SAMPLES_PER_ROUND,
-                    self.config.dataset_name,
-                    self.config.attack,
-                    float(getattr(self.generator, "last_epsilon", self.config.epsilon) or 0.0),
-                    int(y[sample_idx].detach().cpu().item()) if y.numel() > sample_idx else None,
-                    sample_clean.min().item(),
-                    sample_clean.max().item(),
-                    sample_clean.mean().item(),
-                    sample_adv.min().item(),
-                    sample_adv.max().item(),
-                    sample_adv.mean().item(),
-                    sample_delta.abs().max().item(),
-                    sample_delta.reshape(-1).norm(p=2).item(),
-                )
-                logging_training.info(
-                    "[AdversarialTrainingDefense] Round %s | Clean sample %s channel0 4x4:\n%s",
-                    current_round,
-                    already_logged + sample_idx + 1,
-                    self._format_patch(sample_clean),
-                )
-                logging_training.info(
-                    "[AdversarialTrainingDefense] Round %s | Adversarial sample %s channel0 4x4:\n%s",
-                    current_round,
-                    already_logged + sample_idx + 1,
-                    self._format_patch(sample_adv),
-                )
-                logging_training.info(
-                    "[AdversarialTrainingDefense] Round %s | Delta sample %s channel0 4x4:\n%s",
-                    current_round,
-                    already_logged + sample_idx + 1,
-                    self._format_patch(sample_delta),
-                )
-
-            self._logged_adversarial_samples_by_round[current_round] = already_logged + samples_to_log
 
-    @staticmethod
-    def _format_patch(sample: torch.Tensor, patch_size: int = 4) -> str:
-        if sample.dim() >= 3:
-            patch = sample[0, :patch_size, :patch_size]
-        elif sample.dim() == 2:
-            patch = sample[:patch_size, :patch_size]
-        else:
-            patch = sample[:patch_size]
-        values = patch.tolist()
-        if sample.dim() < 2:
-            return str([round(float(value), 6) for value in values])
-        return str([[round(float(value), 6) for value in row] for row in values])
+def _log_tabular_metadata(tabular_metadata: TabularAdversarialMetadata) -> None:
+    # Log a compact metadata summary to make CAA setup auditable.
+    integer_features = _feature_names_by_type(tabular_metadata, {INTEGER})
+    continuous_features = _feature_names_by_type(tabular_metadata, {CONTINUOUS})
+    categorical_features = _feature_names_by_type(tabular_metadata, {CATEGORICAL})
+    non_perturbable_features = _feature_names_excluding_types(
+        tabular_metadata,
+        {CONTINUOUS, INTEGER, CATEGORICAL},
+    )
+    logging.info(
+        "[AdversarialTrainingDefense] Tabular feature mask loaded | integer=%s | continuous=%s | "
+        "categorical=%s | categorical_groups=%s | non_perturbable=%s | integer_features=%s | "
+        "continuous_features=%s | categorical_preview=%s | non_perturbable_preview=%s",
+        len(integer_features),
+        len(continuous_features),
+        len(categorical_features),
+        len(tabular_metadata.categorical_groups or []),
+        len(non_perturbable_features),
+        integer_features,
+        continuous_features,
+        categorical_features[:20],
+        non_perturbable_features[:20],
+    )
+
+
+def _feature_names_by_type(tabular_metadata: TabularAdversarialMetadata, feature_types: set[str]) -> list[str]:
+    # Return feature names whose metadata type is included in feature_types.
+    return [
+        name
+        for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types, strict=True)
+        if feature_type in feature_types
+    ]
+
+
+def _feature_names_excluding_types(tabular_metadata: TabularAdversarialMetadata, feature_types: set[str]) -> list[str]:
+    # Return feature names whose metadata type is not included in feature_types.
+    return [
+        name
+        for name, feature_type in zip(tabular_metadata.feature_names, tabular_metadata.feature_types, strict=True)
+        if feature_type not in feature_types
+    ]
 
 
 def apply_adversarial_training_if_enabled(model, participant_config: dict[str, Any], partition=None) -> None:
+    # Attach the defense to the model only when the participant config enables it.
     defense = AdversarialTrainingDefense.from_participant_config(participant_config, partition=partition)
     if defense is not None:
         model.set_adversarial_training(defense)
@@ -515,3 +241,31 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
             defense.config.epsilon / 8.0,
             defense.config.mode,
         )
+
+
+__all__ = [
+    "CAA_TABULAR_DATASETS",
+    "ERR_ALPHA",
+    "ERR_APPLY_PROBABILITY",
+    "ERR_CLIP_BOUNDS",
+    "ERR_EPSILON",
+    "ERR_IMAGE_ATTACK",
+    "ERR_LOSS_WEIGHTS",
+    "ERR_MIXED_WEIGHTS",
+    "ERR_MODE",
+    "ERR_STEPS",
+    "ERR_TABULAR_METADATA",
+    "ERR_UNSUPPORTED_ATTACK",
+    "IMAGE_ADVERSARIAL_ATTACKS",
+    "IMAGE_DATASET_NORMALIZATION",
+    "AdversarialExampleGenerator",
+    "AdversarialTrainingConfig",
+    "AdversarialTrainingDefense",
+    "ImageAdversarialExampleGenerator",
+    "ImageFGSMGenerator",
+    "ImagePGDGenerator",
+    "TabularAdversarialExampleGenerator",
+    "TabularCAAGenerator",
+    "TabularConstraintSet",
+    "apply_adversarial_training_if_enabled",
+]
diff --git a/nebula/addons/defenses/adversarial_training_base.py b/nebula/addons/defenses/adversarial_training_base.py
new file mode 100644
index 000000000..3e3c1fc48
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training_base.py
@@ -0,0 +1,30 @@
+from abc import ABC, abstractmethod
+
+import torch
+
+
+class AdversarialExampleGenerator(ABC):
+    """Base interface for domain-specific adversarial example generators."""
+
+    last_epsilon: float | None = None
+
+    @abstractmethod
+    def generate(self, model, x, y, criterion):
+        # Concrete generators must return an adversarial version of the input batch.
+        raise NotImplementedError
+
+    def _sample_epsilon(self, device: torch.device) -> float:
+        # Sample the effective epsilon on the same device as the batch.
+        epsilon_max = float(self.config.epsilon)
+        if epsilon_max <= 0.0:
+            self.last_epsilon = 0.0
+            return 0.0
+
+        # Use a different attack strength per batch, capped by the user epsilon.
+        epsilon_min = epsilon_max / 4.0
+        epsilon_step = epsilon_max / 8.0
+        num_values = max(round((epsilon_max - epsilon_min) / epsilon_step) + 1, 1)
+        index = int(torch.randint(num_values, (), device=device).item())
+        epsilon = min(epsilon_min + index * epsilon_step, epsilon_max)
+        self.last_epsilon = epsilon
+        return epsilon
diff --git a/nebula/addons/defenses/adversarial_training_config.py b/nebula/addons/defenses/adversarial_training_config.py
new file mode 100644
index 000000000..930144b5d
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training_config.py
@@ -0,0 +1,94 @@
+from dataclasses import dataclass
+from typing import Any
+
+IMAGE_ADVERSARIAL_ATTACKS = {"fgsm", "pgd"}
+CAA_TABULAR_DATASETS = {"AdultCensus"}
+
+ERR_IMAGE_ATTACK = "image adversarial_training.attack must be one of: fgsm, pgd"
+ERR_MODE = "adversarial_training.mode must be one of: clean, adversarial, mixed"
+ERR_EPSILON = "adversarial_training.epsilon must be >= 0"
+ERR_ALPHA = "adversarial_training.alpha must be >= 0"
+ERR_STEPS = "adversarial_training.steps must be >= 1"
+ERR_APPLY_PROBABILITY = "adversarial_training.apply_probability must be in [0, 1]"
+ERR_LOSS_WEIGHTS = "adversarial_training loss weights must be >= 0"
+ERR_MIXED_WEIGHTS = "adversarial_training mixed mode requires at least one positive loss weight"
+ERR_CLIP_BOUNDS = "adversarial_training.clip_min must be smaller than clip_max"
+ERR_TABULAR_METADATA = "Tabular adversarial training requires tabular_metadata"
+ERR_UNSUPPORTED_ATTACK = "Unsupported adversarial training attack: {attack}"
+
+IMAGE_DATASET_NORMALIZATION = {
+    "MNIST": ((0.5,), (0.5,)),
+    "FashionMNIST": ((0.5,), (0.5,)),
+    "EMNIST": ((0.5,), (0.5,)),
+    "CIFAR10": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
+    "CIFAR100": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
+}
+
+
+@dataclass(frozen=True)
+class AdversarialTrainingConfig:
+    enabled: bool = False
+    dataset_name: str | None = None
+    domain: str = "image"
+    attack: str = "fgsm"
+    epsilon: float = 8.0 / 255.0
+    alpha: float | None = None
+    steps: int = 1
+    clean_weight: float = 0.5
+    adversarial_weight: float = 0.5
+    mode: str = "mixed"
+    apply_probability: float = 1.0
+    clip_min: float = 0.0
+    clip_max: float = 1.0
+    log_adversarial_metrics: bool = True
+
+
+def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTrainingConfig | None:
+    # Read the raw participant config and normalize it into a typed defense config.
+    raw = participant_config.get("defense_args", {}).get("adversarial_training", {})
+    if not raw or not raw.get("enabled", False):
+        return None
+
+    dataset_name = participant_config.get("data_args", {}).get("dataset")
+    domain = str(raw.get("domain", "image")).lower()
+    # Tabular adversarial training exposes a single attack: CAA.
+    attack = "caa" if domain == "tabular" else str(raw.get("attack", "fgsm")).lower()
+
+    return AdversarialTrainingConfig(
+        enabled=True,
+        dataset_name=dataset_name,
+        domain=domain,
+        attack=attack,
+        epsilon=float(raw.get("epsilon", 8.0 / 255.0)),
+        alpha=float(raw["alpha"]) if raw.get("alpha") is not None else None,
+        steps=int(raw.get("steps", 1)),
+        clean_weight=float(raw.get("clean_weight", 0.5)),
+        adversarial_weight=float(raw.get("adversarial_weight", 0.5)),
+        mode=str(raw.get("mode", "mixed")).lower(),
+        apply_probability=float(raw.get("apply_probability", 1.0)),
+        clip_min=float(raw.get("clip_min", 0.0)),
+        clip_max=float(raw.get("clip_max", 1.0)),
+        log_adversarial_metrics=bool(raw.get("log_adversarial_metrics", True)),
+    )
+
+
+def validate_config(config: AdversarialTrainingConfig) -> None:
+    # Fail early when a frontend/backend config value cannot produce a valid attack.
+    if config.mode not in {"clean", "adversarial", "mixed"}:
+        raise ValueError(ERR_MODE)
+    if config.domain == "image" and config.attack not in IMAGE_ADVERSARIAL_ATTACKS:
+        raise ValueError(ERR_IMAGE_ATTACK)
+    if config.epsilon < 0:
+        raise ValueError(ERR_EPSILON)
+    if config.alpha is not None and config.alpha < 0:
+        raise ValueError(ERR_ALPHA)
+    if config.steps < 1:
+        raise ValueError(ERR_STEPS)
+    if not 0.0 <= config.apply_probability <= 1.0:
+        raise ValueError(ERR_APPLY_PROBABILITY)
+    if config.clean_weight < 0 or config.adversarial_weight < 0:
+        raise ValueError(ERR_LOSS_WEIGHTS)
+    if config.mode == "mixed" and config.clean_weight + config.adversarial_weight == 0:
+        raise ValueError(ERR_MIXED_WEIGHTS)
+    if config.clip_min >= config.clip_max:
+        raise ValueError(ERR_CLIP_BOUNDS)
diff --git a/nebula/addons/defenses/adversarial_training_image.py b/nebula/addons/defenses/adversarial_training_image.py
new file mode 100644
index 000000000..d9a84ae1c
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training_image.py
@@ -0,0 +1,84 @@
+import torch
+
+from nebula.addons.defenses.adversarial_training_base import AdversarialExampleGenerator
+from nebula.addons.defenses.adversarial_training_config import AdversarialTrainingConfig
+
+
+class ImageAdversarialExampleGenerator(AdversarialExampleGenerator):
+    def __init__(self, config: AdversarialTrainingConfig, mean: tuple[float, ...], std: tuple[float, ...]):
+        # Store normalization values so attacks can move between pixel and model space.
+        self.config = config
+        self.mean = mean
+        self.std = std
+
+    def _channel_tensor(self, values: tuple[float, ...], x: torch.Tensor) -> torch.Tensor:
+        # Reshape per-channel values so they broadcast over the whole image batch.
+        shape = [1, len(values)] + [1] * max(x.dim() - 2, 0)
+        return torch.tensor(values, dtype=x.dtype, device=x.device).view(*shape)
+
+    def _epsilon(self, x: torch.Tensor, epsilon: float) -> torch.Tensor:
+        # Image batches are normalized, so pixel-space epsilon must be scaled by std.
+        std = self._channel_tensor(self.std, x)
+        return float(epsilon) / std
+
+    def _alpha(self, x: torch.Tensor, epsilon: float) -> torch.Tensor:
+        # Use the configured step size, or split epsilon across PGD steps by default.
+        alpha = self.config.alpha
+        if alpha is None:
+            alpha = epsilon / max(int(self.config.steps), 1)
+        std = self._channel_tensor(self.std, x)
+        return float(alpha) / std
+
+    def _bounds(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        # Convert valid pixel bounds to the normalized space where the model operates.
+        mean = self._channel_tensor(self.mean, x)
+        std = self._channel_tensor(self.std, x)
+        lower = (float(self.config.clip_min) - mean) / std
+        upper = (float(self.config.clip_max) - mean) / std
+        return lower, upper
+
+    def denormalize(self, x: torch.Tensor) -> torch.Tensor:
+        # Convert normalized tensors back to pixel scale for logging.
+        mean = self._channel_tensor(self.mean, x)
+        std = self._channel_tensor(self.std, x)
+        return (x * std + mean).clamp(float(self.config.clip_min), float(self.config.clip_max))
+
+    def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
+        # Keep the adversarial image inside both the epsilon ball and valid pixel bounds.
+        epsilon = self._epsilon(x_clean, epsilon)
+        lower, upper = self._bounds(x_clean)
+        x_adv = torch.max(torch.min(x_adv, x_clean + epsilon), x_clean - epsilon)
+        return torch.max(torch.min(x_adv, upper), lower)
+
+
+class ImageFGSMGenerator(ImageAdversarialExampleGenerator):
+    def generate(self, model, x, y, criterion):
+        # Build one adversarial image batch with a single gradient step.
+        epsilon = self._sample_epsilon(x.device)
+        x_adv = x.detach().clone().requires_grad_(True)
+        logits = model(x_adv)
+        loss = criterion(logits, y)
+        grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]
+        # FGSM takes one step in the sign of the loss gradient.
+        x_adv = x_adv + self._epsilon(x_adv, epsilon) * grad.sign()
+        return self._project(x_adv.detach(), x.detach(), epsilon)
+
+
+class ImagePGDGenerator(ImageAdversarialExampleGenerator):
+    def generate(self, model, x, y, criterion):
+        # Build one adversarial image batch with iterative projected gradient steps.
+        epsilon = self._sample_epsilon(x.device)
+        x_clean = x.detach()
+        x_adv = x_clean.clone()
+        steps = max(int(self.config.steps), 1)
+
+        for _ in range(steps):
+            x_adv = x_adv.detach().requires_grad_(True)
+            logits = model(x_adv)
+            loss = criterion(logits, y)
+            grad = torch.autograd.grad(loss, x_adv, only_inputs=True)[0]
+            # PGD repeats smaller FGSM-like steps and projects after each step.
+            x_adv = x_adv + self._alpha(x_adv, epsilon) * grad.sign()
+            x_adv = self._project(x_adv.detach(), x_clean, epsilon)
+
+        return x_adv.detach()
diff --git a/nebula/addons/defenses/adversarial_training_logging.py b/nebula/addons/defenses/adversarial_training_logging.py
new file mode 100644
index 000000000..2e0e489cb
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training_logging.py
@@ -0,0 +1,225 @@
+import logging
+
+import torch
+
+from nebula.addons.defenses.adversarial_training_config import AdversarialTrainingConfig
+from nebula.config.config import TRAINING_LOGGER
+
+logging_training = logging.getLogger(TRAINING_LOGGER)
+
+
+class AdversarialTrainingSampleLogger:
+    """Logs representative clean/adversarial samples without affecting training tensors."""
+
+    LOGGED_SAMPLES_PER_ROUND = 3
+
+    def __init__(self, config: AdversarialTrainingConfig, generator):
+        # Track, per round number, how many samples this logger instance has already written.
+        self._logged_samples_by_round: dict[int, int] = {}
+        self.generator = generator
+        self.config = config
+
+    def log(self, model, x_clean: torch.Tensor, x_adv: torch.Tensor, y: torch.Tensor) -> None:
+        # Log at most LOGGED_SAMPLES_PER_ROUND clean/adversarial pairs per round to keep logs quiet.
+        if not self.config.log_adversarial_metrics:
+            return
+        current_round = int(getattr(model, "round", 0))
+        already_logged = self._logged_samples_by_round.get(current_round, 0)
+        remaining = self.LOGGED_SAMPLES_PER_ROUND - already_logged
+        if remaining <= 0:
+            return
+
+        with torch.no_grad():
+            # no_grad alone still lets BatchNorm/Dropout act, so predict in eval mode and restore after.
+            modes = [(m, m.training) for m in model.modules()] if isinstance(model, torch.nn.Module) else []
+            if modes:
+                model.eval()
+            try:
+                clean_view, adv_view = x_clean.detach(), x_adv.detach()
+                clean_predictions = torch.argmax(model(clean_view), dim=1)
+                adversarial_predictions = torch.argmax(model(adv_view), dim=1)
+            finally:
+                for module, mode in modes:
+                    module.training = mode
+            # Display values can be denormalized for images; tabular tensors are already in model space.
+            if hasattr(self.generator, "denormalize"):
+                clean_view = self.generator.denormalize(clean_view)
+                adv_view = self.generator.denormalize(adv_view)
+            delta = adv_view - clean_view
+            samples_to_log = min(remaining, int(clean_view.size(0)))
+            for sample_idx in range(samples_to_log):
+                self._log_sample(
+                    current_round=current_round,
+                    sample_number=already_logged + sample_idx + 1,
+                    clean=clean_view[sample_idx].detach().float().cpu(),
+                    adversarial=adv_view[sample_idx].detach().float().cpu(),
+                    delta=delta[sample_idx].detach().float().cpu(),
+                    label=self._safe_scalar(y, sample_idx),
+                    clean_prediction=self._safe_scalar(clean_predictions, sample_idx),
+                    adversarial_prediction=self._safe_scalar(adversarial_predictions, sample_idx),
+                )
+            self._logged_samples_by_round[current_round] = already_logged + samples_to_log
+
+    def _log_sample(
+        self,
+        current_round: int,
+        sample_number: int,
+        clean: torch.Tensor,
+        adversarial: torch.Tensor,
+        delta: torch.Tensor,
+        label: int | None,
+        clean_prediction: int | None,
+        adversarial_prediction: int | None,
+    ) -> None:
+        # Emit the shared summary line, then the domain-specific details (full vectors or a 4x4 patch).
+        epsilon_effective = float(getattr(self.generator, "last_epsilon", self.config.epsilon) or 0.0)
+        logging_training.info(
+            "[AdversarialTrainingDefense] Round %s | Sample %s/%s before/after distortion | "
+            "dataset=%s | attack=%s | epsilon_effective=%.6f | label=%s | "
+            "clean_pred=%s | adversarial_pred=%s | "
+            "clean[min=%.6f max=%.6f mean=%.6f] | "
+            "adv[min=%.6f max=%.6f mean=%.6f] | delta_linf=%.6f | delta_l2=%.6f",
+            current_round,
+            sample_number,
+            self.LOGGED_SAMPLES_PER_ROUND,
+            self.config.dataset_name,
+            self.config.attack,
+            epsilon_effective,
+            label,
+            clean_prediction,
+            adversarial_prediction,
+            clean.min().item(),
+            clean.max().item(),
+            clean.mean().item(),
+            adversarial.min().item(),
+            adversarial.max().item(),
+            adversarial.mean().item(),
+            delta.abs().max().item(),
+            delta.reshape(-1).norm(p=2).item(),
+        )
+        if self.config.domain != "tabular":
+            self._log_image_sample(current_round, sample_number, clean, adversarial, delta)
+            return
+        self._log_tabular_sample(current_round, sample_number, clean, adversarial, delta)
+
+    def _log_tabular_sample(
+        self,
+        current_round: int,
+        sample_number: int,
+        clean: torch.Tensor,
+        adversarial: torch.Tensor,
+        delta: torch.Tensor,
+    ) -> None:
+        # Tabular features carry semantic meaning, so every vector is logged in full.
+        feature_names = getattr(getattr(self.generator, "metadata", None), "feature_names", None)
+        format_vector = self._format_tabular_vector
+
+        sections = (
+            (
+                "[AdversarialTrainingDefense] Round %s | Clean tabular sample %s:\n%s",
+                lambda: format_vector(clean, feature_names),
+            ),
+            (
+                "[AdversarialTrainingDefense] Round %s | Final adversarial tabular sample %s:\n%s",
+                lambda: format_vector(adversarial, feature_names),
+            ),
+            (
+                "[AdversarialTrainingDefense] Round %s | Tabular perturbation delta sample %s:\n%s",
+                lambda: format_vector(delta, feature_names),
+            ),
+            (
+                "[AdversarialTrainingDefense] Round %s | Changed tabular features sample %s:\n%s",
+                lambda: self._format_tabular_changes(clean, adversarial, delta, feature_names),
+            ),
+        )
+
+        # Render lazily so each section is formatted right before it is emitted.
+        for message, render in sections:
+            logging_training.info(message, current_round, sample_number, render())
+
+    def _log_image_sample(
+        self,
+        current_round: int,
+        sample_number: int,
+        clean: torch.Tensor,
+        adversarial: torch.Tensor,
+        delta: torch.Tensor,
+    ) -> None:
+        # Images are summarized by a small leading patch; the full tensor is never logged.
+        patches = (
+            (
+                "[AdversarialTrainingDefense] Round %s | Clean sample %s channel0 4x4:\n%s",
+                clean,
+            ),
+            (
+                "[AdversarialTrainingDefense] Round %s | Adversarial sample %s channel0 4x4:\n%s",
+                adversarial,
+            ),
+            (
+                "[AdversarialTrainingDefense] Round %s | Delta sample %s channel0 4x4:\n%s",
+                delta,
+            ),
+        )
+        for message, tensor in patches:
+            logging_training.info(
+                message, current_round, sample_number, self._format_patch(tensor)
+            )
+
+    @staticmethod
+    def _safe_scalar(values: torch.Tensor, sample_idx: int) -> int | None:
+        # Return element sample_idx as a Python int, or None when the tensor is too short.
+        if sample_idx < values.numel():
+            return int(values[sample_idx].detach().cpu().item())
+        return None
+
+    @staticmethod
+    def _format_patch(sample: torch.Tensor, patch_size: int = 4) -> str:
+        # Format a small leading patch (index 0 of every leading axis) so image logs stay readable.
+        # Tensors of any rank are accepted: extra leading axes are dropped and scalars become one value.
+        patch = sample
+        while patch.dim() > 2:
+            patch = patch[0]
+        if patch.dim() == 0:
+            patch = patch.reshape(1)
+        if patch.dim() == 1:
+            return str([round(float(value), 6) for value in patch[:patch_size].tolist()])
+        corner = patch[:patch_size, :patch_size].tolist()
+        return str([[round(float(value), 6) for value in row] for row in corner])
+
+    @staticmethod
+    def _format_tabular_vector(sample: torch.Tensor, feature_names: list[str] | None = None) -> str:
+        # Render a flattened sample as {feature name: value rounded to 6 decimals}.
+        flat_values = sample.reshape(-1).tolist()
+        labels = feature_names if feature_names else [f"feature_{idx}" for idx in range(len(flat_values))]
+        return str(dict(zip(map(str, labels), (round(float(v), 6) for v in flat_values), strict=False)))
+
+    @staticmethod
+    def _format_tabular_changes(
+        clean: torch.Tensor,
+        adversarial: torch.Tensor,
+        delta: torch.Tensor,
+        feature_names: list[str] | None = None,
+        tolerance: float = 1e-7,
+    ) -> str:
+        # Report only the features whose absolute perturbation exceeds the tolerance;
+        # anything smaller is treated as numerical noise and left out of the mapping.
+        delta_values = delta.reshape(-1).tolist()
+        names = feature_names or [f"feature_{idx}" for idx in range(len(delta_values))]
+        rows = zip(
+            names,
+            clean.reshape(-1).tolist(),
+            adversarial.reshape(-1).tolist(),
+            delta_values,
+            strict=False,
+        )
+
+        changes = {}
+        for name, clean_value, adversarial_value, delta_value in rows:
+            if not abs(float(delta_value)) > tolerance:
+                continue
+            changes[str(name)] = {
+                "clean": round(float(clean_value), 6),
+                "adversarial": round(float(adversarial_value), 6),
+                "delta": round(float(delta_value), 6),
+            }
+        return str(changes)
diff --git a/nebula/addons/defenses/adversarial_training_tabular.py b/nebula/addons/defenses/adversarial_training_tabular.py
new file mode 100644
index 000000000..0c62e1217
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training_tabular.py
@@ -0,0 +1,323 @@
+import torch
+import torch.nn.functional as F
+
+from nebula.addons.defenses.adversarial_training_base import AdversarialExampleGenerator
+from nebula.addons.defenses.adversarial_training_config import AdversarialTrainingConfig
+from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
+
+
+class TabularConstraintSet:
+    """Projection and mutation rules derived from tabular metadata."""
+
+    def __init__(self, metadata: TabularAdversarialMetadata):
+        # Derived mask/bound tensors are memoized per (device, dtype) pair instead of being rebuilt.
+        self._tensor_cache: dict[tuple[torch.device, torch.dtype], dict[str, torch.Tensor]] = {}
+        self.metadata = metadata
+
+    def tensors(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
+        # Return the masks, bounds and integer steps matching x's device and dtype (memoized).
+        cache_key = (x.device, x.dtype)
+        if (hit := self._tensor_cache.get(cache_key)) is not None:
+            return hit
+
+        def type_mask(kind) -> torch.Tensor:
+            # Boolean row vector flagging the columns whose declared feature type equals kind.
+            flags = [feature_type == kind for feature_type in self.metadata.feature_types]
+            return torch.tensor(flags, dtype=torch.bool, device=x.device).view(1, -1)
+
+        def bound(values) -> torch.Tensor:
+            # Row vector of per-feature bounds in x's dtype and on x's device.
+            return torch.tensor(values, dtype=x.dtype, device=x.device).view(1, -1)
+
+        # Built once per device/dtype because CAA consults these tensors at every step.
+        continuous, integer, categorical = type_mask(CONTINUOUS), type_mask(INTEGER), type_mask(CATEGORICAL)
+        minimum = bound(self.metadata.feature_min_norm)
+        numeric = continuous | integer
+        built = {
+            "continuous": continuous,
+            "integer": integer,
+            "categorical": categorical,
+            "min": minimum,
+            "max": bound(self.metadata.feature_max_norm),
+            "numeric": numeric,
+            "perturbable": numeric | categorical,
+            "integer_step": self._integer_steps(minimum),
+        }
+
+        self._tensor_cache[cache_key] = built
+        return built
+
+    def perturbable_mask(self, x: torch.Tensor) -> torch.Tensor:
+        # Boolean row mask of columns that may be modified (continuous, integer or categorical).
+        return self.tensors(x)["perturbable"]
+
+    def project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
+        # Map a candidate onto the valid tabular domain: box bounds, integer grid, one-hot groups.
+        tensors = self.tensors(x_clean)
+        radius = float(epsilon)
+        is_categorical, feature_min, feature_max = tensors["categorical"], tensors["min"], tensors["max"]
+        # Only numeric columns are tied to the epsilon ball; one-hot columns may span the dataset range.
+        lower = torch.where(is_categorical, feature_min, torch.maximum(feature_min, x_clean - radius))
+        upper = torch.where(is_categorical, feature_max, torch.minimum(feature_max, x_clean + radius))
+        clipped = torch.max(torch.min(x_adv, upper), lower)
+
+        snapped = self._project_integer_features(clipped, x_clean, lower, upper, tensors)
+        snapped = self.project_categorical_groups(snapped)
+        return torch.where(tensors["perturbable"], snapped, x_clean)
+
+    def project_categorical_groups(self, x_adv: torch.Tensor) -> torch.Tensor:
+        # Snap each one-hot group to a valid encoding by activating only its largest entry.
+        groups = self.metadata.categorical_groups
+        if not groups:
+            return x_adv
+
+        result = x_adv.clone()
+        for columns in groups:
+            # Values are read from the untouched input; only the copy is written to.
+            column_index = torch.tensor(columns, dtype=torch.long, device=x_adv.device)
+            winner = x_adv.index_select(1, column_index).argmax(dim=1)
+            encoded = F.one_hot(winner, num_classes=len(columns))
+            result[:, column_index] = encoded.to(dtype=x_adv.dtype)
+        return result
+
+    def categorical_gradient_step(self, x_candidate: torch.Tensor, grad: torch.Tensor) -> torch.Tensor:
+        # Discrete step for one-hot groups: activate the category whose gradient entry is largest.
+        groups = self.metadata.categorical_groups
+        if not groups:
+            return x_candidate
+
+        stepped = x_candidate.clone()
+        for columns in groups:
+            column_index = torch.tensor(columns, dtype=torch.long, device=x_candidate.device)
+            steepest = grad.index_select(1, column_index).argmax(dim=1)
+            encoded = F.one_hot(steepest, num_classes=len(columns))
+            stepped[:, column_index] = encoded.to(dtype=x_candidate.dtype)
+        return stepped
+
+    def randomize_categorical_groups(
+        self,
+        candidates: torch.Tensor,
+        mutation_probability: float,
+    ) -> torch.Tensor:
+        # Resample the active category of each one-hot group with the given probability.
+        groups = self.metadata.categorical_groups
+        if not groups:
+            return candidates
+
+        device, shape = candidates.device, candidates.shape
+        rows = candidates.reshape(-1, shape[-1]).clone()
+        for columns in groups:
+            column_index = torch.tensor(columns, dtype=torch.long, device=device)
+            active = rows.index_select(1, column_index).argmax(dim=1)
+            # Draw order (randint, then rand) is kept fixed so seeded runs stay reproducible.
+            proposal = torch.randint(len(columns), active.shape, device=device)
+            flip = torch.rand(active.shape, device=device) < float(mutation_probability)
+            encoded = F.one_hot(torch.where(flip, proposal, active), num_classes=len(columns))
+            rows[:, column_index] = encoded.to(dtype=candidates.dtype)
+        return rows.reshape(shape)
+
+    def _integer_steps(self, minimum: torch.Tensor) -> torch.Tensor:
+        # Per-feature grid spacing: 1 by default, overridden by metadata.integer_step_norm entries.
+        spacing = torch.ones_like(minimum)
+        for column, width in (self.metadata.integer_step_norm or {}).items():
+            spacing[0, int(column)] = float(width)
+        return spacing
+
+    def _project_integer_features(
+        self,
+        x_adv: torch.Tensor,
+        x_clean: torch.Tensor,
+        lower: torch.Tensor,
+        upper: torch.Tensor,
+        tensors: dict[str, torch.Tensor],
+    ) -> torch.Tensor:
+        # Snap integer columns onto their grid (min + k * step) without leaving [lower, upper].
+        is_integer = tensors["integer"]
+        if not is_integer.any():
+            return x_adv
+
+        origin = tensors["min"]
+        spacing = torch.clamp(tensors["integer_step"], min=torch.finfo(x_adv.dtype).eps)
+        nearest = torch.round((x_adv - origin) / spacing) * spacing + origin
+        first_point = torch.ceil((lower - origin) / spacing) * spacing + origin
+        last_point = torch.floor((upper - origin) / spacing) * spacing + origin
+        snapped = torch.max(torch.min(nearest, last_point), first_point)
+        # When no grid point fits inside the interval, keep the clean value for that entry.
+        snapped = torch.where(first_point <= last_point, snapped, x_clean)
+        return torch.where(is_integer, snapped, x_adv)
+
+
+class TabularAdversarialExampleGenerator(AdversarialExampleGenerator):
+    """Common state and scoring helpers for constrained tabular adversarial generators."""
+
+    def __init__(self, config: AdversarialTrainingConfig, metadata: TabularAdversarialMetadata):
+        # One constraint set is built from the metadata and kept on the instance for reuse.
+        self.metadata = metadata
+        self.constraints = TabularConstraintSet(metadata)
+        self.config = config
+
+    def _alpha(self, epsilon: float) -> float:
+        # Step size: the configured alpha if set, otherwise epsilon split evenly over the steps.
+        if self.config.alpha is None:
+            return float(epsilon) / max(int(self.config.steps), 1)
+        return float(self.config.alpha)
+
+    def _margin(self, logits: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        # Margin = best wrong-class logit minus the true-class logit.
+        # It is positive when some wrong class outranks the true one.
+        is_true_class = F.one_hot(y, num_classes=logits.size(1)).bool()
+        best_wrong = logits.masked_fill(is_true_class, float("-inf")).max(dim=1).values
+        return best_wrong - logits.gather(1, y.view(-1, 1)).squeeze(1)
+
+    def _success_mask(self, model, x_adv: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        # True for samples whose predicted class on x_adv differs from the label y.
+        with torch.no_grad():
+            return model(x_adv).argmax(dim=1).ne(y)
+
+    def _better_mask(
+        self,
+        candidate_success: torch.Tensor,
+        candidate_score: torch.Tensor,
+        best_success: torch.Tensor,
+        best_score: torch.Tensor,
+    ) -> torch.Tensor:
+        # A candidate wins by newly succeeding, or by a higher margin at equal success status.
+        newly_successful = candidate_success & ~best_success
+        higher_margin = (candidate_success == best_success) & (candidate_score > best_score)
+        return newly_successful | higher_margin
+
+
+class TabularCAAGenerator(TabularAdversarialExampleGenerator):
+    """CAA-style generator for constrained tabular adversarial training."""
+
+    def generate(self, model, x, y, criterion):
+        # Produce a constrained adversarial batch: gradient search first, mutation as fallback.
+        epsilon = self._sample_epsilon(x.device)
+        x_clean = x.detach()
+        if epsilon <= 0.0:
+            return x_clean
+
+        x_adv = self._capgd_phase(model, x_clean, y, criterion, epsilon)
+        unsolved = ~self._success_mask(model, x_adv, y)
+        if not unsolved.any():
+            return x_adv.detach()
+        # Only samples the model still classifies correctly go through the evolutionary search.
+        x_adv = x_adv.clone()
+        x_adv[unsolved] = self._evolutionary_phase(model, x_clean[unsolved], y[unsolved], x_adv[unsolved], epsilon)
+        return x_adv.detach()
+
+    def _capgd_phase(self, model, x_clean: torch.Tensor, y: torch.Tensor, criterion, epsilon: float) -> torch.Tensor:
+        # Gradient phase: take projected signed-gradient steps and return the best candidate seen per sample.
+        steps = max(int(self.config.steps), 1)
+        step_size = self._alpha(epsilon)
+        perturbable_mask = self.constraints.perturbable_mask(x_clean)
+        x_adv = x_clean.clone()
+        best_adv = x_adv.clone()
+        best_score = torch.full((x_clean.size(0),), float("-inf"), dtype=x_clean.dtype, device=x_clean.device)
+        best_success = torch.zeros(x_clean.size(0), dtype=torch.bool, device=x_clean.device)
+        previous_loss = None  # no reference loss exists before the first step
+
+        for _ in range(steps):
+            x_grad = x_adv.detach().requires_grad_(True)
+            logits = model(x_grad)
+            loss = criterion(logits, y)
+            grad = torch.autograd.grad(loss, x_grad, only_inputs=True)[0]
+
+            candidate = x_adv.detach() + float(step_size) * grad.sign() * perturbable_mask
+            candidate = self.constraints.categorical_gradient_step(candidate, grad)
+            candidate = self.constraints.project(candidate, x_clean, epsilon)
+
+            with torch.no_grad():
+                candidate_logits = model(candidate)
+                candidate_score = self._margin(candidate_logits, y)
+                candidate_success = torch.argmax(candidate_logits, dim=1) != y
+                # Per sample: a newly successful candidate wins; at equal status the higher margin wins.
+                better = self._better_mask(candidate_success, candidate_score, best_success, best_score)
+                best_adv = torch.where(better.view(-1, 1), candidate, best_adv)
+                best_score = torch.where(better, candidate_score, best_score)
+                best_success = best_success | candidate_success
+
+                candidate_loss = F.cross_entropy(candidate_logits, y)
+                if previous_loss is not None and candidate_loss <= previous_loss:
+                    step_size *= 0.75  # shrink the step when the batch loss did not increase
+                previous_loss = candidate_loss
+
+            x_adv = candidate  # the next step starts from the latest candidate, not from best_adv
+
+        return best_adv.detach()
+
+    def _evolutionary_phase(
+        self,
+        model,
+        x_clean: torch.Tensor,
+        y: torch.Tensor,
+        x_seed: torch.Tensor,
+        epsilon: float,
+    ) -> torch.Tensor:
+        # Random-search fallback: score a fresh population each generation and keep the best per sample.
+        if x_clean.numel() == 0:
+            return x_clean
+
+        tensors = self.constraints.tensors(x_clean)
+        perturbable_mask = tensors["perturbable"].to(dtype=x_clean.dtype)
+        batch_size = x_clean.size(0)
+        population_size = min(max(int(self.config.steps) * 4, 8), 32)  # clamped to [8, 32]
+        generations = min(max(int(self.config.steps), 3), 20)  # clamped to [3, 20]
+        mutation_scale = max(float(epsilon) / 2.0, torch.finfo(x_clean.dtype).eps)
+
+        best_adv = self.constraints.project(x_seed.detach(), x_clean, epsilon)
+        with torch.no_grad():
+            best_logits = model(best_adv)
+            best_score = self._margin(best_logits, y)
+            best_success = torch.argmax(best_logits, dim=1) != y
+
+        for _ in range(generations):
+            random_noise = torch.empty(
+                population_size,
+                *x_clean.shape,
+                dtype=x_clean.dtype,
+                device=x_clean.device,
+            ).uniform_(-float(epsilon), float(epsilon))
+            mutations = torch.randn(
+                population_size,
+                *x_clean.shape,
+                dtype=x_clean.dtype,
+                device=x_clean.device,
+            ) * mutation_scale
+            candidates = x_clean.unsqueeze(0) + random_noise * perturbable_mask
+            candidates[0] = best_adv + mutations[0] * perturbable_mask  # slot 0 mutates the current best
+            if population_size > 1:
+                candidates[1:] = candidates[1:] + mutations[1:] * perturbable_mask
+            candidates = self.constraints.randomize_categorical_groups(candidates, mutation_probability=0.35)
+
+            flat_candidates = candidates.reshape(population_size * batch_size, -1)
+            flat_clean = x_clean.repeat(population_size, 1)
+            # Project every candidate against its own clean sample before it is scored.
+            flat_candidates = self.constraints.project(flat_candidates, flat_clean, epsilon)
+            repeated_y = y.repeat(population_size)
+
+            with torch.no_grad():
+                logits = model(flat_candidates)
+                scores = self._margin(logits, repeated_y).view(population_size, batch_size)
+                successes = (torch.argmax(logits, dim=1) != repeated_y).view(population_size, batch_size)
+                candidate_rank = scores + successes.to(dtype=scores.dtype) * 1_000.0  # bonus for successes
+                best_population_idx = candidate_rank.argmax(dim=0)
+
+                selected = flat_candidates.view(population_size, batch_size, -1)[
+                    best_population_idx,
+                    torch.arange(batch_size, device=x_clean.device),
+                ]
+                selected_score = scores[
+                    best_population_idx,
+                    torch.arange(batch_size, device=x_clean.device),
+                ]
+                selected_success = successes[
+                    best_population_idx,
+                    torch.arange(batch_size, device=x_clean.device),
+                ]
+                better = self._better_mask(selected_success, selected_score, best_success, best_score)
+                best_adv = torch.where(better.view(-1, 1), selected, best_adv)
+                best_score = torch.where(better, selected_score, best_score)
+                best_success = best_success | selected_success
+
+        return best_adv.detach()
diff --git a/nebula/addons/defenses/feature_squeezing.py b/nebula/addons/defenses/feature_squeezing.py
index 2ca27acd1..6ff592d12 100644
--- a/nebula/addons/defenses/feature_squeezing.py
+++ b/nebula/addons/defenses/feature_squeezing.py
@@ -139,7 +139,7 @@ def _squeeze_image_array(self, arr: np.ndarray) -> np.ndarray:
         return self._quantize01((arr_float - low) / value_range) * value_range + low
 
     # ------------------------------------------------------------------
-    # Shared helpers and diagnostics
+    # Helpers and diagnostics
     # ------------------------------------------------------------------
 
     def _quantize01(self, arr: np.ndarray) -> np.ndarray:
diff --git a/nebula/core/datasets/adultcensus/adultcensus.py b/nebula/core/datasets/adultcensus/adultcensus.py
index 5be603bfd..51aa5e7a2 100644
--- a/nebula/core/datasets/adultcensus/adultcensus.py
+++ b/nebula/core/datasets/adultcensus/adultcensus.py
@@ -1,14 +1,14 @@
 # nebula/core/datasets/adultcensus/adultcensus.py
 
 import os
-from typing import Tuple, Any
+from typing import Any, ClassVar
 
 import numpy as np
 import torch
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
-from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
+from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
 
 
 class AdultCensusTorchDataset(Dataset):
@@ -24,7 +24,8 @@ def __init__(
         feature_names: list[str] | None = None,
         continuous_features: list[int] | None = None,
         integer_features: list[int] | None = None,
-        non_perturbable_features: list[int] | None = None,
+        categorical_features: list[int] | None = None,
+        categorical_groups: list[list[int]] | None = None,
         tabular_metadata: dict | None = None,
     ):
         if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
@@ -47,14 +48,15 @@ def __init__(
         self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
         self.continuous_features = continuous_features or []
         self.integer_features = integer_features or []
-        self.non_perturbable_features = non_perturbable_features or []
+        self.categorical_features = categorical_features or []
+        self.categorical_groups = categorical_groups or []
         self.tabular_metadata = tabular_metadata
         self.input_dim = int(self.x.shape[1])
 
     def __len__(self) -> int:
         return int(self.y.shape[0])
 
-    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
         x_i: torch.Tensor = torch.from_numpy(self.x[idx])
         y_i: torch.Tensor = torch.tensor(int(self.y[idx]), dtype=torch.long)
         return x_i, y_i
@@ -68,7 +70,7 @@ def __init__(self, file_path: str, prefix: str, config: Any, empty: bool = False
         super().__init__(file_path, prefix, config, empty)
         self.transform = None  # no torchvision transforms for tabular
 
-    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
         data, target = super().__getitem__(idx)
 
         # Some Nebula handlers may wrap data in tuples
@@ -99,8 +101,8 @@ class AdultCensusDataset(NebulaDataset):
     - mixed tabular data -> numeric model input via preprocessing
     - deterministic stratified train/test split
     """
-    PERTURBABLE_CONTINUOUS_COLUMNS = []
-    PERTURBABLE_INTEGER_COLUMNS = [
+    CONTINUOUS_COLUMNS: ClassVar[list[str]] = []
+    INTEGER_COLUMNS: ClassVar[list[str]] = [
         "age",
         "fnlwgt",
         "education-num",
@@ -108,7 +110,7 @@ class AdultCensusDataset(NebulaDataset):
         "capital-loss",
         "hours-per-week",
     ]
-    NON_PERTURBABLE_COLUMNS = [
+    CATEGORICAL_COLUMNS: ClassVar[list[str]] = [
         "workclass",
         "education",
         "marital-status",
@@ -167,18 +169,18 @@ def _make_ohe_dense():
 
     @classmethod
     def _validate_manual_schema(cls, columns) -> None:
-        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
-        integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
-        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
+        continuous_columns = set(cls.CONTINUOUS_COLUMNS)
+        integer_columns = set(cls.INTEGER_COLUMNS)
+        categorical_columns = set(cls.CATEGORICAL_COLUMNS)
         overlapping_columns = sorted(
             (continuous_columns & integer_columns)
-            | (continuous_columns & non_perturbable_columns)
-            | (integer_columns & non_perturbable_columns)
+            | (continuous_columns & categorical_columns)
+            | (integer_columns & categorical_columns)
         )
         if overlapping_columns:
             raise ValueError(f"AdultCensusDataset columns configured twice: {overlapping_columns}")
 
-        configured_columns = continuous_columns | integer_columns | non_perturbable_columns
+        configured_columns = continuous_columns | integer_columns | categorical_columns
         dataset_columns = set(columns)
         missing_columns = sorted(configured_columns - dataset_columns)
         if missing_columns:
@@ -187,7 +189,7 @@ def _validate_manual_schema(cls, columns) -> None:
         if unconfigured_columns:
             raise ValueError(f"AdultCensusDataset has unconfigured columns: {unconfigured_columns}")
 
-    def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensusTorchDataset]:
+    def load_adult_census_dataset(self) -> tuple[AdultCensusTorchDataset, AdultCensusTorchDataset]:
         """
         Loads Adult dataset from OpenML and preprocesses to all-numeric features.
 
@@ -198,7 +200,7 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
           4) ColumnTransformer:
               - continuous: median impute + StandardScaler
               - integer: median impute + StandardScaler
-              - non_perturbable: most_frequent impute + OneHotEncoder(dense)
+              - categorical: most_frequent impute + OneHotEncoder(dense)
           5) train/test split (stratified), fit preprocessing only on train (avoid leakage)
         """
         data_dir: str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
@@ -206,11 +208,11 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
 
         try:
             import pandas as pd
+            from sklearn.compose import ColumnTransformer
             from sklearn.datasets import fetch_openml
+            from sklearn.impute import SimpleImputer
             from sklearn.model_selection import train_test_split
-            from sklearn.compose import ColumnTransformer
             from sklearn.pipeline import Pipeline
-            from sklearn.impute import SimpleImputer
             from sklearn.preprocessing import StandardScaler
         except Exception as e:
             raise ImportError(
@@ -231,13 +233,13 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
         X_df = X_df.replace(r"^\s*\?\s*$", np.nan, regex=True)
         self._validate_manual_schema(X_df.columns)
 
-        numeric_columns = self.PERTURBABLE_CONTINUOUS_COLUMNS + self.PERTURBABLE_INTEGER_COLUMNS
+        numeric_columns = self.CONTINUOUS_COLUMNS + self.INTEGER_COLUMNS
         for column in numeric_columns:
             X_df[column] = pd.to_numeric(X_df[column], errors="coerce")
-        for column in self.NON_PERTURBABLE_COLUMNS:
+        for column in self.CATEGORICAL_COLUMNS:
             X_df[column] = X_df[column].astype(object)
 
-        configured_columns = numeric_columns + self.NON_PERTURBABLE_COLUMNS
+        configured_columns = numeric_columns + self.CATEGORICAL_COLUMNS
         valid_rows = ~X_df[configured_columns].isna().any(axis=1)
         removed_rows = int((~valid_rows).sum())
         if removed_rows:
@@ -254,7 +256,7 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
             ]
         )
 
-        non_perturbable_transformer = Pipeline(
+        categorical_transformer = Pipeline(
             steps=[
                 ("impute", SimpleImputer(strategy="most_frequent")),
                 ("ohe", self._make_ohe_dense()),
@@ -262,11 +264,12 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
         )
 
         transformers = []
-        if self.PERTURBABLE_CONTINUOUS_COLUMNS:
-            transformers.append(("continuous", numeric_transformer, self.PERTURBABLE_CONTINUOUS_COLUMNS))
-        if self.PERTURBABLE_INTEGER_COLUMNS:
-            transformers.append(("integer", numeric_transformer, self.PERTURBABLE_INTEGER_COLUMNS))
-        transformers.append(("non_perturbable", non_perturbable_transformer, self.NON_PERTURBABLE_COLUMNS))
+        if self.CONTINUOUS_COLUMNS:
+            transformers.append(("continuous", numeric_transformer, self.CONTINUOUS_COLUMNS))
+        if self.INTEGER_COLUMNS:
+            transformers.append(("integer", numeric_transformer, self.INTEGER_COLUMNS))
+        if self.CATEGORICAL_COLUMNS:
+            transformers.append(("categorical", categorical_transformer, self.CATEGORICAL_COLUMNS))
 
         preprocessor = ColumnTransformer(transformers=transformers, remainder="drop")
 
@@ -306,12 +309,28 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
             idx for idx, name in enumerate(feature_names)
             if name.startswith("integer__")
         ]
-        non_perturbable_features = [
+        categorical_features = [
             idx for idx, name in enumerate(feature_names)
-            if name.startswith("non_perturbable__")
+            if name.startswith("categorical__")
         ]
         continuous_feature_set = set(continuous_features)
         integer_feature_set = set(integer_features)
+        categorical_feature_set = set(categorical_features)
+        assigned_feature_set = continuous_feature_set | integer_feature_set | categorical_feature_set
+        unknown_features = [
+            feature_names[idx]
+            for idx in range(len(feature_names))
+            if idx not in assigned_feature_set
+        ]
+        if unknown_features:
+            raise ValueError(f"AdultCensusDataset generated untyped features: {unknown_features}")
+        feature_type_by_idx = {
+            **{idx: CONTINUOUS for idx in continuous_feature_set},
+            **{idx: INTEGER for idx in integer_feature_set},
+            **{idx: CATEGORICAL for idx in categorical_feature_set},
+        }
+
+        categorical_groups = self._build_categorical_groups(feature_names)
         integer_step_norm = {}
         if integer_features:
             integer_scaler = preprocessor.named_transformers_["integer"].named_steps["scaler"]
@@ -321,22 +340,20 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
             }
         tabular_metadata = TabularAdversarialMetadata(
             feature_names=feature_names,
-            feature_types=[
-                CONTINUOUS if idx in continuous_feature_set
-                else INTEGER if idx in integer_feature_set
-                else NON_PERTURBABLE
-                for idx in range(len(feature_names))
-            ],
+            feature_types=[feature_type_by_idx[idx] for idx in range(len(feature_names))],
             feature_min_norm=np.min(X_train_np, axis=0).astype(float).tolist(),
             feature_max_norm=np.max(X_train_np, axis=0).astype(float).tolist(),
             integer_step_norm=integer_step_norm,
+            categorical_groups=categorical_groups,
         ).to_dict()
         logging.getLogger().info(
             "[AdultCensus] Tabular adversarial feature mask | continuous=%s | integer=%s | "
-            "non_perturbable=%s | continuous_features=%s | integer_features=%s | integer_step_norm=%s",
+            "categorical=%s | categorical_groups=%s | continuous_features=%s | integer_features=%s | "
+            "integer_step_norm=%s",
             len(continuous_features),
             len(integer_features),
-            len(non_perturbable_features),
+            len(categorical_features),
+            len(categorical_groups),
             [feature_names[idx] for idx in continuous_features],
             [feature_names[idx] for idx in integer_features],
             integer_step_norm,
@@ -348,7 +365,8 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
             feature_names=feature_names,
             continuous_features=continuous_features,
             integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
+            categorical_features=categorical_features,
+            categorical_groups=categorical_groups,
             tabular_metadata=tabular_metadata,
         )
         test_ds = AdultCensusTorchDataset(
@@ -357,12 +375,23 @@ def load_adult_census_dataset(self) -> Tuple[AdultCensusTorchDataset, AdultCensu
             feature_names=feature_names,
             continuous_features=continuous_features,
             integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
+            categorical_features=categorical_features,
+            categorical_groups=categorical_groups,
             tabular_metadata=tabular_metadata,
         )
 
         return train_ds, test_ds
 
+    @classmethod
+    def _build_categorical_groups(cls, feature_names: list[str]) -> list[list[int]]:
+        groups = []
+        for column in cls.CATEGORICAL_COLUMNS:
+            prefix = f"categorical__{column}_"
+            group = [idx for idx, name in enumerate(feature_names) if name.startswith(prefix)]
+            if group:
+                groups.append(group)
+        return groups
+
     def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
         if partition == "dirichlet":
             return self.dirichlet_partition(dataset, alpha=partition_parameter)
diff --git a/nebula/core/datasets/tabular_metadata.py b/nebula/core/datasets/tabular_metadata.py
index eeb277989..e7596fcad 100644
--- a/nebula/core/datasets/tabular_metadata.py
+++ b/nebula/core/datasets/tabular_metadata.py
@@ -3,11 +3,20 @@
 from dataclasses import asdict, dataclass
 from typing import Any
 
-
 CONTINUOUS = "continuous"
 INTEGER = "integer"
+CATEGORICAL = "categorical"
 NON_PERTURBABLE = "non_perturbable"
 
+ERR_FEATURE_TYPES_LENGTH = "feature_types length must match feature_names length"
+ERR_FEATURE_MIN_LENGTH = "feature_min_norm length must match feature_names length"
+ERR_FEATURE_MAX_LENGTH = "feature_max_norm length must match feature_names length"
+ERR_UNSUPPORTED_FEATURE_TYPES = "Unsupported tabular feature types: {feature_types}"
+ERR_CATEGORICAL_GROUP_SIZE = "categorical_groups entries must contain at least two feature indices"
+ERR_CATEGORICAL_GROUP_INDEX = "categorical_groups contains invalid feature indices: {indices}"
+ERR_CATEGORICAL_GROUP_TYPE = "categorical_groups contains non-categorical feature indices: {indices}"
+ERR_CATEGORICAL_GROUP_COVERAGE = "categorical feature indices missing from categorical_groups: {indices}"
+
 
 @dataclass(frozen=True)
 class TabularAdversarialMetadata:
@@ -18,28 +27,56 @@ class TabularAdversarialMetadata:
     feature_min_norm: list[float]
     feature_max_norm: list[float]
     integer_step_norm: dict[int, float] | None = None
+    categorical_groups: list[list[int]] | None = None
 
     def __post_init__(self):
         n_features = len(self.feature_names)
         if len(self.feature_types) != n_features:
-            raise ValueError("feature_types length must match feature_names length")
+            raise ValueError(ERR_FEATURE_TYPES_LENGTH)
         if len(self.feature_min_norm) != n_features:
-            raise ValueError("feature_min_norm length must match feature_names length")
+            raise ValueError(ERR_FEATURE_MIN_LENGTH)
         if len(self.feature_max_norm) != n_features:
-            raise ValueError("feature_max_norm length must match feature_names length")
-        invalid_types = set(self.feature_types) - {CONTINUOUS, INTEGER, NON_PERTURBABLE}
+            raise ValueError(ERR_FEATURE_MAX_LENGTH)
+        invalid_types = set(self.feature_types) - {CONTINUOUS, INTEGER, CATEGORICAL, NON_PERTURBABLE}
         if invalid_types:
-            raise ValueError(f"Unsupported tabular feature types: {sorted(invalid_types)}")
+            raise ValueError(ERR_UNSUPPORTED_FEATURE_TYPES.format(feature_types=sorted(invalid_types)))
+        for group in self.categorical_groups or []:
+            if len(group) < 2:
+                raise ValueError(ERR_CATEGORICAL_GROUP_SIZE)
+            invalid_indices = [idx for idx in group if idx < 0 or idx >= n_features]
+            if invalid_indices:
+                raise ValueError(ERR_CATEGORICAL_GROUP_INDEX.format(indices=invalid_indices))
+            non_categorical_indices = [idx for idx in group if self.feature_types[idx] != CATEGORICAL]
+            if non_categorical_indices:
+                raise ValueError(ERR_CATEGORICAL_GROUP_TYPE.format(indices=non_categorical_indices))
+
+        grouped_categorical_indices = {
+            idx
+            for group in self.categorical_groups or []
+            for idx in group
+        }
+        categorical_indices = {
+            idx
+            for idx, feature_type in enumerate(self.feature_types)
+            if feature_type == CATEGORICAL
+        }
+        missing_categorical_indices = sorted(categorical_indices - grouped_categorical_indices)
+        if missing_categorical_indices:
+            raise ValueError(ERR_CATEGORICAL_GROUP_COVERAGE.format(indices=missing_categorical_indices))
 
     def to_dict(self) -> dict[str, Any]:
         return asdict(self)
 
     @classmethod
-    def from_dict(cls, data: dict[str, Any]) -> "TabularAdversarialMetadata":
+    def from_dict(cls, data: dict[str, Any]) -> TabularAdversarialMetadata:
         return cls(
             feature_names=[str(value) for value in data["feature_names"]],
             feature_types=[str(value) for value in data["feature_types"]],
             feature_min_norm=[float(value) for value in data["feature_min_norm"]],
             feature_max_norm=[float(value) for value in data["feature_max_norm"]],
-            integer_step_norm={int(k): float(v) for k, v in data.get("integer_step_norm", {}).items()},
+            integer_step_norm={int(k): float(v) for k, v in (data.get("integer_step_norm") or {}).items()},
+            categorical_groups=[
+                [int(idx) for idx in group]
+                for group in data.get("categorical_groups") or []
+            ],
         )
diff --git a/nebula/frontend/static/js/deployment/adversarial-training.js b/nebula/frontend/static/js/deployment/adversarial-training.js
index c22a5b320..ceb965246 100644
--- a/nebula/frontend/static/js/deployment/adversarial-training.js
+++ b/nebula/frontend/static/js/deployment/adversarial-training.js
@@ -17,6 +17,14 @@ const AdversarialTrainingManager = (function() {
     };
 
     const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
+    const CAA_TABULAR_DATASETS = new Set(["AdultCensus"]);
+    const IMAGE_ATTACK_OPTIONS = [
+        {value: "fgsm", label: "FGSM"},
+        {value: "pgd", label: "PGD"}
+    ];
+    const TABULAR_ATTACK_OPTIONS = [
+        {value: "caa", label: "CAA"}
+    ];
 
     function initializeAdversarialTraining() {
         setupAdversarialTrainingSwitch();
@@ -64,27 +72,77 @@ const AdversarialTrainingManager = (function() {
 
     function toggleAttackSettings(attack) {
         const pgdSettings = document.getElementById("adversarial-training-pgd-settings");
+        const stepsTitle = document.getElementById("adversarialTrainingStepsTitle");
         if (!pgdSettings) return;
 
-        pgdSettings.style.display = attack === "pgd" ? "block" : "none";
+        pgdSettings.style.display = ["pgd", "caa"].includes(attack) ? "block" : "none";
+        if (stepsTitle) {
+            stepsTitle.textContent = attack === "caa" ? "CAA search steps" : "PGD steps";
+        }
     }
 
     function updateDatasetAvailability() {
         const dataset = document.getElementById("datasetSelect")?.value;
-        const domain = IMAGE_DATASETS.has(dataset) ? "image" : "tabular";
+        const domain = getDatasetDomain(dataset);
         const adversarialTrainingSwitch = document.getElementById("adversarialTrainingSwitch");
         const datasetNote = document.getElementById("adversarial-training-dataset-note");
         const domainInput = document.getElementById("adversarialTrainingDomain");
+        const settings = document.getElementById("adversarial-training-settings");
 
         if (datasetNote) {
-            datasetNote.style.display = "none";
+            datasetNote.style.display = domain === "unsupported" ? "block" : "none";
+            datasetNote.textContent = "Adversarial Training for tabular datasets currently supports AdultCensus with CAA.";
         }
         if (domainInput) {
-            domainInput.value = domain;
+            domainInput.value = domain === "unsupported" ? "tabular" : domain;
         }
 
         if (!adversarialTrainingSwitch) return;
+        adversarialTrainingSwitch.disabled = domain === "unsupported";
+        if (domain === "unsupported") {
+            adversarialTrainingSwitch.checked = false;
+            if (settings) {
+                settings.style.display = "none";
+            }
+            return;
+        }
+
         adversarialTrainingSwitch.disabled = false;
+        refreshAttackOptions(domain);
+        toggleAdversarialTrainingSettings(adversarialTrainingSwitch.checked);
+    }
+
+    function getDatasetDomain(dataset) {
+        if (IMAGE_DATASETS.has(dataset)) {
+            return "image";
+        }
+        if (CAA_TABULAR_DATASETS.has(dataset)) {
+            return "tabular";
+        }
+        return "unsupported";
+    }
+
+    function refreshAttackOptions(domain, preferredAttack = null) {
+        const attackSelect = document.getElementById("adversarialTrainingAttack");
+        if (!attackSelect) return;
+
+        // Tabular datasets intentionally expose only CAA; image datasets expose FGSM/PGD.
+        const options = domain === "tabular" ? TABULAR_ATTACK_OPTIONS : IMAGE_ATTACK_OPTIONS;
+        const currentAttack = preferredAttack || attackSelect.value;
+        attackSelect.innerHTML = "";
+        options.forEach(({value, label}) => {
+            const option = document.createElement("option");
+            option.value = value;
+            option.textContent = label;
+            attackSelect.appendChild(option);
+        });
+
+        const validAttack = options.some(option => option.value === currentAttack)
+            ? currentAttack
+            : options[0].value;
+        attackSelect.value = validAttack;
+        attackSelect.disabled = domain === "tabular";
+        toggleAttackSettings(validAttack);
     }
 
     function numberValue(id, fallback) {
@@ -107,10 +165,14 @@ const AdversarialTrainingManager = (function() {
     }
 
     function getAdversarialTrainingConfig() {
+        const domain = document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain;
+        const attack = domain === "tabular"
+            ? "caa"
+            : (document.getElementById("adversarialTrainingAttack")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.attack);
         const config = {
             enabled: Boolean(document.getElementById("adversarialTrainingSwitch")?.checked),
-            domain: document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain,
-            attack: document.getElementById("adversarialTrainingAttack")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.attack,
+            domain,
+            attack,
             epsilon: numberValue("adversarialTrainingEpsilon", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.epsilon),
             alpha: optionalNumberValue("adversarialTrainingAlpha", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.alpha),
             steps: integerValue("adversarialTrainingSteps", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.steps),
@@ -139,8 +201,6 @@ const AdversarialTrainingManager = (function() {
         if (!adversarialTrainingSwitch) return;
 
         adversarialTrainingSwitch.checked = Boolean(adversarialTrainingConfig.enabled);
-        setValue("adversarialTrainingDomain", adversarialTrainingConfig.domain);
-        setValue("adversarialTrainingAttack", adversarialTrainingConfig.attack);
         setValue("adversarialTrainingEpsilon", adversarialTrainingConfig.epsilon);
         setValue("adversarialTrainingAlpha", adversarialTrainingConfig.alpha ?? "");
         setValue("adversarialTrainingSteps", adversarialTrainingConfig.steps);
@@ -156,8 +216,10 @@ const AdversarialTrainingManager = (function() {
             logMetricsInput.checked = Boolean(adversarialTrainingConfig.log_adversarial_metrics);
         }
 
-        toggleAdversarialTrainingSettings(adversarialTrainingSwitch.checked);
         updateDatasetAvailability();
+        const domain = document.getElementById("adversarialTrainingDomain")?.value || adversarialTrainingConfig.domain;
+        refreshAttackOptions(domain, adversarialTrainingConfig.attack);
+        toggleAdversarialTrainingSettings(adversarialTrainingSwitch.checked);
     }
 
     function setValue(id, value) {
@@ -179,8 +241,8 @@ const AdversarialTrainingManager = (function() {
         if (config.epsilon < 0) {
             return "[Adversarial Training] Epsilon must be greater than or equal to 0.";
         }
-        if (config.attack === "pgd" && config.steps < 1) {
-            return "[Adversarial Training] PGD steps must be at least 1.";
+        if (["pgd", "caa"].includes(config.attack) && config.steps < 1) {
+            return "[Adversarial Training] Search steps must be at least 1.";
         }
         if (config.clean_weight < 0 || config.adversarial_weight < 0) {
             return "[Adversarial Training] Loss weights must be greater than or equal to 0.";
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 7a21c8162..5ad22ac87 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -588,7 +588,7 @@ <h5 class="step-title">Enable/Disable Adversarial Training</h5>
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <small id="adversarial-training-dataset-note" class="form-text text-muted" style="display: none;">
-                        Adversarial Training is available for image and tabular datasets.
+                        Adversarial Training for tabular datasets currently supports AdultCensus with CAA.
                     </small>
                     <div id="adversarial-training-settings" style="margin-top: 10px; display: none;">
                         <input type="hidden" id="adversarialTrainingDomain" value="image">
@@ -607,7 +607,7 @@ <h5 class="step-title">Epsilon</h5>
                                 style="display: inline; width: 80%">
                         </div>
                         <div id="adversarial-training-pgd-settings" style="display: none;">
-                            <h5 class="step-title">PGD steps</h5>
+                            <h5 class="step-title" id="adversarialTrainingStepsTitle">PGD steps</h5>
                             <div class="form-check form-check-inline">
                                 <input type="number" class="form-control" id="adversarialTrainingSteps"
                                     placeholder="Steps" min="1" step="1" value="1"
@@ -657,7 +657,7 @@ <h5 class="step-title">Log adversarial metrics</h5>
                             </div>
                         </details>
                         <small class="form-text text-muted">
-                            Epsilon and bounds use the dataset input scale; image datasets convert pixel scale to normalized tensors.
+                            Image datasets use FGSM/PGD. AdultCensus uses CAA for tabular adversarial training.
                         </small>
                     </div>
                 </div>

From ed8e6088869c2d8d919903ff207c3a76875438c3 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 29 May 2026 13:28:15 +0200
Subject: [PATCH 54/66] Refactor: Feature Squeezing, factsheets, graphics and
 trustworthiness

---
 nebula/addons/defenses/feature_squeezing.py   |  23 +-
 .../addons/trustworthiness/dfl_factsheet.py   |  48 +--
 .../trustworthiness/factsheet_populators.py   |  38 +-
 nebula/addons/trustworthiness/graphics.py     | 376 ++++++++++--------
 .../addons/trustworthiness/trustworthiness.py | 345 +++++++++-------
 5 files changed, 481 insertions(+), 349 deletions(-)

diff --git a/nebula/addons/defenses/feature_squeezing.py b/nebula/addons/defenses/feature_squeezing.py
index 6ff592d12..683cb9cce 100644
--- a/nebula/addons/defenses/feature_squeezing.py
+++ b/nebula/addons/defenses/feature_squeezing.py
@@ -6,7 +6,6 @@
 import torch
 from PIL import Image
 
-IMAGE_DATASETS = {"MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"}
 PIL_IMAGE_MODES = {"1", "L", "P", "RGB", "RGBA", "CMYK", "YCbCr"}
 
 
@@ -34,6 +33,7 @@ class FeatureSqueezingDefense:
     """Dataset-level feature squeezing for image Nebula datasets."""
 
     def __init__(self, config: FeatureSqueezingConfig):
+        # Validate the bit depth requested by the scenario (must be an integer in [1, 64]).
         if not isinstance(config.bit_depth, int) or not 1 <= config.bit_depth <= 64:
             raise ValueError("feature_squeezing.bit_depth must be an integer in [1, 64]")
 
@@ -42,6 +42,7 @@ def __init__(self, config: FeatureSqueezingConfig):
 
     @classmethod
     def from_participant_config(cls, participant_config: dict[str, Any]) -> "FeatureSqueezingDefense | None":
+        # Build the defense only when feature squeezing is enabled in the participant config.
         raw = participant_config.get("defense_args", {}).get("feature_squeezing", {})
         if not raw or not raw.get("enabled", False):
             return None
@@ -58,18 +59,12 @@ def from_participant_config(cls, participant_config: dict[str, Any]) -> "Feature
         )
 
     def apply_to_partition(self, partition) -> None:
+        # Apply the defense to each enabled split in the participant partition.
         train_set = getattr(partition, "train_set", None)
         if train_set is None:
             logging.warning("[FeatureSqueezingDefense] No train set found; skipping defense")
             return
 
-        if self.config.dataset_name not in IMAGE_DATASETS:
-            logging.info(
-                "[FeatureSqueezingDefense] Skipping feature squeezing: dataset is not image-supported | dataset=%s",
-                self.config.dataset_name,
-            )
-            return
-
         logging.info(
             "[FeatureSqueezingDefense] Applying feature squeezing | dataset=%s | bit_depth=%s",
             self.config.dataset_name,
@@ -86,6 +81,7 @@ def apply_to_partition(self, partition) -> None:
                 self._transform_dataset(dataset, name, seen_data)
 
     def _transform_dataset(self, dataset, name: str, seen_data: set[int]) -> None:
+        # Transform all samples in one dataset split, avoiding duplicated shared data.
         data = getattr(dataset, "data", None)
         if dataset is None or data is None:
             return
@@ -105,6 +101,7 @@ def _transform_dataset(self, dataset, name: str, seen_data: set[int]) -> None:
         self._log_check(data, name, status="transformed", before=before)
 
     def _transform_sample(self, sample):
+        # Transform only the input image and keep labels or metadata unchanged.
         if isinstance(sample, tuple) and sample:
             return (self._squeeze_image(sample[0]), *sample[1:])
         return self._squeeze_image(sample)
@@ -114,6 +111,7 @@ def _transform_sample(self, sample):
     # ------------------------------------------------------------------
 
     def _squeeze_image(self, value):
+        # Quantize PIL images, tensors, and arrays while preserving the original container type.
         if isinstance(value, Image.Image):
             image = value if value.mode in PIL_IMAGE_MODES else value.convert("RGB")
             arr = np.asarray(image)
@@ -124,6 +122,7 @@ def _squeeze_image(self, value):
         return self._restore_type(value, squeezed)
 
     def _squeeze_image_array(self, arr: np.ndarray) -> np.ndarray:
+        # Normalize values to [0, 1], quantize them, and map them back to the original range.
         arr_float = arr.astype(np.float32, copy=False)
         if np.issubdtype(arr.dtype, np.integer):
             info = np.iinfo(arr.dtype)
@@ -143,9 +142,11 @@ def _squeeze_image_array(self, arr: np.ndarray) -> np.ndarray:
     # ------------------------------------------------------------------
 
     def _quantize01(self, arr: np.ndarray) -> np.ndarray:
+        # Reduce normalized values to the discrete levels defined by bit_depth.
         return np.rint(np.clip(arr, 0.0, 1.0) * self.levels) / self.levels
 
     def _log_check(self, data, name: str, status: str, before: str | None = None) -> None:
+        # Log a compact before/after summary to verify that squeezing was applied.
         if not len(data):
             logging.info("[FeatureSqueezingDefense] Verification %s | status=%s | empty dataset", name, status)
             return
@@ -174,6 +175,7 @@ def _log_check(self, data, name: str, status: str, before: str | None = None) ->
         )
 
     def _summary(self, sample) -> str:
+        # Create a short numeric summary of one sample for diagnostics.
         arr = self._as_numpy(self._unwrap(sample))
         if arr.size == 0:
             return f"shape={arr.shape}, empty=True"
@@ -187,6 +189,7 @@ def _summary(self, sample) -> str:
         )
 
     def _as_numpy(self, value) -> np.ndarray:
+        # Convert supported image containers to numpy for quantization and logging.
         if isinstance(value, torch.Tensor):
             return value.detach().cpu().numpy()
         if isinstance(value, Image.Image):
@@ -194,6 +197,7 @@ def _as_numpy(self, value) -> np.ndarray:
         return np.asarray(value)
 
     def _restore_type(self, original, arr: np.ndarray):
+        # Return squeezed data with the same high-level type as the original sample.
         if isinstance(original, torch.Tensor):
             return torch.as_tensor(arr, dtype=original.dtype, device=original.device)
         if isinstance(original, np.ndarray):
@@ -201,9 +205,11 @@ def _restore_type(self, original, arr: np.ndarray):
         return arr
 
     def _unwrap(self, sample):
+        # Extract the image from common dataset samples shaped as (image, label, ...).
         return sample[0] if isinstance(sample, tuple) and sample else sample
 
     def _fmt(self, value) -> str:
+        # Format numbers in logs without unnecessary trailing decimals.
         try:
             number = float(value)
         except (TypeError, ValueError):
@@ -212,6 +218,7 @@ def _fmt(self, value) -> str:
 
 
 def apply_feature_squeezing_if_enabled(partition, participant_config: dict[str, Any]) -> None:
+    # Public entrypoint used by the node startup flow.
     defense = FeatureSqueezingDefense.from_participant_config(participant_config)
     if defense is not None:
         defense.apply_to_partition(partition)
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index f4c78d4aa..3f32e8b9e 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -74,8 +74,6 @@ def populate_factsheet_dfl(
 
         files_dir = get_trustworthiness_dir(scenario_name)
 
-        emissions_file = os.path.join(files_dir, f"emissions_{participant_idx}.csv")
-
         get_all_data_entropy(scenario_name)
 
         factsheet["data"]["entropy_local"] = get_local_normalized_entropy(scenario_name, participant_idx)
@@ -90,7 +88,7 @@ def populate_factsheet_dfl(
         factsheet["performance"]["test_loss"] = float(final_loss)
         factsheet["performance"]["test_acc"] = float(final_acc)
 
-        bytes_sent, bytes_recv = get_bytes(scenario_name, participant_idx)
+        bytes_sent, bytes_recv, *_ = load_data_results_participant(scenario_name, participant_idx)
 
         factsheet["system"]["model_size"] = get_bytes_model(model)
 
@@ -110,8 +108,19 @@ def populate_factsheet_dfl(
 
         populate_participation(factsheet, participation_summary)
 
-        carbon_intensity_local, emissions_training_local, energy_consumed_local, sample_size = get_emissions(
-            emissions_file,
+        (
+            role,
+            carbon_intensity_local,
+            emissions_training_local,
+            workload,
+            cpu_model,
+            gpu_model,
+            cpu_used,
+            gpu_used,
+            energy_consumed_local,
+            sample_size,
+        ) = load_emissions_participant(
+            scenario_name,
             participant_idx,
         )
 
@@ -155,32 +164,3 @@ def load_round_metrics(scenario_name, participant_idx):
 
     df = df.dropna(subset=["loss", "accuracy"])
     return df
-
-
-def get_bytes(scenario_name, participant_idx):
-    data_file = os.path.join(
-        get_trustworthiness_dir(scenario_name),
-        f"data_results_{participant_idx}.csv",
-    )
-
-    data = read_csv(data_file)
-
-    row = data[data["id"] == participant_idx]
-
-    bytes_sent = row["bytes_sent"].iloc[0]
-    bytes_recv = row["bytes_recv"].iloc[0]
-
-    return bytes_sent, bytes_recv
-
-
-def get_emissions(emissions_file, participant_idx):
-    data = read_csv(emissions_file)
-
-    row = data[data["id"] == participant_idx]
-
-    avg_carbon_intensity_clients = row["energy_grid"].iloc[0]
-    emissions_training = row["emissions"].iloc[0]
-    energy_consumed = row["energy_consumed"].iloc[0]
-    sample_size = row["sample_size"].iloc[0]
-
-    return avg_carbon_intensity_clients, emissions_training, energy_consumed, sample_size
diff --git a/nebula/addons/trustworthiness/factsheet_populators.py b/nebula/addons/trustworthiness/factsheet_populators.py
index 1fa8ea6f8..d5cce3371 100644
--- a/nebula/addons/trustworthiness/factsheet_populators.py
+++ b/nebula/addons/trustworthiness/factsheet_populators.py
@@ -34,6 +34,7 @@
 
 
 def get_federation_profile(federation):
+    # Group SDFL with DFL because both use decentralized factsheet profiles.
     return FEDERATION_DFL if str(federation).upper() in {"DFL", "SDFL"} else FEDERATION_CFL
 
 
@@ -45,9 +46,10 @@ def populate_profile_metrics(
     test_loader,
     test_accuracy,
 ):
+    # Select the profile-specific populator, falling back to the shared metric set.
     federation_profile = get_federation_profile(federation)
     data_type = get_normalized_model_data_type(model)
-    populator = PROFILE_POPULATORS.get((federation_profile, data_type), populate_default_metrics)
+    populator = PROFILE_POPULATORS.get((federation_profile, data_type), populate_common_profile_metrics)
 
     populator(
         factsheet=factsheet,
@@ -59,23 +61,28 @@ def populate_profile_metrics(
 
 
 def populate_cfl_images_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    # Populate the current shared metrics for CFL image factsheets.
+    populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
 
 
 def populate_cfl_tabular_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    # Populate the current shared metrics for CFL tabular factsheets.
+    populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
 
 
 def populate_dfl_images_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    # Populate the current shared metrics for DFL/SDFL image factsheets.
+    populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
 
 
 def populate_dfl_tabular_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    # Populate the current shared metrics for DFL/SDFL tabular factsheets.
+    populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
 
 
-def populate_default_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    """Current shared metric set used by every factsheet profile."""
+def populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
+    # Current shared metric set used by every factsheet profile.
+    # Reuse one test batch for sample-based metrics and compute summary explainability once.
     test_sample = next(iter(test_loader))
     explainability_metrics = get_explainability_metrics_summary(model, test_loader)
 
@@ -87,8 +94,8 @@ def populate_default_metrics(factsheet, model, train_loader, test_loader, test_a
         test_accuracy,
         test_sample,
     )
-    populate_explainability_metrics(factsheet, explainability_metrics)
-    populate_image_robustness_metrics(factsheet, model, test_loader, test_sample)
+    populate_common_explainability_metrics(factsheet, explainability_metrics)
+    populate_common_robustness_metrics(factsheet, model, test_loader, test_sample)
 
 
 def populate_common_model_quality_metrics(
@@ -99,13 +106,16 @@ def populate_common_model_quality_metrics(
     test_accuracy,
     test_sample,
 ):
+    # Populate model quality, privacy, and fairness metrics shared by all profiles.
     factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
 
+    # Privacy metrics derived from train/test behavior.
     factsheet["privacy"]["epsilon_star"] = get_epsilon_star(model, train_loader, test_loader)
     factsheet["privacy"]["inverse_epsilon_star"] = inverse_score(factsheet["privacy"]["epsilon_star"])
     factsheet["privacy"]["mia_auc"] = get_mia_auc(model, train_loader, test_loader)
     factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
 
+    # Fairness and calibration metrics expressed as inverse scores.
     overfitting_value = get_overfitting_score(model, train_loader, test_accuracy)
     factsheet["fairness"]["inverse_overfitting"] = inverse_score(overfitting_value)
 
@@ -121,11 +131,13 @@ def populate_common_model_quality_metrics(
     coefficient_of_variation_value = get_coefficient_of_variation(model, test_loader)
     factsheet["fairness"]["inverse_coefficient_of_variation"] = inverse_score(coefficient_of_variation_value)
 
+    # Confidence is capped so factsheet scores stay within the expected range.
     value_confidence_score = get_confidence_score(model, test_sample)
     factsheet["performance"]["clipped_test_confidence_score"] = cap_score(value_confidence_score)
 
 
-def populate_explainability_metrics(factsheet, explainability_metrics):
+def populate_common_explainability_metrics(factsheet, explainability_metrics):
+    # Copy explainability summary metrics into the factsheet schema.
     factsheet["explainability"]["alpha_score"] = explainability_metrics["alpha_score"]
     factsheet["explainability"]["spread_ratio"] = explainability_metrics["spread_ratio"]
     factsheet["explainability"]["spread_divergence"] = explainability_metrics["spread_divergence"]
@@ -134,16 +146,19 @@ def populate_explainability_metrics(factsheet, explainability_metrics):
     factsheet["performance"]["clipped_test_feature_importance_cv"] = cap_score(feature_importance)
 
 
-def populate_image_robustness_metrics(factsheet, model, test_loader, test_sample):
+def populate_common_robustness_metrics(factsheet, model, test_loader, test_sample):
+    # Populate adversarial robustness metrics shared by the current factsheet profiles.
     lr = factsheet["configuration"]["learning_rate"]
     num_classes = model.get_num_classes()
 
+    # Sample-based robustness scores.
     value_clever = get_clever_score(model, test_sample, num_classes, lr)
     factsheet["performance"]["clipped_test_clever"] = cap_score(value_clever)
 
     value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes, lr)
     factsheet["performance"]["inverse_test_loss_sensitivity"] = inverse_score(value_loss_sensitivity)
 
+    # Loader-based adversarial accuracy.
     value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, num_classes, lr)
     factsheet["performance"]["clipped_test_adv_accuracy"] = cap_score(value_adv_accuracy)
 
@@ -155,6 +170,7 @@ def populate_image_robustness_metrics(factsheet, model, test_loader, test_sample
     )
     factsheet["performance"]["clipped_test_empirical_robustness"] = cap_score(value_empirical_robustness)
 
+    # Attack success is inverted so higher remains better in the factsheet.
     value_attack_success_rate = attack_success_rate(
         model,
         test_sample,
diff --git a/nebula/addons/trustworthiness/graphics.py b/nebula/addons/trustworthiness/graphics.py
index e2f339eb3..13743680e 100644
--- a/nebula/addons/trustworthiness/graphics.py
+++ b/nebula/addons/trustworthiness/graphics.py
@@ -1,163 +1,220 @@
-from abc import ABC
+import json
 import logging
-import torch
 import os
-import pickle
-import lightning as pl
-from torchmetrics.classification import MulticlassAccuracy, MulticlassRecall, MulticlassPrecision, MulticlassF1Score, MulticlassConfusionMatrix
-from torchmetrics import MetricCollection
-import seaborn as sns
+
 import matplotlib.pyplot as plt
-import json
 import pandas as pd
+import seaborn as sns
 
 from nebula.core.utils.nebulalogger_tensorboard import NebulaTensorBoardLogger
 
+
 logging.basicConfig(level=logging.INFO)
 
-class Graphics():
+PILLAR_CONFIGS = [
+    ("robustness", "#F8D3DF", -0.4, (10, 6), "Robustness"),
+    ("privacy", "#DA8D8B", -0.2, (10, 6), "Privacy"),
+    ("fairness", "#DDDDDD", -0.4, (10, 6), "Fairness"),
+    ("explainability", "#FCEFC3", -0.4, (10, 6), "Explainability"),
+    ("accountability", "#8FAADC", -0.3, (10, 6), "Accountability"),
+    ("architectural_soundness", "#DBB9FA", -0.3, (10, 6), "Architectural Soundness"),
+    ("sustainability", "#BBFDAF", -0.5, (12, 8), "Sustainability"),
+]
+TRUST_SCORE_COLOR = "#BF9000"
+
+
+class Graphics:
     def __init__(
         self,
         scenario_start_time,
         scenario_name,
         participant_id=None,
     ):
+        # Configure the TensorBoard logger used to store trustworthiness figures.
         self.scenario_start_time = scenario_start_time
         self.scenario_name = scenario_name
         log_dir = os.path.join(os.environ["NEBULA_LOGS_DIR"], scenario_name)
-        if participant_id==None:
-            self.nebulalogger = NebulaTensorBoardLogger(scenario_start_time, f"{log_dir}", name="metrics", version=f"trust", log_graph=True)
-        else:
-            self.nebulalogger = NebulaTensorBoardLogger(scenario_start_time, f"{log_dir}", name="metrics", version=f"trust_{participant_id}", log_graph=True)
+        version = "trust" if participant_id is None else f"trust_{participant_id}"
+        self.nebulalogger = NebulaTensorBoardLogger(
+            scenario_start_time,
+            f"{log_dir}",
+            name="metrics",
+            version=version,
+            log_graph=True,
+        )
+
+    def _trustworthiness_dir(self):
+        # Directory holding this scenario's trustworthiness JSON reports; KeyError if NEBULA_LOGS_DIR is unset.
+        return os.path.join(os.environ["NEBULA_LOGS_DIR"], self.scenario_name, "trustworthiness")
+
+    def _trust_report_path(self, file_name):
+        # Join file_name onto this scenario's trustworthiness report directory.
+        return os.path.join(self._trustworthiness_dir(), file_name)
 
-    def __log_figure(self, df, pillar, color, tag_root, notion_y_pos = -0.4, figsize=(10,6)):
-        filtered_df = df[df['Pillar'] == pillar].copy()
+    def _load_trust_results(self, results_file):
+        # Parse one trustworthiness JSON report; decode as UTF-8 instead of the platform default encoding.
+        with open(results_file, "r", encoding="utf-8") as f:
+            return json.load(f)
 
-        filtered_df.loc[:, 'Metric'] = filtered_df['Metric'].astype(str).str.replace('_', ' ')
-        filtered_df.loc[:, 'Metric'] = filtered_df['Metric'].apply(lambda x: str(x).title())
+    def _log_report_from_file(self, results_file, tag_root, all_pillars_tag, label_suffix=""):
+        # Load a report and log all figures generated from it.
+        results = self._load_trust_results(results_file)
+        self._log_trust_report(results, tag_root, all_pillars_tag, label_suffix=label_suffix)
 
-        filtered_df.loc[:, 'Notion'] = filtered_df['Notion'].astype(str).str.replace('_', ' ')
-        filtered_df.loc[:, 'Notion'] = filtered_df['Notion'].apply(lambda x: str(x).title())
+    def _format_report_dataframe(self, df, pillar):
+        # Keep one pillar and format metric/notion names for plot labels.
+        filtered_df = df[df["Pillar"] == pillar].copy()
 
-        unique_notion_count = filtered_df['Notion'].nunique()
-        palette = [color] * unique_notion_count
+        filtered_df.loc[:, "Metric"] = filtered_df["Metric"].astype(str).str.replace("_", " ")
+        filtered_df.loc[:, "Metric"] = filtered_df["Metric"].apply(lambda x: str(x).title())
 
-        plt.figure(figsize=figsize)
-        ax = sns.barplot(data=filtered_df, x='Metric', y='Metric Score', hue='Notion', palette=palette, dodge=False)
+        filtered_df.loc[:, "Notion"] = filtered_df["Notion"].astype(str).str.replace("_", " ")
+        filtered_df.loc[:, "Notion"] = filtered_df["Notion"].apply(lambda x: str(x).title())
+        return filtered_df
 
+    def _notion_ranges(self, filtered_df):
+        # Compute each notion's x-axis span; assumes a notion's rows are contiguous in filtered_df.
+        ranges = []
         x_positions = range(len(filtered_df))
+        seen_notions = set()
 
-        notion_scores = {}
-
-        for i in range(len(filtered_df)):
-            row = filtered_df.iloc[i]
-            notion = row['Notion']
-            notion_score = row['Notion Score']
-            metric_score = row['Metric Score']
-
-            if notion not in notion_scores:
-                metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
-                start_pos = x_positions[i]
-                end_pos = x_positions[i + len(metrics_for_notion) - 1]
-
-                notion_x_pos = (start_pos + end_pos) / 2
-                ax.axhline(notion_score, ls='--', color='black', lw=0.5, xmin=start_pos/len(x_positions), xmax=(end_pos+1)/len(x_positions))
-                ax.text(notion_x_pos, notion_score + 0.01, f"{notion_score:.2f}", ha='center', va='bottom', fontsize=10, color='black')  # Color negro
-                notion_scores[notion] = notion_score
+        for i, notion in enumerate(filtered_df["Notion"]):
+            if notion in seen_notions:
+                continue
+
+            metrics_for_notion = filtered_df[filtered_df["Notion"] == notion]["Metric"]
+            start_pos = x_positions[i]
+            end_pos = x_positions[i + len(metrics_for_notion) - 1]
+            notion_x_pos = (start_pos + end_pos) / 2
+
+            ranges.append((notion, start_pos, end_pos, notion_x_pos))
+            seen_notions.add(notion)
+
+        return ranges
+
+    def _draw_notion_score_lines(self, ax, filtered_df):
+        # Draw dashed horizontal notion score lines over the metrics they group.
+        x_count = len(filtered_df)
+        if x_count == 0:
+            return
+
+        for notion, start_pos, end_pos, notion_x_pos in self._notion_ranges(filtered_df):
+            notion_score = filtered_df[filtered_df["Notion"] == notion]["Notion Score"].iloc[0]
+            ax.axhline(
+                notion_score,
+                ls="--",
+                color="black",
+                lw=0.5,
+                xmin=start_pos / x_count,
+                xmax=(end_pos + 1) / x_count,
+            )
+            ax.text(
+                notion_x_pos,
+                notion_score + 0.01,
+                f"{notion_score:.2f}",
+                ha="center",
+                va="bottom",
+                fontsize=10,
+                color="black",
+            )
 
-        ax.set_xticks(x_positions)
-        ax.set_xticklabels(filtered_df['Metric'], rotation=45, ha='right', fontsize=10)
+    def _draw_notion_labels(self, ax, filtered_df, notion_y_pos):
+        # Add notion labels below the metric labels.
+        for notion, _, _, notion_x_pos in self._notion_ranges(filtered_df):
+            ax.text(
+                notion_x_pos,
+                notion_y_pos,
+                notion,
+                ha="center",
+                va="center",
+                fontsize=10,
+                color="black",
+            )
 
-        seen_notions = set()
-        for i, (metric, notion) in enumerate(zip(filtered_df['Metric'], filtered_df['Notion'])):
-            if notion not in seen_notions:
-                metrics_for_notion = filtered_df[filtered_df['Notion'] == notion]['Metric']
-                start_pos = x_positions[i]
-                end_pos = x_positions[i + len(metrics_for_notion) - 1]
+    def _draw_metric_score_labels(self, ax, filtered_df):
+        # Add numeric metric scores above each bar.
+        for i, value in enumerate(filtered_df["Metric Score"]):
+            ax.text(i, value + 0.01, f"{value:.2f}", ha="center", va="bottom", fontsize=10, color="black")
 
-                notion_x_pos = (start_pos + end_pos) / 2
+    def _log_pillar_figure(self, df, pillar, color, tag_root, notion_y_pos=-0.4, figsize=(10, 6)):
+        # Generate and log the metric/notion bar chart for one pillar.
+        filtered_df = self._format_report_dataframe(df, pillar)
+        unique_notion_count = filtered_df["Notion"].nunique()
+        palette = [color] * unique_notion_count
 
-                ax.text(notion_x_pos, notion_y_pos, notion, ha='center', va='center', fontsize=10, color='black')
+        plt.figure(figsize=figsize)
+        ax = sns.barplot(data=filtered_df, x="Metric", y="Metric Score", hue="Notion", palette=palette, dodge=False)
 
-                seen_notions.add(notion)
+        x_positions = range(len(filtered_df))
+        ax.set_xticks(x_positions)
+        ax.set_xticklabels(filtered_df["Metric"], rotation=45, ha="right", fontsize=10)
 
-        for i, v in enumerate(filtered_df['Metric Score']):
-            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10, color='black')
+        self._draw_notion_score_lines(ax, filtered_df)
+        self._draw_notion_labels(ax, filtered_df, notion_y_pos)
+        self._draw_metric_score_labels(ax, filtered_df)
 
-        plt.xlabel('Metrics and notions', labelpad=35)
-        plt.ylabel('Score')
-        plt.title(f'Metrics and notion scores for the {pillar} pillar')
+        plt.xlabel("Metrics and notions", labelpad=35)
+        plt.ylabel("Score")
+        plt.title(f"Metrics and notion scores for the {pillar} pillar")
 
-        ax.legend_.remove()
+        if ax.legend_ is not None:
+            ax.legend_.remove()
 
         plt.tight_layout()
 
         self.nebulalogger.log_figure(ax.get_figure(), 0, f"{tag_root}/Pillar/{pillar}")
         plt.close()
 
-    def _load_trust_results(self, results_file):
-        with open(results_file, 'r') as f:
-            return json.load(f)
-
-    def _log_trust_report(self, results, tag_root, all_pillars_tag, label_suffix=""):
-        pillars_list = []
-        notion_names = []
-        notion_scores = []
-        metric_names = []
-        metric_scores = []
-
+    def _trust_report_rows(self, results):
+        # Flatten the nested trust report into rows that pandas can plot.
+        rows = []
         for pillar in results["pillars"]:
-            for key, value in pillar.items():
-                pillar_name = key
-                if "notions" in value:
-                    for notion in value["notions"]:
-                        for notion_key, notion_value in notion.items():
-                            notion_name = notion_key
-                            notion_score = notion_value["score"]
-                            for metric in notion_value["metrics"]:
-                                for metric_key, metric_value in metric.items():
-                                    metric_name = metric_key
-                                    metric_score = metric_value["score"]
-
-                                    pillars_list.append(pillar_name)
-                                    notion_names.append(notion_name)
-                                    notion_scores.append(notion_score)
-                                    metric_names.append(metric_name)
-                                    metric_scores.append(metric_score)
-
-        df = pd.DataFrame({
-            "Pillar": pillars_list,
-            "Notion": notion_names,
-            "Notion Score": notion_scores,
-            "Metric": metric_names,
-            "Metric Score": metric_scores
-        })
-
-        self.__log_figure(df, 'robustness', "#F8D3DF", tag_root)
-        self.__log_figure(df, "privacy", "#DA8D8B", tag_root, -0.2)
-        self.__log_figure(df, "fairness", "#DDDDDD", tag_root)
-        self.__log_figure(df, "explainability", "#FCEFC3", tag_root)
-        self.__log_figure(df, "accountability", "#8FAADC", tag_root, -0.3)
-        self.__log_figure(df, "architectural_soundness", "#DBB9FA", tag_root, -0.3)
-        self.__log_figure(df, "sustainability", "#BBFDAF", tag_root, -0.5, figsize=(12,8))
-
-        categories = [
-            "robustness",
-            "privacy",
-            "fairness",
-            "explainability",
-            "accountability",
-            "architectural_soundness",
-            "sustainability"
-        ]
-
+            for pillar_name, pillar_value in pillar.items():
+                if "notions" not in pillar_value:
+                    continue
+
+                for notion in pillar_value["notions"]:
+                    for notion_name, notion_value in notion.items():
+                        for metric in notion_value["metrics"]:
+                            for metric_name, metric_value in metric.items():
+                                rows.append(
+                                    {
+                                        "Pillar": pillar_name,
+                                        "Notion": notion_name,
+                                        "Notion Score": notion_value["score"],
+                                        "Metric": metric_name,
+                                        "Metric Score": metric_value["score"],
+                                    }
+                                )
+        return rows
+
+    def _build_trust_report_dataframe(self, results):
+        # One DataFrame row per metric; explicit columns keep the schema when there are no rows.
+        column_names = ["Pillar", "Notion", "Notion Score", "Metric", "Metric Score"]
+        flat_rows = self._trust_report_rows(results)
+        report_df = pd.DataFrame(flat_rows, columns=column_names)
+        return report_df
+
+    def _pillar_scores(self, results):
+        # Read pillar scores positionally: results["pillars"][i] must be keyed by PILLAR_CONFIGS[i][0].
+        categories = [config[0] for config in PILLAR_CONFIGS]
         scores = [results["pillars"][i][category]["score"] for i, category in enumerate(categories)]
+        return categories, scores
+
+    def _pillar_labels(self, label_suffix):
+        # Tick labels for the all-pillars chart: one display name per configured
+        # pillar, then the overall trust score, each with label_suffix appended.
+        names = [*(config[4] for config in PILLAR_CONFIGS), "Trust Score"]
+        return [f"{name}{label_suffix}" for name in names]
 
-        trust_score = results["trust_score"]
+    def _log_all_pillars_figure(self, results, all_pillars_tag, label_suffix=""):
+        # Generate and log the summary chart with every pillar and the final trust score.
+        categories, scores = self._pillar_scores(results)
         categories.append("trust_score")
-        scores.append(trust_score)
+        scores.append(results["trust_score"])
 
-        palette = ["#F8D3DF", "#DA8D8B", "#DDDDDD", "#FCEFC3", "#8FAADC", "#DBB9FA", "#BBFDAF", "#BF9000"]
+        palette = [config[1] for config in PILLAR_CONFIGS]
+        palette.append(TRUST_SCORE_COLOR)
 
         plt.figure(figsize=(10, 8))
         ax = sns.barplot(x=categories, y=scores, palette=palette, hue=categories, legend=False)
@@ -165,62 +222,55 @@ def _log_trust_report(self, results, tag_root, all_pillars_tag, label_suffix="")
         ax.set_ylabel("Score")
         ax.set_title("Pillars and trust scores")
 
-        for i, v in enumerate(scores):
-            ax.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=10)
-
-        name_labels = [
-            f"Robustness{label_suffix}",
-            f"Privacy{label_suffix}",
-            f"Fairness{label_suffix}",
-            f"Explainability{label_suffix}",
-            f"Accountability{label_suffix}",
-            f"Architectural Soundness{label_suffix}",
-            f"Sustainability{label_suffix}",
-            f"Trust Score{label_suffix}"
-        ]
+        for i, value in enumerate(scores):
+            ax.text(i, value + 0.01, f"{value:.2f}", ha="center", va="bottom", fontsize=10)
 
         ax.set_xticks(range(len(categories)))
-        ax.set_xticklabels(name_labels, rotation=45)
+        ax.set_xticklabels(self._pillar_labels(label_suffix), rotation=45)
 
         self.nebulalogger.log_figure(ax.get_figure(), 0, all_pillars_tag)
         plt.close()
 
-    def graphics(self):
-        results_file = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self.scenario_name, "trustworthiness", "nebula_trust_results.json")
-        results = self._load_trust_results(results_file)
-        self._log_trust_report(results, "Trust", "Trust/AllPillars")
+    def _log_trust_report(self, results, tag_root, all_pillars_tag, label_suffix=""):
+        # Log each pillar chart plus the all-pillars summary for a trust report.
+        df = self._build_trust_report_dataframe(results)
+
+        for pillar, color, notion_y_pos, figsize, _ in PILLAR_CONFIGS:
+            self._log_pillar_figure(df, pillar, color, tag_root, notion_y_pos, figsize=figsize)
 
-    def graphics_dfl(self,participant_id):
-            results_file = os.path.join(os.environ.get("NEBULA_LOGS_DIR"), self.scenario_name, "trustworthiness", f"nebula_trust_results_{participant_id}.json")
-            results = self._load_trust_results(results_file)
-            self._log_trust_report(results, "Trust", f"Trust/AllPillars_{participant_id}", label_suffix=f"_{participant_id}")
+        self._log_all_pillars_figure(results, all_pillars_tag, label_suffix=label_suffix)
+
+    def graphics(self):
+        # Log centralized/global trustworthiness graphics.
+        results_file = self._trust_report_path("nebula_trust_results.json")
+        self._log_report_from_file(results_file, "Trust", "Trust/AllPillars")
+
+    def graphics_dfl(self, participant_id):
+        # Log local DFL graphics for one participant.
+        results_file = self._trust_report_path(f"nebula_trust_results_{participant_id}.json")
+        self._log_report_from_file(
+            results_file,
+            "Trust",
+            f"Trust/AllPillars_{participant_id}",
+            label_suffix=f"_{participant_id}",
+        )
 
     def graphics_dfl_global(self, participant_id):
-            results_file = os.path.join(
-                os.environ.get("NEBULA_LOGS_DIR"),
-                self.scenario_name,
-                "trustworthiness",
-                f"nebula_trust_results_{participant_id}_global.json",
-            )
-            results = self._load_trust_results(results_file)
-            self._log_trust_report(
-                results,
-                "TrustGlobal",
-                f"TrustGlobal/AllPillars_{participant_id}",
-                label_suffix=f"_{participant_id}",
-            )
+        # Log aggregated DFL global graphics for one participant.
+        results_file = self._trust_report_path(f"nebula_trust_results_{participant_id}_global.json")
+        self._log_report_from_file(
+            results_file,
+            "TrustGlobal",
+            f"TrustGlobal/AllPillars_{participant_id}",
+            label_suffix=f"_{participant_id}",
+        )
 
     def graphics_sdfl_global(self, participant_id):
-            results_file = os.path.join(
-                os.environ.get("NEBULA_LOGS_DIR"),
-                self.scenario_name,
-                "trustworthiness",
-                "nebula_trust_results.json",
-            )
-            results = self._load_trust_results(results_file)
-            self._log_trust_report(
-                results,
-                "TrustGlobal",
-                f"TrustGlobal/AllPillars_{participant_id}",
-                label_suffix=f"_{participant_id}",
-            )
+        # Log SDFL global graphics from the shared global report.
+        results_file = self._trust_report_path("nebula_trust_results.json")
+        self._log_report_from_file(
+            results_file,
+            "TrustGlobal",
+            f"TrustGlobal/AllPillars_{participant_id}",
+            label_suffix=f"_{participant_id}",
+        )
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index fb337da9b..b2a9ba2ad 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -32,30 +32,37 @@ class TrustWorkloadException(Exception):
 class TrustWorkload(ABC):
     @abstractmethod
     async def init(self, experiment_name):
+        # Initialize workload resources and event subscriptions.
         raise NotImplementedError
 
     @abstractmethod
     def get_workload(self) -> str:
+        # Return the workload label persisted in trustworthiness outputs.
         raise NotImplementedError
 
     @abstractmethod
     def get_sample_size(self) -> float:
+        # Return the local sample size used by the workload.
         raise NotImplementedError
 
     @abstractmethod
     def get_metrics(self) -> tuple[float, float]:
+        # Return the latest test loss and accuracy.
         raise NotImplementedError
 
     @abstractmethod
     async def finish_experiment_role_pre_actions(self):
+        # Run role-specific work before final metrics are persisted.
         raise NotImplementedError
 
     @abstractmethod
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
+        # Run role-specific work after final metrics are persisted.
         raise NotImplementedError
 
 class BaseTrustWorkload(TrustWorkload):
     def __init__(self, engine: Engine, idx, trust_files_route, workload: str, role_label: str, sample_size=None, start_time=None):
+        # Store shared workload state used by trainers and servers.
         self._engine: Engine = engine
         self._workload = workload
         self._idx = idx
@@ -77,6 +84,7 @@ def __init__(self, engine: Engine, idx, trust_files_route, workload: str, role_l
         self._timed_out_rounds_total = 0
 
     async def init(self, experiment_name):
+        # Subscribe to the events needed to build final trust summaries.
         self._experiment_name = experiment_name
         await EventManager.get_instance().subscribe_node_event(AggregationEvent, self._process_aggregation_event)
         await EventManager.get_instance().subscribe_node_event(RoundStartEvent, self._process_round_start_event)
@@ -94,26 +102,33 @@ async def init(self, experiment_name):
         await self._per_round.setup(self._engine)
 
     def get_workload(self):
+        # Return the workload name associated with this node role.
         return self._workload
 
     def get_sample_size(self):
+        # Return the sample size captured by the role pre-actions.
         return self._sample_size
 
     def get_metrics(self):
+        # Return the latest test metrics observed through events.
         return (self._current_loss, self._current_accuracy)
 
     def get_validation_metrics(self):
+        # Return the latest validation metrics observed through events.
         return (self._current_val_loss, self._current_val_accuracy)
 
     def _is_reputation_enabled(self) -> bool:
+        # Read the reputation toggle from the participant defense config.
         defense_args = self._engine.config.participant.get("defense_args", {})
         reputation_config = defense_args.get("reputation", {})
         return bool(reputation_config.get("enabled", False))
 
     def _get_reputation_system(self):
+        # Return the reputation system attached to the engine, when present.
         return getattr(self._engine, "_reputation", None)
 
     def _get_reputation_trust_summary(self) -> dict:
+        # Build the reputation fields added to the trust factsheet.
         summary = {
             "reputation_enabled": self._is_reputation_enabled(),
             "avg_neighbor_reputation": 0.0,
@@ -144,6 +159,7 @@ def _get_reputation_trust_summary(self) -> dict:
         return summary
 
     def _get_participation_trust_summary(self) -> dict:
+        # Build the participation variability fields added to the trust factsheet.
         total_clients = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
         counts = list(self._round_participation_counts.values())
 
@@ -155,6 +171,7 @@ def _get_participation_trust_summary(self) -> dict:
         }
 
     def _get_system_reliability_summary(self) -> dict:
+        # Build dropout and timeout rates from aggregation events.
         dropout_rate = 0.0
         if self._dropout_expected_total > 0:
             dropout_rate = self._dropout_missing_total / self._dropout_expected_total
@@ -169,11 +186,13 @@ def _get_system_reliability_summary(self) -> dict:
         }
 
     async def _process_round_start_event(self, rse: RoundStartEvent):
+        # Track how often each peer is expected to participate.
         _, _, expected_nodes = await rse.get_event_data()
         for node_addr in expected_nodes:
             self._round_participation_counts[node_addr] = self._round_participation_counts.get(node_addr, 0) + 1
 
     async def _process_aggregation_event(self, age: AggregationEvent):
+        # Track missing peers and timed-out aggregation rounds.
         _, expected_nodes, missing_nodes = await age.get_event_data()
         self_addr = self._engine.addr
 
@@ -187,6 +206,7 @@ async def _process_aggregation_event(self, age: AggregationEvent):
             self._timed_out_rounds_total += 1
 
     async def _process_test_metrics_event(self, tme: TestMetricsEvent):
+        # Cache the latest test metrics and forward them to per-round trust metrics.
         cur_loss, cur_acc = await tme.get_event_data()
         if cur_loss is not None and cur_acc is not None:
             self._current_loss, self._current_accuracy = cur_loss, cur_acc
@@ -195,6 +215,7 @@ async def _process_test_metrics_event(self, tme: TestMetricsEvent):
                 await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
 
     async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
+        # Cache the latest validation metrics for use in the final trustworthiness outputs.
         cur_loss, cur_acc = await vme.get_event_data()
         if cur_loss is not None and cur_acc is not None:
             self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
@@ -206,6 +227,7 @@ class TrustWorkloadTrainer(BaseTrustWorkload):
     TRUSTSCORES_FORWARDING_GRACE_MARGIN_SECONDS = 1.0
 
     def __init__(self, engine, idx, trust_files_route):
+        # Initialize trainer-side state for CFL reports and DFL/SDFL trustscores.
         super().__init__(engine, idx, trust_files_route, workload="training", role_label="TRAINER")
         self._expected_trustscores_sources = set()
         self._expected_trustscores_reports = int(self._engine.config.participant["scenario_args"]["n_nodes"]) - 1
@@ -218,97 +240,119 @@ def __init__(self, engine, idx, trust_files_route):
         self._trustscores_local_report_initialized = False
 
     async def init(self, experiment_name):
+        # Reset exchange state before subscribing to shared workload events.
         self._reset_trustscores_exchange_state()
         self._trustscores_wait_event = asyncio.Event()
         await super().init(experiment_name)
 
     async def finish_experiment_role_pre_actions(self):
+        # Capture the training sample size before final trust outputs are written.
         self._engine.trainer.datamodule.setup(stage="fit")
         train_loader = self._engine.trainer.datamodule.train_dataloader()
         self._sample_size = len(train_loader)
 
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
+        # Finish with the report flow required by the selected federation type.
         federation = trust_config.get("federation")
 
-        if federation == "DFL" or federation == "SDFL":
+        if self._uses_trustscores_exchange(federation):
             await self._finish_trustscores_exchange(federation, trust_config, experiment_name)
-        else:
-            cm = CommunicationsManager.get_instance()
-
-            server_addr = str(self._engine.config.participant["network_args"]["neighbors"]).strip()
-
-            bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(experiment_name, self._idx)
-
-            role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(experiment_name, self._idx)
-
-            class_imbalance = get_class_imbalance_local(self._idx, experiment_name)
-
-            model_size = get_bytes_model(self._engine.trainer.model)
-
-            local_entropy = get_local_entropy(self._idx, experiment_name)
-
-            message = cm.create_message(
-                "trustworthiness",
-                action="report",
-                node_id=str(self._idx),
-                bytes_sent=bytes_sent,
-                bytes_recv=bytes_recv,
-                accuracy=accuracy,
-                loss=loss,
-                role=role,
-                energy_grid=energy_grid,
-                emissions=emissions,
-                workload=workload,
-                cpu_model=cpu_model,
-                gpu_model=gpu_model,
-                cpu_used=cpu_used,
-                gpu_used=gpu_used,
-                energy_consumed=energy_consumed,
-                sample_size=sample_size,
-                class_imbalance=class_imbalance,
-                model_size=model_size,
-                local_entropy=local_entropy,
-                val_accuracy=val_accuracy,
-                dp_enabled=dp_enabled,
-                dp_epsilon=dp_epsilon
-            )
+            return
 
-            logging.info(
-                "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
-                "accuracy=%s loss=%s role=%s energy_grid=%s emissions=%s workload=%s "
-                "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s dp_enabled=%s dp_epsilon=%s",
-                server_addr,
-                str(self._idx),
-                bytes_sent,
-                bytes_recv,
-                accuracy,
-                loss,
-                role,
-                energy_grid,
-                emissions,
-                workload,
-                cpu_model,
-                gpu_model,
-                cpu_used,
-                gpu_used,
-                energy_consumed,
-                sample_size,
-                class_imbalance,
-                model_size,
-                local_entropy,
-                val_accuracy,
-                dp_enabled,
-                dp_epsilon
-            )
+        await self._send_cfl_trustworthiness_report(experiment_name)
 
-            await cm.send_message(
-                server_addr,
-                message,
-                message_type="trustworthiness",
-                allow_after_learning_finished=True,
-            )
+    def _uses_trustscores_exchange(self, federation: str | None) -> bool:
+        # DFL and SDFL share trust reports directly between participants.
+        return federation in {"DFL", "SDFL"}
+
+    async def _send_cfl_trustworthiness_report(self, experiment_name: str):
+        # Send the participant trustworthiness report to the CFL server.
+        cm = CommunicationsManager.get_instance()
+        server_addr = str(self._engine.config.participant["network_args"]["neighbors"]).strip()
+        report = self._build_cfl_trustworthiness_report(experiment_name)
+
+        message = cm.create_message(
+            "trustworthiness",
+            action="report",
+            node_id=str(self._idx),
+            **report,
+        )
+
+        self._log_cfl_trustworthiness_report(server_addr, report)
+
+        await cm.send_message(
+            server_addr,
+            message,
+            message_type="trustworthiness",
+            allow_after_learning_finished=True,
+        )
+
+    def _build_cfl_trustworthiness_report(self, experiment_name: str) -> dict:
+        # Load local metrics and shape them as a trustworthiness message payload.
+        bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(
+            experiment_name,
+            self._idx,
+        )
+        role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size = load_emissions_participant(
+            experiment_name,
+            self._idx,
+        )
+
+        return {
+            "bytes_sent": bytes_sent,
+            "bytes_recv": bytes_recv,
+            "accuracy": accuracy,
+            "loss": loss,
+            "role": role,
+            "energy_grid": energy_grid,
+            "emissions": emissions,
+            "workload": workload,
+            "cpu_model": cpu_model,
+            "gpu_model": gpu_model,
+            "cpu_used": cpu_used,
+            "gpu_used": gpu_used,
+            "energy_consumed": energy_consumed,
+            "sample_size": sample_size,
+            "class_imbalance": get_class_imbalance_local(self._idx, experiment_name),
+            "model_size": get_bytes_model(self._engine.trainer.model),
+            "local_entropy": get_local_entropy(self._idx, experiment_name),
+            "val_accuracy": val_accuracy,
+            "dp_enabled": dp_enabled,
+            "dp_epsilon": dp_epsilon,
+        }
+
+    def _log_cfl_trustworthiness_report(self, server_addr: str, report: dict):
+        # Log the CFL report with the same fields sent over the network.
+        logging.info(
+            "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
+            "accuracy=%s loss=%s role=%s energy_grid=%s emissions=%s workload=%s "
+            "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s dp_enabled=%s dp_epsilon=%s",
+            server_addr,
+            str(self._idx),
+            report["bytes_sent"],
+            report["bytes_recv"],
+            report["accuracy"],
+            report["loss"],
+            report["role"],
+            report["energy_grid"],
+            report["emissions"],
+            report["workload"],
+            report["cpu_model"],
+            report["gpu_model"],
+            report["cpu_used"],
+            report["gpu_used"],
+            report["energy_consumed"],
+            report["sample_size"],
+            report["class_imbalance"],
+            report["model_size"],
+            report["local_entropy"],
+            report["val_accuracy"],
+            report["dp_enabled"],
+            report["dp_epsilon"],
+        )
 
     async def _finish_trustscores_exchange(self, federation, trust_config, experiment_name):
+        # Compute, share, wait for, and optionally aggregate DFL/SDFL trustscores.
         self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
         await self._prepare_trustscores_exchange(federation)
 
@@ -332,11 +376,12 @@ async def _finish_trustscores_exchange(self, federation, trust_config, experimen
         await self._wait_for_trustscores_forwarding_drain(federation)
 
         if federation == "DFL":
-            self._finalize_trustscores_aggregation()
+            self._finalize_local_trustscores_aggregation()
         elif self._is_sdfl_aggregator_node():
             self._finalize_sdfl_global_trustscores_aggregation()
 
     def _compute_local_trustscores_report(self, experiment_name, trust_config, weights, federation) -> str:
+        # Build the local DFL/SDFL factsheet and return its JSON report.
         factsheet = DflFactsheet()
         self._engine.trainer.datamodule.setup(stage="fit")
         train_loader = self._engine.trainer.datamodule.train_dataloader()
@@ -362,10 +407,12 @@ def _compute_local_trustscores_report(self, experiment_name, trust_config, weigh
         return load_trust_report_json_dumped(experiment_name, self._idx)
 
     def _load_local_trustscores_weights(self, experiment_name: str) -> dict:
+        # Load trust metric weights for the active federation.
         federation = self._engine.config.participant["trust_args"]["scenario"].get("federation")
         return load_trust_weights(experiment_name, federation)
 
     def _reset_trustscores_exchange_state(self):
+        # Clear mutable state from any previous trustscores exchange.
         self._expected_trustscores_sources = set()
         self._received_trustscores_node_ids = set()
         self._trustscores_score_accumulator = {}
@@ -375,6 +422,7 @@ def _reset_trustscores_exchange_state(self):
         self._trustscores_local_report_initialized = False
 
     def _get_trustscores_weight_for_source(self, source: str, node_id: int | str) -> float:
+        # Resolve the aggregation weight for a remote trust report.
         if not self._is_reputation_enabled():
             return 0.5
 
@@ -399,6 +447,7 @@ def _get_trustscores_weight_for_source(self, source: str, node_id: int | str) ->
         return float(reputation_entry["reputation"])
 
     def _get_trustscores_peer_weights_from_reputation(self) -> dict:
+        # Extract peer trustscores weights from the reputation system.
         if not self._is_reputation_enabled():
             return {}
 
@@ -415,9 +464,11 @@ def _get_trustscores_peer_weights_from_reputation(self) -> dict:
         return peer_weights
 
     def _get_trustscores_self_weight(self) -> float:
+        # Keep local reports fully trusted in the weighted aggregation.
         return 1.0
 
     def _log_trustscores_node_weights(self, federation: str):
+        # Log the weights that will be used by trustscores aggregation.
         if not self._is_reputation_enabled():
             logging.info(
                 "[TW %s] Reputation system disabled. trustscores weights fallback to 0.5 for all nodes",
@@ -451,19 +502,12 @@ def _log_trustscores_node_weights(self, federation: str):
             )
 
     def _initialize_local_trustscores_aggregation(self, experiment_name: str):
+        # Initialize a DFL local aggregation copy with this node's own report.
         if self._trustscores_local_report_initialized:
             return
 
         trust_report_template, copy_path = create_local_trust_report_copy(experiment_name, self._idx)
-        self._trustscores_template_report = trust_report_template
-        self._trustscores_local_copy_path = copy_path
-        accumulate_weighted_trustscores(
-            report=trust_report_template,
-            weight=self._get_trustscores_self_weight(),
-            score_accumulator=self._trustscores_score_accumulator,
-            weight_accumulator=self._trustscores_weight_accumulator,
-        )
-        self._trustscores_local_report_initialized = True
+        self._initialize_trustscores_accumulator(trust_report_template, copy_path, self._get_trustscores_self_weight())
         logging.info(
             "[TW DFL] Local trustscores copy created at %s and accumulator initialized with local weight=%s",
             copy_path,
@@ -471,6 +515,7 @@ def _initialize_local_trustscores_aggregation(self, experiment_name: str):
         )
 
     async def _prepare_trustscores_exchange(self, federation: str):
+        # Discover direct neighbors and prepare the wait event for incoming reports.
         cm = CommunicationsManager.get_instance()
         self._expected_trustscores_sources = await cm.get_all_addrs_current_connections(only_direct=True)
 
@@ -497,6 +542,7 @@ async def _prepare_trustscores_exchange(self, federation: str):
             self._log_trustscores_node_weights(federation)
 
     async def _share_trustscores_report(self, trust_report_json: str, federation: str):
+        # Broadcast the local trustscores report to direct neighbors.
         cm = CommunicationsManager.get_instance()
         neighbors = self._expected_trustscores_sources.copy()
 
@@ -521,6 +567,7 @@ async def _share_trustscores_report(self, trust_report_json: str, federation: st
             )
 
     async def _wait_for_trustscores_reports(self, federation: str):
+        # Wait until every expected report arrives or the exchange times out.
         if self._trustscores_wait_event is None:
             return
 
@@ -545,6 +592,7 @@ async def _wait_for_trustscores_reports(self, federation: str):
             )
 
     async def _wait_for_trustscores_forwarding_drain(self, federation: str):
+        # Give the forwarder a short grace period before shutdown.
         if not self._expected_trustscores_sources:
             return
 
@@ -564,16 +612,24 @@ async def _wait_for_trustscores_forwarding_drain(self, federation: str):
         )
         await asyncio.sleep(forwarding_grace)
 
-    def _finalize_trustscores_aggregation(self):
+    def _build_weighted_trustscores_report(self) -> dict | None:
+        # Build the weighted report when the aggregation template is available.
         if self._trustscores_template_report is None or self._trustscores_local_copy_path is None:
-            logging.warning("[TW DFL] Skipping weighted trustscores write because local copy/template is not available")
-            return
+            return None
 
-        aggregated_report = build_weighted_trustscores_report(
+        return build_weighted_trustscores_report(
             template_report=self._trustscores_template_report,
             score_accumulator=self._trustscores_score_accumulator,
             weight_accumulator=self._trustscores_weight_accumulator,
         )
+
+    def _finalize_local_trustscores_aggregation(self):
+        # Write the weighted DFL report and generate DFL graphics.
+        aggregated_report = self._build_weighted_trustscores_report()
+        if aggregated_report is None:
+            logging.warning("[TW DFL] Skipping weighted trustscores write because local copy/template is not available")
+            return
+
         save_trust_report_json(self._trustscores_local_copy_path, aggregated_report)
         logging.info(
             "[TW DFL] Weighted trustscores written to local copy=%s",
@@ -583,11 +639,29 @@ def _finalize_trustscores_aggregation(self):
         graphics = Graphics(self._start_time, self._experiment_name, self._idx)
         graphics.graphics_dfl_global(self._idx)
 
+    def _finalize_sdfl_global_trustscores_aggregation(self):
+        # Write the weighted SDFL global report and generate SDFL graphics.
+        aggregated_report = self._build_weighted_trustscores_report()
+        if aggregated_report is None:
+            logging.warning("[TW SDFL] Skipping global trustscores write because the template/output is not available")
+            return
+
+        save_trust_report_json(self._trustscores_local_copy_path, aggregated_report)
+        logging.info(
+            "[TW SDFL] Global weighted trustscores written to %s",
+            self._trustscores_local_copy_path,
+        )
+
+        graphics = Graphics(self._start_time, self._experiment_name, self._idx)
+        graphics.graphics_sdfl_global(self._idx)
+
     def _is_sdfl_aggregator_node(self) -> bool:
+        # Check whether this node should aggregate global SDFL trustscores.
         effective_role = self._engine.rb.get_role_name(True)
         return effective_role in {Role.AGGREGATOR.value, Role.TRAINER_AGGREGATOR.value}
 
     def _initialize_sdfl_global_trustscores_aggregation(self, experiment_name: str):
+        # Initialize the SDFL global aggregation output with this node's own report.
         if self._trustscores_local_report_initialized:
             return
 
@@ -601,44 +675,31 @@ def _initialize_sdfl_global_trustscores_aggregation(self, experiment_name: str):
         )
         save_trust_report_json(output_path, trust_report_template)
 
-        self._trustscores_template_report = trust_report_template
-        self._trustscores_local_copy_path = output_path
-        accumulate_weighted_trustscores(
-            report=trust_report_template,
-            weight=1.0,
-            score_accumulator=self._trustscores_score_accumulator,
-            weight_accumulator=self._trustscores_weight_accumulator,
-        )
-        self._trustscores_local_report_initialized = True
+        self._initialize_trustscores_accumulator(trust_report_template, output_path, self._get_trustscores_self_weight())
         logging.info(
             "[TW SDFL] Global trustscores accumulator initialized at %s with local weight=1.0",
             output_path,
         )
 
-    def _finalize_sdfl_global_trustscores_aggregation(self):
-        if self._trustscores_template_report is None or self._trustscores_local_copy_path is None:
-            logging.warning("[TW SDFL] Skipping global trustscores write because the template/output is not available")
-            return
-
-        aggregated_report = build_weighted_trustscores_report(
-            template_report=self._trustscores_template_report,
+    def _initialize_trustscores_accumulator(self, trust_report_template: dict, output_path: str, local_weight: float):
+        # Store the aggregation template and seed accumulators with the local report.
+        self._trustscores_template_report = trust_report_template
+        self._trustscores_local_copy_path = output_path
+        accumulate_weighted_trustscores(
+            report=trust_report_template,
+            weight=local_weight,
             score_accumulator=self._trustscores_score_accumulator,
             weight_accumulator=self._trustscores_weight_accumulator,
         )
-        save_trust_report_json(self._trustscores_local_copy_path, aggregated_report)
-        logging.info(
-            "[TW SDFL] Global weighted trustscores written to %s",
-            self._trustscores_local_copy_path,
-        )
-
-        graphics = Graphics(self._start_time, self._experiment_name, self._idx)
-        graphics.graphics_sdfl_global(self._idx)
+        self._trustscores_local_report_initialized = True
 
     async def register_trustscores_report(self, source, message):
+        # Register a remote trustscores message using the active federation.
         federation = self._engine.config.participant["trust_args"]["scenario"].get("federation")
         await self._register_trustscores_report(source, message, federation)
 
     async def _register_trustscores_report(self, source, message, federation: str):
+        # Deduplicate, optionally accumulate, and mark remote trustscores as received.
         if str(message.node_id) == str(self._idx):
             logging.info("[TW %s] Ignoring own trustscores report from %s", federation, source)
             return
@@ -691,6 +752,7 @@ class TrustWorkloadServer(BaseTrustWorkload):
     REPORTS_WAIT_TIMEOUT_SECONDS = 60
 
     def __init__(self, engine: Engine, idx, trust_files_route):
+        # Initialize server-side state for collecting participant reports.
         server_start_time: ServerRoleBehavior = engine.rb
         super().__init__(
             engine,
@@ -710,12 +772,15 @@ def __init__(self, engine: Engine, idx, trust_files_route):
             self._reports_wait_event.set()
 
     async def init(self, experiment_name):
+        # Reuse the shared workload event subscriptions.
         await super().init(experiment_name)
 
     async def finish_experiment_role_pre_actions(self):
+        # Server has no pre-save work because aggregation sample size is zero.
         pass
 
     async def finish_experiment_role_post_actions(self, trust_config, experiment_name):
+        # Wait for participant reports, save CSV data, and generate the CFL factsheet.
         self._end_time = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
         self._trust_config = trust_config
         self._experiment_name = experiment_name
@@ -723,27 +788,35 @@ async def finish_experiment_role_post_actions(self, trust_config, experiment_nam
         if self._csv_completed:
             logging.info("[TW SERVER] finish_experiment_role_post_actions called, trustworthiness reports OK, starting generate_factsheet")
             await self._save_local_server_report_and_generate_factsheet(trust_config, experiment_name)
-        else:
-            logging.info("[TW SERVER] finish_experiment_role_post_actions called, waiting for trustworthiness reports")
-            try:
-                await asyncio.wait_for(
-                    self._reports_wait_event.wait(),
-                    timeout=self.REPORTS_WAIT_TIMEOUT_SECONDS,
-                )
-            except asyncio.TimeoutError:
-                logging.warning(
-                    "[TW SERVER] Timeout waiting trustworthiness reports. Received=%s/%s",
-                    len(self._trustworthiness_reports),
-                    self._expected_reports,
-                )
-
-            if self._trustworthiness_reports is not None and not self._csv_completed:
-                save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
-                self._csv_completed = True
+            return
 
-            await self._save_local_server_report_and_generate_factsheet(trust_config, experiment_name)
+        logging.info("[TW SERVER] finish_experiment_role_post_actions called, waiting for trustworthiness reports")
+        await self._wait_for_trustworthiness_reports()
+        self._save_trustworthiness_reports_once()
+        await self._save_local_server_report_and_generate_factsheet(trust_config, experiment_name)
+
+    async def _wait_for_trustworthiness_reports(self):
+        # Wait until reports arrive or the server-side timeout expires.
+        try:
+            await asyncio.wait_for(
+                self._reports_wait_event.wait(),
+                timeout=self.REPORTS_WAIT_TIMEOUT_SECONDS,
+            )
+        except asyncio.TimeoutError:
+            logging.warning(
+                "[TW SERVER] Timeout waiting trustworthiness reports. Received=%s/%s",
+                len(self._trustworthiness_reports),
+                self._expected_reports,
+            )
+
+    def _save_trustworthiness_reports_once(self):
+        # Persist received participant reports only once.
+        if self._trustworthiness_reports is not None and not self._csv_completed:
+            save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
+            self._csv_completed = True
 
     async def _save_local_server_report_and_generate_factsheet(self, trust_config, experiment_name):
+        # Add the server's own local report and generate final trust artifacts.
         bytes_sent, bytes_recv, _, _, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(
             self._experiment_name,
             self._idx,
@@ -768,6 +841,7 @@ async def _save_local_server_report_and_generate_factsheet(self, trust_config, e
         await self._generate_factsheet(trust_config, experiment_name)
 
     async def register_trustworthiness_report(self, source, message):
+        # Store one participant trustworthiness report received by the server.
         self._trustworthiness_reports[message.node_id] = {
             "source": source,
             "node_id": message.node_id,
@@ -801,13 +875,12 @@ async def register_trustworthiness_report(self, source, message):
 
         if (len(self._trustworthiness_reports) >= self._expected_reports):
             logging.info("[TW SERVER] all reports received, generating csv")
-            # Generate CSV files
-            save_trustworthiness_reports_csv(self._trustworthiness_reports, self._experiment_name)
-            self._csv_completed = True
+            self._save_trustworthiness_reports_once()
             self._reports_wait_event.set()
             logging.info(f"[TW SERVER] all reports received, waiting for finish post, csv_completed {self._csv_completed}")
 
     async def _generate_factsheet(self, trust_config, experiment_name):
+        # Generate the CFL factsheet and evaluate final trust metrics.
         factsheet = CflFactsheet()
         self._engine.trainer.datamodule.setup(stage="fit")
         train_loader = self._engine.trainer.datamodule.train_dataloader()
@@ -840,6 +913,7 @@ async def _generate_factsheet(self, trust_config, experiment_name):
 
 class Trustworthiness():
     def __init__(self, engine: Engine, config: Config):
+        # Select the workload implementation for this node and start emissions tracking.
         config.reset_logging_configuration()
         print_msg_box(
             msg=f"Name Trustworthiness Module\nRole: {engine.rb.get_role_name()}",
@@ -864,15 +938,18 @@ def __init__(self, engine: Engine, config: Config):
     @property
     def tw(self):
         """TrustWorkload implementation chosen according to the node role."""
+        # Expose the role-specific trust workload.
         return self._trust_workload
 
     async def start(self):
+        # Prepare output directories, subscribe to finish events, and start tracking emissions.
         await self._create_trustworthiness_directory()
         await self.tw.init(self._experiment_name)
         await EventManager.get_instance().subscribe_node_event(ExperimentFinishEvent, self._process_experiment_finish_event)
         self._tracker.start()
 
     async def _create_trustworthiness_directory(self):
+        # Ensure the experiment trustworthiness directory exists.
         logs_dir = os.environ.get("NEBULA_LOGS_DIR", os.path.join("nebula", "app", "logs"))
         trust_dir = os.path.join(logs_dir, self._experiment_name, "trustworthiness")
         # Create a directory to store files used to compute trust
@@ -880,6 +957,7 @@ async def _create_trustworthiness_directory(self):
         os.chmod(trust_dir, 0o755)
 
     async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
+        # Persist final local metrics and delegate role-specific finalization.
         class_counter = self._engine.trainer.datamodule.get_samples_per_label()
 
         save_class_count_per_participant(self._experiment_name, class_counter, self._idx)
@@ -911,6 +989,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
     def _factory_trust_workload(self, role: Role, engine: Engine, idx, trust_files_route) -> TrustWorkload:
+        # Create the workload implementation associated with the node role.
         trust_workloads = {
             Role.TRAINER: TrustWorkloadTrainer,
             Role.AGGREGATOR: TrustWorkloadTrainer,

From 7516058060bad373a97d05af2e41b73d26e0924e Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 29 May 2026 17:08:52 +0200
Subject: [PATCH 55/66] Refactoring calculation and utils

---
 nebula/addons/trustworthiness/calculation.py  | 2094 -----------------
 .../addons/trustworthiness/dfl_factsheet.py   |   18 +-
 nebula/addons/trustworthiness/factsheet.py    |   15 +-
 .../trustworthiness/factsheet_populators.py   |   26 +-
 .../trustworthiness/helpers/__init__.py       |    1 +
 .../addons/trustworthiness/helpers/csv_io.py  |  316 +++
 .../helpers/data_distribution.py              |  178 ++
 .../trustworthiness/helpers/explainability.py |  407 ++++
 .../helpers/factsheet_values.py               |  108 +
 .../trustworthiness/helpers/model_quality.py  |  371 +++
 .../addons/trustworthiness/helpers/privacy.py |  209 ++
 .../trustworthiness/helpers/robustness.py     |  413 ++++
 .../helpers/scenario_metrics.py               |  350 +++
 .../addons/trustworthiness/helpers/scoring.py |  190 ++
 .../trustworthiness/helpers/trust_reports.py  |  197 ++
 nebula/addons/trustworthiness/metric.py       |    2 +-
 .../trustworthiness/per_round_metrics.py      |    1 -
 nebula/addons/trustworthiness/pillar.py       |   20 +-
 .../addons/trustworthiness/trustworthiness.py |   27 +-
 nebula/addons/trustworthiness/utils.py        |  656 ------
 20 files changed, 2818 insertions(+), 2781 deletions(-)
 delete mode 100755 nebula/addons/trustworthiness/calculation.py
 create mode 100644 nebula/addons/trustworthiness/helpers/__init__.py
 create mode 100644 nebula/addons/trustworthiness/helpers/csv_io.py
 create mode 100644 nebula/addons/trustworthiness/helpers/data_distribution.py
 create mode 100644 nebula/addons/trustworthiness/helpers/explainability.py
 create mode 100644 nebula/addons/trustworthiness/helpers/factsheet_values.py
 create mode 100644 nebula/addons/trustworthiness/helpers/model_quality.py
 create mode 100644 nebula/addons/trustworthiness/helpers/privacy.py
 create mode 100644 nebula/addons/trustworthiness/helpers/robustness.py
 create mode 100644 nebula/addons/trustworthiness/helpers/scenario_metrics.py
 create mode 100644 nebula/addons/trustworthiness/helpers/scoring.py
 create mode 100644 nebula/addons/trustworthiness/helpers/trust_reports.py
 delete mode 100755 nebula/addons/trustworthiness/utils.py

diff --git a/nebula/addons/trustworthiness/calculation.py b/nebula/addons/trustworthiness/calculation.py
deleted file mode 100755
index 6fcb60ea2..000000000
--- a/nebula/addons/trustworthiness/calculation.py
+++ /dev/null
@@ -1,2094 +0,0 @@
-import logging
-import math
-import numbers
-import os.path
-import statistics
-import copy
-import gc
-from datetime import datetime
-from math import e
-from os.path import exists
-import json
-
-import numpy as np
-import pandas as pd
-import shap
-import torch
-import torch.nn
-from art.estimators.classification import PyTorchClassifier
-from art.metrics import clever_u, loss_sensitivity, empirical_robustness
-from codecarbon import EmissionsTracker
-from scipy.spatial.distance import jensenshannon
-from scipy.stats import entropy, variation
-from sklearn.metrics import f1_score, roc_auc_score, roc_curve
-from torch import nn, optim
-import torch.nn.functional as F
-import io
-
-
-from nebula.addons.trustworthiness.utils import read_csv
-
-dirname = os.path.dirname(__file__)
-logger = logging.getLogger(__name__)
-
-R_L1 = 40
-R_L2 = 2
-R_LI = 0.1
-
-
-# ---------------------------------------------------------------------------
-# Generic score mapping helpers used by eval_metrics*.json
-# ---------------------------------------------------------------------------
-
-def get_mapped_score(score_key, score_map):
-    """
-    Finds the score by the score_key in the score_map.
-
-    Args:
-        score_key (string): The key to look up in the score_map.
-        score_map (dict): The score map defined in the eval_metrics.json file.
-
-    Returns:
-        float: The normalized score of [0, 1].
-    """
-    score = 0
-    if score_map is None:
-        logger.warning("Score map is missing")
-    else:
-        keys = [key for key, value in score_map.items()]
-        scores = [value for key, value in score_map.items()]
-        normalized_scores = get_normalized_scores(scores)
-        normalized_score_map = dict(zip(keys, normalized_scores, strict=False))
-        score = normalized_score_map.get(score_key, np.nan)
-
-    return score
-
-
-def get_normalized_scores(scores):
-    """
-    Calculates the normalized scores of a list.
-
-    Args:
-        scores (list): The values that will be normalized.
-
-    Returns:
-        list: The normalized list.
-    """
-    if scores is None or len(scores) == 0:
-        return []
-
-    min_score = np.min(scores)
-    max_score = np.max(scores)
-    if max_score == min_score:
-        return [1.0 for _ in scores]
-
-    normalized = [(x - min_score) / (max_score - min_score) for x in scores]
-    return normalized
-
-
-def get_range_score(value, ranges, direction="asc"):
-    """
-    Maps the value to a range and gets the score by the range and direction.
-
-    Args:
-        value (int): The input score.
-        ranges (list): The ranges defined.
-        direction (string): Asc means the higher the range the higher the score, desc means otherwise.
-
-    Returns:
-        float: The normalized score of [0, 1].
-    """
-
-    if not (type(value) == int or type(value) == float):
-        logger.warning("Input value is not a number")
-        logger.warning(f"{value}")
-        return 0
-    else:
-        score = 0
-        if ranges is None:
-            logger.warning("Score ranges are missing")
-        else:
-            total_bins = len(ranges) + 1
-            bin = np.digitize(value, ranges, right=True)
-            score = 1 - (bin / total_bins) if direction == "desc" else bin / total_bins
-        return score
-
-
-def get_map_value_score(score_key, score_map):
-    """
-    Finds the score by the score_key in the score_map and returns the value.
-
-    Args:
-        score_key (string): The key to look up in the score_map.
-        score_map (dict): The score map defined in the eval_metrics.json file.
-
-    Returns:
-        float: The score obtained in the score_map.
-    """
-    score = 0
-    if score_map is None:
-        logger.warning("Score map is missing")
-    else:
-        score = score_map[score_key]
-    return score
-
-
-def get_true_score(value, direction):
-    """
-    Returns the negative of the value if direction is 'desc', otherwise returns value.
-
-    Args:
-        value (int): The input score.
-        direction (string): Asc means the higher the range the higher the score, desc means otherwise.
-
-    Returns:
-        float: The score obtained.
-    """
-
-    if value is True:
-        return 1
-    elif value is False:
-        return 0
-    else:
-        if not (type(value) == int or type(value) == float):
-            logger.warning("Input value is not a number")
-            logger.warning(f"{value}.")
-            return 0
-        else:
-            if direction == "desc":
-                return 1 - value
-            else:
-                return value
-
-
-def get_scaled_score(value, scale: list, direction: str):
-    """
-    Maps a score of a specific scale into the scale between zero and one.
-
-    Args:
-        value (int or float): The raw value of the metric.
-        scale (list): List containing the minimum and maximum value the value can fall in between.
-
-    Returns:
-        float: The normalized score of [0, 1].
-    """
-
-    score = 0
-    try:
-        value_min, value_max = scale[0], scale[1]
-    except Exception:
-        logger.warning("Score minimum or score maximum is missing. The minimum has been set to 0 and the maximum to 1")
-        value_min, value_max = 0, 1
-    if value is None or value == "":
-        logger.warning("Score value is missing. Set value to zero")
-    else:
-        low, high = 0, 1
-        if value >= value_max:
-            score = 1
-        elif value <= value_min:
-            score = 0
-        else:
-            diff = value_max - value_min
-            diffScale = high - low
-            score = (float(value) - value_min) * (float(diffScale) / diff) + low
-        if direction == "desc":
-            score = high - score
-
-    return score
-
-
-def get_value(value):
-    """
-    Get the value of a metric.
-
-    Args:
-        value (float): The value of the metric.
-
-    Returns:
-        float: The value of the metric.
-    """
-
-    return value
-
-
-def check_properties(*args):
-    """
-    Check if all the arguments have values.
-
-    Args:
-        args (list): All the arguments.
-
-    Returns:
-        float: The mean of arguments that have values.
-    """
-
-    result = map(lambda x: x is not None and x != "", args)
-    return np.mean(list(result))
-
-
-# ---------------------------------------------------------------------------
-# Local/global data distribution and participation metrics
-# ---------------------------------------------------------------------------
-
-def get_class_count_file(scenario_name, participant_id):
-    """
-    Returns the class-count file path for a participant.
-    """
-    return os.path.join(
-        os.environ.get("NEBULA_LOGS_DIR"),
-        scenario_name,
-        "trustworthiness",
-        f"{str(participant_id)}_class_count.json",
-    )
-
-
-def load_class_counts(scenario_name, participant_id):
-    """
-    Loads the saved class-count distribution for a participant.
-    """
-    with open(get_class_count_file(scenario_name, participant_id), "r") as file:
-        return json.load(file)
-
-
-def get_class_imbalance_from_counts(class_counts):
-    """
-    Calculates class imbalance as coefficient of variation over class counts.
-
-    Higher values mean a more imbalanced local dataset.
-    """
-    return get_cv(list=list(class_counts.values()))
-
-
-def get_class_imbalance_score(class_imbalance):
-    """
-    Converts class imbalance into a trust score.
-
-    A score of 1 means balanced classes; higher imbalance lowers the score.
-    """
-    return 1 / (1 + class_imbalance)
-
-
-def get_class_imbalance_local(participant_id, experiment_name):
-    class_distribution = load_class_counts(experiment_name, participant_id)
-    return get_class_imbalance_from_counts(class_distribution)
-
-
-def get_local_class_imbalance_score(scenario_name, participant_id):
-    """
-    Calculates the class-imbalance trust score for a participant.
-    """
-    return get_class_imbalance_score(get_class_imbalance_local(participant_id, scenario_name))
-
-
-def get_entropy_from_class_counts(class_counts, normalize=False):
-    """
-    Calculates entropy from class counts.
-
-    When normalized, returns a value in [0, 1] independent of class count.
-    """
-    counts = np.array(list(class_counts.values()), dtype=float)
-    total = counts.sum()
-    if total <= 0:
-        return 0.0
-
-    probabilities = counts / total
-    entropy_value = entropy(probabilities, base=2)
-
-    if not normalize:
-        return round(float(entropy_value), 6)
-
-    class_count = len(probabilities)
-    if class_count <= 1:
-        return 0.0
-
-    normalized_entropy = float(entropy_value / np.log2(class_count))
-    return max(0.0, min(1.0, normalized_entropy))
-
-
-def get_local_normalized_entropy(scenario_name, participant_id):
-    """
-    Calculates normalized entropy from a participant's saved class counts.
-    """
-    return get_entropy_from_class_counts(
-        load_class_counts(scenario_name, participant_id),
-        normalize=True,
-    )
-
-
-def get_cv(list=None, std=None, mean=None):
-    """
-    Get the coefficient of variation.
-
-    Args:
-        list (list): List in which the coefficient of variation will be calculated.
-        std (float): Standard deviation of a list.
-        mean (float): Mean of a list.
-
-    Returns:
-        float: The coefficient of variation calculated.
-    """
-    if std is not None and mean is not None:
-        if mean == 0:
-            return 0
-        return std / mean
-
-    if list is not None:
-        mean_value = np.mean(list)
-        if mean_value == 0:
-            return 0
-        return np.std(list) / mean_value
-
-    return 0
-
-
-def get_participation_variation_score(participation_counts):
-    """
-    Convert participation-count dispersion into a trust-oriented score.
-
-    Args:
-        participation_counts (list[float | int]): Number of participations per client.
-
-    Returns:
-        float: Score in [0, 1] where 1 means equal participation.
-    """
-    if not participation_counts:
-        return 1.0
-
-    counts = np.asarray(participation_counts, dtype=float)
-    mean_count = float(np.mean(counts))
-
-    if mean_count <= 0:
-        return 0.0
-
-    cv = get_cv(list=counts)
-    if not np.isfinite(cv):
-        return 0.0
-
-    return float(1 / (1 + cv))
-
-
-# ---------------------------------------------------------------------------
-# Privacy metrics
-# ---------------------------------------------------------------------------
-
-
-def get_global_privacy_risk(dp, epsilon, n):
-    """
-    Calculates the global privacy risk by epsilon and the number of clients.
-
-    Args:
-        dp (bool): Indicates if differential privacy is used or not.
-        epsilon (int): The epsilon value.
-        n (int): The number of clients in the scenario.
-
-    Returns:
-        float: The global privacy risk.
-    """
-
-    try:
-        epsilon = float(epsilon)
-        n = float(n)
-    except (TypeError, ValueError):
-        return 1
-
-    if dp is True and isinstance(epsilon, numbers.Number):
-        return 1 / (1 + (n - 1) * math.pow(e, -epsilon))
-    else:
-        return 1
-
-def get_global_privacy_risk_dfl(dp, epsilon, n):
-    """
-    Calculates the global privacy risk by epsilon and the number of clients.
-
-    Args:
-        dp (bool): Indicates if differential privacy is used or not.
-        epsilon (int): The epsilon value.
-        n (int): The number of neighbours.
-
-    Returns:
-        float: The global privacy risk.
-    """
-
-    try:
-        epsilon = float(epsilon)
-        n = float(n)
-    except (TypeError, ValueError):
-        return 1
-
-    if dp is True and isinstance(epsilon, numbers.Number):
-        return 1 / (1 + (n + 1) * math.pow(e, -epsilon))
-    else:
-        return 1
-
-
-def _collect_per_sample_losses(model, dataloader, max_samples=5000):
-    """
-    Compute per-sample cross-entropy losses for a dataloader.
-
-    Args:
-        model (torch.nn.Module): The model to evaluate.
-        dataloader: DataLoader providing (samples, labels).
-        max_samples (int): Maximum number of samples to process.
-
-    Returns:
-        np.ndarray: Losses per sample.
-    """
-    if not isinstance(model, torch.nn.Module) or dataloader is None:
-        return np.array([])
-
-    try:
-        device = next(model.parameters()).device
-    except Exception:
-        device = torch.device("cpu")
-
-    criterion = nn.CrossEntropyLoss(reduction="none")
-    losses = []
-    collected = 0
-
-    model.eval()
-    with torch.no_grad():
-        for batch in dataloader:
-            if not isinstance(batch, (tuple, list)) or len(batch) < 2:
-                continue
-
-            samples, labels = batch[0], batch[1]
-            if not torch.is_tensor(samples) or not torch.is_tensor(labels):
-                continue
-
-            remaining = max_samples - collected
-            if remaining <= 0:
-                break
-
-            samples = samples[:remaining].to(device)
-            labels = labels[:remaining]
-
-            if labels.ndim > 1:
-                labels = torch.argmax(labels, dim=1)
-
-            labels = labels.long().to(device)
-
-            outputs = model(samples)
-            logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
-            batch_losses = criterion(logits, labels)
-
-            losses.append(batch_losses.detach().cpu().numpy())
-            collected += int(batch_losses.shape[0])
-
-    if not losses:
-        return np.array([])
-
-    return np.concatenate(losses, axis=0)
-
-
-def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000):
-    """
-    Compute empirical epsilon* from train/test loss distributions.
-
-    This follows the same core structure as privacy_metrics_core.epsilon_star,
-    adapted to PyTorch models and DataLoaders used in Nebula.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        train_dataloader: Training DataLoader.
-        test_dataloader: Test DataLoader.
-        max_samples (int): Maximum samples to evaluate per split.
-
-    Returns:
-        float: Empirical epsilon* value. Returns 0.0 on failure.
-    """
-    try:
-        loss_train = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
-        loss_test = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)
-
-        if loss_train.size == 0 or loss_test.size == 0:
-            return 0.0
-
-        scores = np.concatenate([-loss_train, -loss_test])
-        y_true = np.concatenate([np.ones(len(loss_train)), np.zeros(len(loss_test))])
-
-        fpr, tpr, _ = roc_curve(y_true, scores)
-
-        fpr = np.clip(fpr, 1e-10, 1 - 1e-10)
-        tpr = np.clip(tpr, 1e-10, 1 - 1e-10)
-        fnr = 1 - tpr
-
-        delta = 1.0 / len(loss_train) if len(loss_train) > 0 else 1e-5
-
-        m1 = (1 - delta - fnr) / fpr
-        m2 = (1 - delta - fpr) / fnr
-        m3 = (fnr - delta) / (1 - fpr)
-        m4 = (fpr - delta) / (1 - fnr)
-
-        epsilon_star_val = np.log(
-            np.nanmax(np.maximum.reduce([m1, m2, m3, m4, np.ones_like(m1)]))
-        )
-
-        if np.isnan(epsilon_star_val) or np.isinf(epsilon_star_val):
-            return 0.0
-
-        return float(max(0.0, epsilon_star_val))
-    except Exception as exc:
-        logger.warning("Could not compute epsilon_star")
-        logger.warning(exc)
-        return 0.0
-
-
-def get_mia_auc(model, train_dataloader, test_dataloader, max_samples=5000):
-    """
-    Compute membership inference attack AUC using per-sample loss as the attack score.
-
-    Lower loss suggests a sample is more likely to be a training member, so the
-    attack score is defined as negative loss.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        train_dataloader: Training DataLoader.
-        test_dataloader: Test DataLoader.
-        max_samples (int): Maximum samples to evaluate per split.
-
-    Returns:
-        float: ROC-AUC of the loss-threshold membership attack. Returns 0.5 on failure.
-    """
-    try:
-        loss_train = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
-        loss_test = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)
-
-        if loss_train.size == 0 or loss_test.size == 0:
-            return 0.5
-
-        scores = np.concatenate([-loss_train, -loss_test])
-        y_true = np.concatenate([np.ones(len(loss_train)), np.zeros(len(loss_test))])
-        mia_auc = roc_auc_score(y_true, scores)
-
-        if np.isnan(mia_auc) or np.isinf(mia_auc):
-            return 0.5
-
-        return float(np.clip(mia_auc, 0.0, 1.0))
-    except Exception as exc:
-        logger.warning("Could not compute mia_auc")
-        logger.warning(exc)
-        return 0.5
-
-
-# ---------------------------------------------------------------------------
-# Scenario report readers and aggregate system metrics
-# ---------------------------------------------------------------------------
-
-def get_elapsed_time(start_time, end_time):
-    """
-    Calculates the elapsed time during the execution of the scenario.
-
-    Args:
-        start_time (datetime): Start datetime.
-        end_time (datetime): End datetime.
-
-    Returns:
-        float: The elapsed time.
-    """
-    start_date = datetime.strptime(start_time, "%d/%m/%Y %H:%M:%S")
-    end_date = datetime.strptime(end_time, "%d/%m/%Y %H:%M:%S")
-
-    elapsed_time = (end_date - start_date).total_seconds() / 60
-
-    return elapsed_time
-
-
-def _trustworthiness_dir(scenario_name):
-    return os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness")
-
-
-def _global_data_results_path(scenario_name):
-    return os.path.join(_trustworthiness_dir(scenario_name), "data_results.csv")
-
-
-def _participant_data_results_path(scenario_name, participant_id):
-    return os.path.join(_trustworthiness_dir(scenario_name), f"data_results_{participant_id}.csv")
-
-
-def _read_global_results(scenario_name):
-    return read_csv(_global_data_results_path(scenario_name))
-
-
-def _read_participant_results(scenario_name, participant_id):
-    return read_csv(_participant_data_results_path(scenario_name, participant_id))
-
-
-def _find_participant_row(data, participant_id, source_name):
-    row = data[data["id"] == participant_id]
-
-    if row.empty:
-        try:
-            row = data[data["id"] == int(participant_id)]
-        except (TypeError, ValueError):
-            row = data.iloc[0:0]
-
-    if row.empty:
-        raise ValueError(f"Participant {participant_id} not found in {source_name}")
-
-    return row.iloc[0]
-
-
-def get_bytes_model(model):
-    """
-    Calculates the serialized size in bytes of a PyTorch model state_dict.
-
-    Args:
-        model (nn.Module): PyTorch model.
-
-    Returns:
-        int: Model size in bytes.
-    """
-    buffer: io.BytesIO = io.BytesIO()
-    torch.save(model.state_dict(), buffer)
-    model_size: int = buffer.tell()
-
-    return model_size
-
-
-def get_bytes_sent_recv(scenario_name):
-    """
-    Calculates the mean bytes sent and received of the nodes.
-
-    Args:
-        bytes_sent_files (list): Files that contain the bytes sent of the nodes.
-        bytes_recv_files (list): Files that contain the bytes received of the nodes.
-
-    Returns:
-        4-tupla: The total bytes sent, the total bytes received, the mean bytes sent and the mean bytes received of the nodes.
-    """
-    data = _read_global_results(scenario_name)
-
-    number_files = len(data)
-
-    total_upload_bytes = int(data["bytes_sent"].sum())
-    total_download_bytes = int(data["bytes_recv"].sum())
-
-    avg_upload_bytes = total_upload_bytes / number_files
-    avg_download_bytes = total_download_bytes / number_files
-
-    return total_upload_bytes, total_download_bytes, avg_upload_bytes, avg_download_bytes
-
-
-def get_avg_loss_accuracy(scenario_name):
-    """
-    Calculates the mean accuracy and loss models of the nodes.
-
-    Args:
-        loss_files (list): Files that contain the loss of the models of the nodes.
-        accuracy_files (list): Files that contain the acurracies of the models of the nodes.
-
-    Returns:
-        3-tupla: The mean loss of the models, the mean accuracies of the models, the standard deviation of the accuracies of the models.
-    """
-    data = _read_global_results(scenario_name)
-
-    number_files = len(data)
-
-    total_loss = data["loss"].sum()
-    total_accuracy = data["accuracy"].sum()
-
-    denominator = max(1, number_files - 1)
-    avg_loss = total_loss / denominator
-    avg_accuracy = total_accuracy / denominator
-    std_accuracy = statistics.stdev(data["accuracy"]) if number_files > 1 else 0.0
-
-    return avg_loss, avg_accuracy, std_accuracy
-
-def get_underfitting_score(scenario_name, id):
-    """
-    Calculates the mean val accuracy of the nodes.
-    """
-    data = _read_global_results(scenario_name)
-
-    number_files = len(data)
-
-    total_val_accuracy = data["val_accuracy"].sum()
-
-    avg_val_accuracy = total_val_accuracy / max(1, number_files - 1)
-
-    return avg_val_accuracy
-
-
-def get_participant_loss_accuracy(scenario_name, participant_id):
-    """
-    Gets loss and accuracy for a specific participant from CFL aggregated results.
-
-    Args:
-        scenario_name (str): Scenario name.
-        participant_id (int | str): Participant identifier.
-
-    Returns:
-        tuple[float, float]: (loss, accuracy)
-    """
-    data_file = _global_data_results_path(scenario_name)
-    row = _find_participant_row(read_csv(data_file), participant_id, data_file)
-
-    loss = float(row["loss"])
-    accuracy = float(row["accuracy"])
-    return loss, accuracy
-
-
-# ---------------------------------------------------------------------------
-# Model performance metrics
-# ---------------------------------------------------------------------------
-
-
-def _get_model_accuracy(model, dataloader):
-    """
-    Calculates model accuracy over a dataloader.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        dataloader (DataLoader): Dataloader with (x, y) batches.
-
-    Returns:
-        float: Accuracy in [0, 1].
-    """
-    if not isinstance(model, torch.nn.Module):
-        logger.warning("Model is not a torch.nn.Module")
-        return 0.0
-
-    try:
-        device = next(model.parameters()).device
-    except Exception:
-        device = torch.device("cpu")
-
-    model.eval()
-    correct = 0
-    total = 0
-
-    with torch.no_grad():
-        for x, y in dataloader:
-            x = x.to(device)
-            y = y.to(device)
-
-            out = model(x)
-            logits = out[0] if isinstance(out, (tuple, list)) else out
-            preds = logits.argmax(dim=1)
-
-            correct += (preds == y).sum().item()
-            total += y.size(0)
-
-    return correct / total if total > 0 else 0.0
-
-
-def get_macro_f1_score(model, dataloader):
-    """
-    Calculates macro F1 score over a dataloader.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        dataloader (DataLoader): Dataloader with (x, y) batches.
-
-    Returns:
-        float: Macro F1 score in [0, 1].
-    """
-    if not isinstance(model, torch.nn.Module):
-        logger.warning("Model is not a torch.nn.Module")
-        return 0.0
-
-    try:
-        device = next(model.parameters()).device
-    except Exception:
-        device = torch.device("cpu")
-
-    model.eval()
-    y_true = []
-    y_pred = []
-
-    with torch.no_grad():
-        for x, y in dataloader:
-            x = x.to(device)
-            y = y.to(device)
-
-            out = model(x)
-            logits = out[0] if isinstance(out, (tuple, list)) else out
-            preds = logits.argmax(dim=1)
-
-            y_true.extend(y.detach().cpu().numpy().tolist())
-            y_pred.extend(preds.detach().cpu().numpy().tolist())
-
-    if not y_true:
-        return 0.0
-
-    return float(f1_score(y_true, y_pred, average="macro", zero_division=0))
-
-def _extract_model_logits(model_output):
-    """
-    Normalize the output returned by a model forward pass into a logits tensor.
-
-    Some models may return tuples/lists; for trust metrics we always consume the
-    first element as the classification output.
-    """
-    return model_output[0] if isinstance(model_output, (tuple, list)) else model_output
-
-
-def _prepare_class_targets(y):
-    """
-    Convert different target representations into a flat class-index tensor.
-    """
-    if not torch.is_tensor(y):
-        y = torch.as_tensor(y)
-
-    if y.ndim > 1:
-        if y.size(-1) > 1:
-            y = y.argmax(dim=-1)
-        else:
-            y = y.view(-1)
-
-    return y.long().view(-1)
-
-
-def _logits_to_probabilities(logits):
-    """
-    Convert model outputs into a probability matrix of shape (N, C).
-
-    Supports:
-    - multiclass logits/log-probabilities with shape (N, C)
-    - binary logits with shape (N,) or (N, 1)
-    - already-normalized probability matrices
-    """
-    if not torch.is_tensor(logits):
-        logits = torch.as_tensor(logits)
-
-    if logits.ndim == 0:
-        logits = logits.view(1, 1)
-    elif logits.ndim == 1:
-        logits = logits.view(-1, 1)
-    elif logits.ndim > 2:
-        logits = logits.reshape(logits.shape[0], -1)
-
-    if logits.size(1) == 1:
-        pos_prob = torch.sigmoid(logits[:, 0])
-        probs = torch.stack([1.0 - pos_prob, pos_prob], dim=1)
-    else:
-        row_sums = logits.sum(dim=1)
-        looks_like_probs = (
-            torch.all(logits >= 0)
-            and torch.all(logits <= 1.0 + 1e-6)
-            and torch.allclose(row_sums, torch.ones_like(row_sums), atol=1e-4, rtol=1e-4)
-        )
-        probs = logits if looks_like_probs else torch.softmax(logits, dim=1)
-
-    probs = torch.clamp(probs, min=0.0, max=1.0)
-    probs = probs / probs.sum(dim=1, keepdim=True).clamp_min(1e-12)
-    return probs
-
-
-def _collect_classification_statistics(model, dataloader):
-    """
-    Collect prediction statistics required by calibration and inequality metrics.
-
-    Returns:
-        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-        predicted labels, true labels, prediction confidences, correctness flags,
-        and probability assigned to the true class.
-    """
-    if not isinstance(model, torch.nn.Module):
-        logger.warning("Model is not a torch.nn.Module")
-        empty = np.array([], dtype=float)
-        return empty, empty, empty, empty, empty
-
-    try:
-        device = next(model.parameters()).device
-    except Exception:
-        device = torch.device("cpu")
-
-    preds_all = []
-    targets_all = []
-    confidences_all = []
-    correct_all = []
-    true_probs_all = []
-
-    model.eval()
-    with torch.no_grad():
-        for batch in dataloader:
-            if not isinstance(batch, (tuple, list)) or len(batch) < 2:
-                continue
-
-            x, y = batch[0], batch[1]
-            if not (torch.is_tensor(x) and torch.is_tensor(y)):
-                continue
-
-            x = x.to(device)
-            y = _prepare_class_targets(y).to(device)
-
-            out = model(x)
-            logits = _extract_model_logits(out)
-            probs = _logits_to_probabilities(logits)
-
-            if probs.ndim != 2 or probs.size(0) == 0:
-                continue
-
-            if y.numel() != probs.size(0):
-                n = min(int(y.numel()), int(probs.size(0)))
-                if n == 0:
-                    continue
-                y = y[:n]
-                probs = probs[:n]
-
-            valid_mask = (y >= 0) & (y < probs.size(1))
-            if not torch.any(valid_mask):
-                continue
-
-            y = y[valid_mask]
-            probs = probs[valid_mask]
-
-            conf, preds = probs.max(dim=1)
-            true_probs = probs.gather(1, y.view(-1, 1)).squeeze(1)
-            correct = preds.eq(y).float()
-
-            preds_all.extend(preds.detach().cpu().numpy().tolist())
-            targets_all.extend(y.detach().cpu().numpy().tolist())
-            confidences_all.extend(conf.detach().cpu().numpy().tolist())
-            correct_all.extend(correct.detach().cpu().numpy().tolist())
-            true_probs_all.extend(true_probs.detach().cpu().numpy().tolist())
-
-    return (
-        np.asarray(preds_all, dtype=int),
-        np.asarray(targets_all, dtype=int),
-        np.asarray(confidences_all, dtype=float),
-        np.asarray(correct_all, dtype=float),
-        np.asarray(true_probs_all, dtype=float),
-    )
-
-
-
-def get_overfitting_score(model, train_dataloader, test_accuracy):
-    """
-    Calculates overfitting as the positive train-test accuracy gap.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate on training data.
-        train_dataloader (DataLoader): Training dataloader.
-        test_accuracy (float): Test accuracy in [0, 1].
-
-    Returns:
-        float: Positive train-test accuracy gap.
-    """
-    try:
-        train_accuracy = _get_model_accuracy(model, train_dataloader)
-        return max(0.0, float(train_accuracy) - float(test_accuracy))
-    except Exception as exc:
-        logger.warning("Could not compute overfitting score")
-        logger.warning(exc)
-        return 0.0
-
-def get_underfitting_score_local(scenario_name, id):
-    """
-    Gets the local validation accuracy for a specific DFL/SDFL participant.
-
-    Args:
-        scenario_name (str): Scenario name.
-        participant_id (int | str): Participant identifier.
-
-    Returns:
-        float: Validation accuracy.
-    """
-    data = _read_participant_results(scenario_name, id)
-    return float(data["val_accuracy"].iloc[0])
-
-def get_dp_local(scenario_name, id):
-    """
-    Gets the dp metrics for a specific DFL/SDFL participant.
-
-    Args:
-        scenario_name (str): Scenario name.
-        participant_id (int | str): Participant identifier.
-
-    Returns:
-        float: DP Enabled, Epsilon.
-    """
-    data = _read_participant_results(scenario_name, id)
-    return data["dp_enabled"].iloc[0], float(data["dp_epsilon"].iloc[0])
-
-
-def get_dp_global(scenario_name):
-    """
-    Gets the aggregated DP metrics for a CFL scenario, excluding the server node.
-
-    Args:
-        scenario_name (str): Scenario name.
-
-    Returns:
-        tuple[bool, float | str]: Whether DP is enabled, and the
-        average epsilon across client nodes.
-    """
-    data = _read_global_results(scenario_name)
-
-    if data["dp_enabled"].iloc[0] == False:
-        return False, 0.0
-
-    number_files = len(data)
-
-    avg_epsilon = data["dp_epsilon"].sum() / max(1, number_files - 1)
-
-    return True, avg_epsilon
-
-
-# ---------------------------------------------------------------------------
-# Fairness and calibration metrics
-# ---------------------------------------------------------------------------
-
-def get_well_calibration_error(model, test_dataloader, n_bins=10):
-    """
-    Calculates a well-calibration error style metric using prediction confidence.
-
-    For multiclass models, confidence is taken as the max softmax probability and
-    the observed outcome is whether the prediction is correct.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        test_dataloader (DataLoader): Test dataloader.
-        n_bins (int): Number of quantile bins.
-
-    Returns:
-        float: Calibration error in [0, 1] when computation succeeds.
-    """
-    if not isinstance(model, torch.nn.Module):
-        logger.warning("Model is not a torch.nn.Module")
-        return 0.0
-
-    try:
-        n_bins = max(2, int(n_bins))
-    except Exception:
-        n_bins = 10
-
-    _, _, confidences, correct, _ = _collect_classification_statistics(model, test_dataloader)
-
-    if len(confidences) == 0 or len(correct) == 0:
-        return 0.0
-
-    confidences = np.clip(np.asarray(confidences, dtype=float), 0.0, 1.0)
-    correct = np.clip(np.asarray(correct, dtype=float), 0.0, 1.0)
-
-    bin_edges = np.linspace(0.0, 1.0, n_bins + 1)
-    ece = 0.0
-    total = float(len(confidences))
-
-    for idx in range(n_bins):
-        left = bin_edges[idx]
-        right = bin_edges[idx + 1]
-        if idx == n_bins - 1:
-            mask = (confidences >= left) & (confidences <= right)
-        else:
-            mask = (confidences >= left) & (confidences < right)
-
-        if not np.any(mask):
-            continue
-
-        bin_weight = float(mask.sum()) / total
-        bin_accuracy = float(correct[mask].mean())
-        bin_confidence = float(confidences[mask].mean())
-        ece += bin_weight * abs(bin_accuracy - bin_confidence)
-
-    return float(np.clip(ece, 0.0, 1.0))
-
-
-def get_generalized_entropy_index(model, test_dataloader, alpha=2):
-    """
-    Calculates generalized entropy index from model predictions.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        test_dataloader (DataLoader): Test dataloader.
-        alpha (float): GEI alpha parameter.
-
-    Returns:
-        float: Generalized entropy index value.
-    """
-    try:
-        _, _, _, _, true_class_probs = _collect_classification_statistics(model, test_dataloader)
-        if len(true_class_probs) == 0:
-            return 0.0
-
-        # Use the probability assigned to the true class as a continuous, positive
-        # benefit. This works consistently for multiclass neural models on both
-        # images and tabular data, and avoids collapsing the metric to a coarse
-        # correct/incorrect indicator.
-        eps = 1e-12
-        b = np.clip(np.asarray(true_class_probs, dtype=float), eps, 1.0)
-        mu = float(np.mean(b))
-        if mu <= 0:
-            return 0.0
-
-        ratio = np.clip(b / mu, eps, None)
-
-        if alpha == 0:
-            val = float(np.mean(-np.log(ratio)))
-        elif alpha == 1:
-            val = float(np.mean(ratio * np.log(ratio)))
-        elif alpha == 2:
-            val = float(np.mean((ratio - 1.0) ** 2) / 2.0)
-        else:
-            val = float(np.mean(ratio**alpha - 1.0) / (alpha * (alpha - 1.0)))
-
-        if math.isnan(val) or math.isinf(val):
-            return 0.0
-        return max(0.0, val)
-    except Exception as exc:
-        logger.warning("Could not compute generalized entropy index")
-        logger.warning(exc)
-        return 0.0
-
-
-def get_theil_index(model, test_dataloader):
-    """
-    Convenience wrapper for generalized entropy index with alpha=1.
-    """
-    return get_generalized_entropy_index(model, test_dataloader, alpha=1)
-
-
-def get_coefficient_of_variation(model, test_dataloader):
-    """
-    Calculates coefficient of variation from GEI(alpha=2).
-    """
-    try:
-        gei = get_generalized_entropy_index(model, test_dataloader, alpha=2)
-        return float(np.sqrt(2 * gei))
-    except Exception as exc:
-        logger.warning("Could not compute coefficient of variation")
-        logger.warning(exc)
-        return 0.0
-
-
-def get_avg_class_imbalance_model_size(scenario_name):
-    """
-    Calculates the mean class imbalance and model size of the nodes.
-
-    Args:
-        data_results_files (list): Files that contain the class imbalance and model size of the nodes
-
-    Returns:
-        2-tupla: The mean class imbalance mean and model size mean of the nodes.
-    """
-    data = _read_global_results(scenario_name)
-
-    number_files = len(data)
-
-    total_class_imbalance = data["class_imbalance"].sum()
-    total_model_size = data["model_size"].sum()
-
-    avg_class_imbalance = total_class_imbalance / number_files
-    avg_model_size = total_model_size / number_files
-
-    return avg_class_imbalance, avg_model_size
-
-
-def get_entropy_list(scenario_name):
-    """
-    Obtiene una lista con los valores de entropy de todos los nodos.
-
-    Args:
-        scenario_name (str): Nombre del escenario.
-
-    Returns:
-        list: Lista con los valores de entropy
-    """
-    data = _read_global_results(scenario_name)
-
-    entropy_list = data["local_entropy"].tolist()
-
-    return entropy_list
-
-
-# ---------------------------------------------------------------------------
-# Explainability metrics
-# ---------------------------------------------------------------------------
-
-def get_feature_importance_cv(model, test_sample):
-    """
-    Calculates the coefficient of variation of the feature importance.
-
-    Args:
-        model (object): The model.
-        test_sample (object): One test sample to calculate the feature importance.
-
-    Returns:
-        float: The coefficient of variation of the feature importance.
-    """
-
-    try:
-        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
-        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
-        vals = vals[vals > 0]
-
-        if len(vals) <= 1:
-            return 0.0
-
-        cv = float(variation(vals))
-        if math.isnan(cv) or math.isinf(cv):
-            return 1.0
-        return max(0.0, cv)
-    except Exception as exc:
-        logger.warning("Could not compute feature importance CV with shap")
-        logger.warning(exc)
-        return 1.0
-
-
-def _get_feature_importances(model, test_sample):
-    """
-    Computes global feature importances from SHAP values.
-
-    Args:
-        model (object): The model.
-        test_sample (object): One test sample batch.
-
-    Returns:
-        np.ndarray: Global importances per feature.
-    """
-    if not isinstance(model, torch.nn.Module):
-        logger.warning("Model is not a torch.nn.Module")
-        return np.array([])
-
-    def _clone_model(model_ref, device):
-        optimizer_attrs = ("_optimizer", "_optimizer_override")
-        optimizer_state = {}
-        try:
-            for attr in optimizer_attrs:
-                if hasattr(model_ref, attr):
-                    optimizer_state[attr] = getattr(model_ref, attr)
-                    setattr(model_ref, attr, None)
-
-            model_clone = copy.deepcopy(model_ref)
-            for attr in optimizer_attrs:
-                if hasattr(model_clone, attr):
-                    setattr(model_clone, attr, None)
-
-            model_clone.to(device)
-            model_clone.eval()
-            return model_clone
-        except Exception as exc:
-            logger.warning("Could not clone model for SHAP, using original model")
-            logger.warning(exc)
-            model_ref.eval()
-            return model_ref
-        finally:
-            for attr, value in optimizer_state.items():
-                setattr(model_ref, attr, value)
-
-    def _prepare_shap_inputs(sample):
-        if not (isinstance(sample, (tuple, list)) and len(sample) >= 1):
-            return None, None, None
-
-        batched_data = sample[0]
-        if not torch.is_tensor(batched_data) or batched_data.ndim == 0 or batched_data.size(0) == 0:
-            return None, None, None
-
-        if not torch.is_floating_point(batched_data):
-            batched_data = batched_data.float()
-
-        batch_size = int(batched_data.size(0))
-        input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
-
-        if batch_size == 1:
-            return batched_data[:1], batched_data[:1], input_shape
-
-        background_size = min(max(8, batch_size // 4), 32, batch_size - 1)
-        explainable = batch_size - background_size
-        explain_size = min(max(4, explainable), 32, explainable)
-
-        background = batched_data[:background_size]
-        test_data = batched_data[background_size:background_size + explain_size]
-
-        if test_data.size(0) == 0:
-            test_data = batched_data[: min(batch_size, 32)]
-
-        return background, test_data, input_shape
-
-    def _compute_shap_values(model_ref, background, test_data):
-        explainer_errors = []
-
-        for explainer_name in ("DeepExplainer", "GradientExplainer"):
-            explainer = None
-            try:
-                if explainer_name == "DeepExplainer":
-                    explainer = shap.DeepExplainer(model_ref, background)
-                    return explainer.shap_values(test_data, check_additivity=False)
-
-                explainer = shap.GradientExplainer(model_ref, background)
-                return explainer.shap_values(test_data)
-            except Exception as exc:
-                explainer_errors.append(f"{explainer_name}: {exc}")
-            finally:
-                # SHAP explainers may register autograd hooks. If we explain on the
-                # original model, those hooks can leak into later ART metrics.
-                del explainer
-                gc.collect()
-
-        raise RuntimeError("; ".join(explainer_errors))
-
-    def _compute_gradient_importances(model_ref, test_data):
-        was_training = bool(getattr(model_ref, "training", False))
-        model_ref.eval()
-
-        try:
-            inputs = test_data.detach().clone().requires_grad_(True)
-            model_ref.zero_grad(set_to_none=True)
-
-            outputs = model_ref(inputs)
-            if isinstance(outputs, (tuple, list)):
-                outputs = outputs[0]
-
-            if outputs.ndim == 1:
-                score = outputs.sum()
-            else:
-                score = outputs.reshape(outputs.shape[0], -1).max(dim=1).values.sum()
-
-            score.backward()
-            if inputs.grad is None:
-                return np.array([])
-
-            importances = torch.abs(inputs.grad * inputs).mean(dim=0)
-            importances = importances.detach().cpu().numpy().reshape(-1)
-            importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
-            return np.maximum(importances, 0.0)
-        finally:
-            if was_training:
-                model_ref.train()
-
-    def _feature_axes_from_shape(arr_shape, input_shape, n_samples):
-        input_shape = tuple(input_shape)
-        input_rank = len(input_shape)
-
-        if input_rank == 0 or len(arr_shape) < input_rank:
-            return None
-
-        if len(arr_shape) >= input_rank + 1 and tuple(arr_shape[1:1 + input_rank]) == input_shape:
-            return tuple(range(1, 1 + input_rank))
-
-        if len(arr_shape) >= input_rank + 2 and arr_shape[1] == n_samples and tuple(arr_shape[2:2 + input_rank]) == input_shape:
-            return tuple(range(2, 2 + input_rank))
-
-        candidates = []
-        for start in range(len(arr_shape) - input_rank + 1):
-            if tuple(arr_shape[start:start + input_rank]) == input_shape:
-                candidates.append(start)
-
-        if not candidates:
-            return None
-
-        # Prefer matches that do not consume the leading sample/output axes.
-        non_leading = [start for start in candidates if start > 0]
-        if non_leading:
-            candidates = non_leading
-
-        if len(arr_shape) > 1 and arr_shape[1] == n_samples:
-            non_output_sample = [start for start in candidates if start > 1]
-            if non_output_sample:
-                candidates = non_output_sample
-
-        start = candidates[0]
-        return tuple(range(start, start + input_rank))
-
-    try:
-        try:
-            device = next(model.parameters()).device
-        except Exception:
-            device = torch.device("cpu")
-
-        background, test_data, input_shape = _prepare_shap_inputs(test_sample)
-        if background is None or test_data is None or input_shape is None:
-            return np.array([])
-
-        background = background.to(device)
-        test_data = test_data.to(device)
-
-        shap_model = _clone_model(model, device)
-        try:
-            shap_values = _compute_shap_values(shap_model, background, test_data)
-        except Exception as exc:
-            logger.debug("Could not compute feature importances with SHAP, using gradient fallback: %s", exc)
-            del shap_model
-            gc.collect()
-
-            gradient_model = _clone_model(model, device)
-            try:
-                return _compute_gradient_importances(gradient_model, test_data)
-            except Exception as fallback_exc:
-                logger.debug("Could not compute feature importances with gradient fallback: %s", fallback_exc)
-                return np.array([])
-            finally:
-                del gradient_model
-                gc.collect()
-        finally:
-            if "shap_model" in locals():
-                del shap_model
-            gc.collect()
-
-        if shap_values is None:
-            return np.array([])
-
-        if isinstance(shap_values, (list, tuple)):
-            arrays = [np.asarray(val, dtype=float) for val in shap_values if val is not None]
-            if not arrays:
-                return np.array([])
-            shap_arr = np.stack(arrays, axis=0)
-        else:
-            shap_arr = np.asarray(shap_values, dtype=float)
-
-        if shap_arr.size == 0:
-            return np.array([])
-
-        shap_arr = np.nan_to_num(shap_arr, nan=0.0, posinf=0.0, neginf=0.0)
-        feature_axes = _feature_axes_from_shape(tuple(shap_arr.shape), input_shape, int(test_data.size(0)))
-
-        if feature_axes is None:
-            # Conservative fallback: treat the first axis as samples when possible and
-            # flatten the remaining dimensions into features.
-            if shap_arr.ndim == 1:
-                importances = np.abs(shap_arr)
-            else:
-                aggregate_axes = (0,)
-                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
-        else:
-            aggregate_axes = tuple(idx for idx in range(shap_arr.ndim) if idx not in feature_axes)
-            if aggregate_axes:
-                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
-            else:
-                importances = np.abs(shap_arr)
-
-        importances = np.asarray(importances, dtype=float).reshape(-1)
-        importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
-        return np.maximum(importances, 0.0)
-    except Exception as exc:
-        logger.debug("Could not compute feature importances")
-        logger.debug(exc)
-        return np.array([])
-
-
-def get_alpha_score(model, test_sample, alpha=0.8):
-    """
-    Computes alpha score from global feature importances.
-    """
-    try:
-        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
-        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
-        vals = np.maximum(vals, 0.0)
-        total_features = len(vals)
-        if total_features == 0 or np.sum(vals) <= 1e-12:
-            return 1.0
-
-        try:
-            alpha = float(alpha)
-        except Exception:
-            alpha = 0.8
-        alpha = min(max(alpha, 0.0), 1.0)
-
-        vals_sorted = np.sort(vals)[::-1]
-        cum_sum = np.cumsum(vals_sorted)
-        threshold = float(alpha) * np.sum(vals_sorted)
-        idx = np.searchsorted(cum_sum, threshold)
-        return float(min(total_features, idx + 1) / total_features)
-    except Exception as exc:
-        logger.warning("Could not compute alpha score")
-        logger.warning(exc)
-        return 1.0
-
-
-def _get_spread_base(model, test_sample, divergence=True):
-    vals = _get_feature_importances(model, test_sample)
-    tol = 1e-8
-
-    if len(vals) == 0 or np.sum(vals) < tol:
-        return 0.0 if divergence else 1.0
-    if len(vals) == 1:
-        return 0.0 if divergence else 1.0
-
-    weights = vals / np.sum(vals)
-    equal_weights = np.ones(len(vals)) / len(vals)
-
-    if divergence:
-        metric = jensenshannon(weights, equal_weights, base=2)
-    else:
-        denom = entropy(equal_weights)
-        metric = 0.0 if denom <= tol else entropy(weights) / denom
-
-    if math.isnan(metric) or math.isinf(metric):
-        return 0.0 if divergence else 1.0
-    return float(np.clip(metric, 0.0, 1.0))
-
-
-def get_spread_ratio(model, test_sample):
-    """
-    Computes spread ratio from global feature importances.
-    """
-    try:
-        return _get_spread_base(model, test_sample, divergence=False)
-    except Exception as exc:
-        logger.warning("Could not compute spread ratio")
-        logger.warning(exc)
-        return 1.0
-
-
-def get_spread_divergence(model, test_sample):
-    """
-    Computes spread divergence from global feature importances.
-    """
-    try:
-        return _get_spread_base(model, test_sample, divergence=True)
-    except Exception as exc:
-        logger.warning("Could not compute spread divergence")
-        logger.warning(exc)
-        return 0.0
-
-
-def get_explainability_metrics_summary(model, test_dataloader, max_batches=4):
-    """
-    Computes explainability metrics over multiple test batches and returns
-    their mean values.
-
-    Args:
-        model (object): The model.
-        test_dataloader: Test dataloader providing batches.
-        max_batches (int): Maximum number of batches to use.
-
-    Returns:
-        dict: Mean values for feature_importance_cv, alpha_score,
-        spread_ratio and spread_divergence.
-    """
-    summary = {
-        "feature_importance_cv": 1.0,
-        "alpha_score": 1.0,
-        "spread_ratio": 1.0,
-        "spread_divergence": 0.0,
-    }
-
-    if test_dataloader is None:
-        return summary
-
-    try:
-        max_batches = max(1, int(max_batches))
-    except Exception:
-        max_batches = 4
-
-    fi_values = []
-    alpha_values = []
-    spread_ratio_values = []
-    spread_divergence_values = []
-
-    try:
-        for batch_idx, test_sample in enumerate(test_dataloader):
-            if batch_idx >= max_batches:
-                break
-
-            fi_values.append(float(get_feature_importance_cv(model, test_sample)))
-            alpha_values.append(float(get_alpha_score(model, test_sample)))
-            spread_ratio_values.append(float(get_spread_ratio(model, test_sample)))
-            spread_divergence_values.append(float(get_spread_divergence(model, test_sample)))
-    except Exception as exc:
-        logger.warning("Could not compute explainability metrics summary")
-        logger.warning(exc)
-
-    if fi_values:
-        summary["feature_importance_cv"] = float(np.mean(fi_values))
-    if alpha_values:
-        summary["alpha_score"] = float(np.mean(alpha_values))
-    if spread_ratio_values:
-        summary["spread_ratio"] = float(np.mean(spread_ratio_values))
-    if spread_divergence_values:
-        summary["spread_divergence"] = float(np.mean(spread_divergence_values))
-
-    return summary
-
-
-# ---------------------------------------------------------------------------
-# Robustness metrics based on ART estimators
-# ---------------------------------------------------------------------------
-
-def _build_art_classifier(model, input_shape, nb_classes, learning_rate):
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), learning_rate)
-
-    return PyTorchClassifier(
-        model=model,
-        loss=criterion,
-        optimizer=optimizer,
-        input_shape=tuple(input_shape),
-        nb_classes=nb_classes,
-    )
-
-
-def _validate_test_sample_tensors(test_sample):
-    if not (isinstance(test_sample, (tuple, list)) and len(test_sample) >= 2):
-        raise ValueError("`test_sample` must contain samples and labels.")
-
-    samples, labels = test_sample[0], test_sample[1]
-    if not (torch.is_tensor(samples) and torch.is_tensor(labels) and samples.shape[0] > 0):
-        raise ValueError("`test_sample` must contain non-empty tensors for samples and labels.")
-
-    return samples, labels
-
-
-def _coerce_max_samples(max_samples, default=8):
-    try:
-        return max(1, int(max_samples))
-    except Exception:
-        return default
-
-
-def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
-    """
-    Calculates the CLEVER score as the mean score over multiple samples.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader.
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        max_samples (int): Maximum number of samples from the batch to evaluate.
-
-    Returns:
-        float: Mean CLEVER score across the selected samples.
-    """
-    samples, _ = _validate_test_sample_tensors(test_sample)
-
-    input_shape = tuple(samples.shape[1:]) if samples.dim() >= 2 else tuple(samples.shape)
-
-    max_samples = _coerce_max_samples(max_samples)
-    n_samples = min(int(samples.shape[0]), max_samples)
-
-    # Create the ART classifier once and reuse it for all selected samples.
-    classifier = _build_art_classifier(model, input_shape, nb_classes, learning_rate)
-
-    clever_scores = []
-    for idx in range(n_samples):
-        background = samples[idx].detach().cpu()
-        sample_np = background.numpy()
-
-        try:
-            score_untargeted = clever_u(
-                classifier,
-                sample_np,
-                10,
-                5,
-                R_L2,
-                norm=2,
-                pool_factor=3,
-                verbose=False,
-            )
-            if score_untargeted is not None and not math.isnan(float(score_untargeted)):
-                clever_scores.append(float(score_untargeted))
-        except Exception as exc:
-            logger.warning("Could not compute CLEVER score for sample index %s", idx)
-            logger.warning(exc)
-
-    if not clever_scores:
-        return 0.0
-
-    return float(np.mean(clever_scores))
-
-
-# ---------------------------------------------------------------------------
-# Sustainability and communication metrics
-# ---------------------------------------------------------------------------
-
-def stop_emissions_tracking_and_save(
-    tracker: EmissionsTracker,
-    outdir: str,
-    emissions_file: str,
-    role: str,
-    workload: str,
-    sample_size: int = 0,
-    participant_idx=None,
-):
-    """
-    Stops emissions tracking object from CodeCarbon and saves relevant information to emissions.csv file.
-
-    Args:
-        tracker (object): The emissions tracker object holding information.
-        outdir (str): The path of the output directory of the experiment.
-        emissions_file (str): The path to the emissions file.
-        role (str): Either client or server depending on the role.
-        workload (str): Either aggregation or training depending on the workload.
-        sample_size (int): The number of samples used for training, if aggregation 0.
-    """
-
-    tracker.stop()
-
-    emissions_file = os.path.join(outdir, emissions_file)
-
-    if exists(emissions_file):
-        df = pd.read_csv(emissions_file)
-    else:
-        df = pd.DataFrame(
-            columns=[
-                "id",
-                "role",
-                "energy_grid",
-                "emissions",
-                "workload",
-                "CPU_model",
-                "GPU_model",
-            ]
-        )
-    try:
-        energy_grid = (tracker.final_emissions_data.emissions / tracker.final_emissions_data.energy_consumed) * 1000
-        df = pd.concat(
-            [
-                df,
-                pd.DataFrame({
-                    "id": participant_idx,
-                    "role": role,
-                    "energy_grid": [energy_grid],
-                    "emissions": [tracker.final_emissions_data.emissions],
-                    "workload": workload,
-                    "CPU_model": tracker.final_emissions_data.cpu_model
-                    if tracker.final_emissions_data.cpu_model
-                    else "None",
-                    "GPU_model": tracker.final_emissions_data.gpu_model
-                    if tracker.final_emissions_data.gpu_model
-                    else "None",
-                    "CPU_used": True if tracker.final_emissions_data.cpu_energy else False,
-                    "GPU_used": True if tracker.final_emissions_data.gpu_energy else False,
-                    "energy_consumed": tracker.final_emissions_data.energy_consumed,
-                    "sample_size": sample_size,
-                }),
-            ],
-            ignore_index=True,
-        )
-        df.to_csv(emissions_file, encoding="utf-8", index=False)
-    except Exception as e:
-        logger.warning(e)
-
-def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: float = 1e-12) -> float:
-    """
-    Communication efficiency = total_bytes / final_accuracy.
-    Lower is better.
-
-    Args:
-        bytes_up: total uploaded bytes
-        bytes_down: total downloaded bytes
-        final_accuracy: final test accuracy in [0,1]
-        eps: small constant to avoid division by zero
-
-    Returns:
-        float
-    """
-    total_bytes = float(bytes_up) + float(bytes_down)
-    acc = float(test_acc_avg)
-
-    if acc < eps:
-        acc = eps
-
-    return total_bytes / acc
-
-
-# ---------------------------------------------------------------------------
-# Additional robustness and adversarial metrics
-# ---------------------------------------------------------------------------
-
-def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
-
-    """
-    Calculates the loss sensitivity score as the mean score over multiple samples.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader.
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        max_samples (int): Maximum number of samples from the batch to evaluate.
-
-    Returns:
-        float: Mean loss sensitivity score across the selected samples.
-    """
-    samples, labels = _validate_test_sample_tensors(test_sample)
-
-    max_samples = _coerce_max_samples(max_samples)
-    n_samples = min(int(samples.shape[0]), max_samples)
-
-    # Create the ART classifier once and reuse it for all selected samples.
-    classifier = _build_art_classifier(model, samples.shape[1:], nb_classes, learning_rate)
-
-    sensitivity_scores = []
-    for idx in range(n_samples):
-        sample = samples[idx].detach().cpu().unsqueeze(0)
-        label = labels[idx].detach().cpu().unsqueeze(0)
-        label = F.one_hot(label, num_classes=nb_classes).float()
-
-        try:
-            score = loss_sensitivity(
-                classifier,
-                sample.numpy(),
-                label.numpy(),
-            )
-            if score is not None and not math.isnan(float(score)):
-                sensitivity_scores.append(float(score))
-        except Exception as exc:
-            logger.warning("Could not compute loss sensitivity for sample index %s", idx)
-            logger.warning(exc)
-
-    if not sensitivity_scores:
-        return 0.0
-
-    return float(np.mean(sensitivity_scores))
-
-def compute_adversarial_accuracy_art(
-    model,
-    test_loader,
-    nb_classes,
-    learning_rate,
-    epsilon=0.03
-):
-    """
-    Computes adversarial accuracy using FGSM attack.
-
-    Args:
-        model (object): The model.
-        test_loader (DataLoader): DataLoader providing test samples.
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        epsilon (float): Maximum perturbation magnitude for the attacks.
-
-    Returns:
-        float: The adversarial accuracy score.
-    """
-
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.eval()
-    model.to(device)
-
-    correct = 0
-    total = 0
-
-    for samples, labels in test_loader:
-        samples = samples.to(device)
-        labels = labels.to(device)
-
-        x_adv = fgsm_attack(model, samples, labels, epsilon=epsilon)
-
-        with torch.no_grad():
-            outputs = model(x_adv)
-            logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
-            preds = logits.argmax(dim=1)
-
-        correct += (preds == labels).sum().item()
-        total += labels.size(0)
-
-    return correct / total if total > 0 else 0.0
-
-def get_empirical_robustness_score(
-    model,
-    test_sample,
-    nb_classes,
-    learning_rate,
-    attack_name = "fgsm",
-    attack_params = None,
-    max_samples = 128,
-):
-    """
-    Calculates the Empirical Robustness score using Adversarial Robustness Toolbox (ART).
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader (samples, labels).
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        attack_name (str): Attack key supported by ART empirical_robustness.
-        attack_params (dict | None): Optional attack parameters.
-        max_samples (int): Max number of samples from the batch to use.
-
-    Returns:
-        float: Empirical robustness score (>= 0.0). If it cannot be computed, returns 0.0.
-    """
-    try:
-        samples, _ = _validate_test_sample_tensors(test_sample)
-
-        batch_size: int = int(samples.shape[0])
-        n: int = int(min(max_samples, batch_size))
-        x = samples[:n].detach().cpu().numpy()
-
-        classifier = _build_art_classifier(model, samples.shape[1:], nb_classes, learning_rate)
-
-        score = empirical_robustness(
-            classifier=classifier,
-            x=x,
-            attack_name=attack_name,
-            attack_params=attack_params,
-        )
-
-        if isinstance(score, np.ndarray):
-            score = float(np.mean(score))
-
-        if score is None or (isinstance(score, float) and math.isnan(score)):
-            return 0.0
-
-        return float(score)
-
-    except Exception as exc:
-        logger.warning("Could not compute empirical robustness (ART). Returning 0.0")
-        logger.warning(exc)
-        return 0.0
-
-
-
-def _get_image_normalization_for_samples(samples):
-    if not isinstance(samples, torch.Tensor) or samples.ndim < 4:
-        return None
-
-    channels = int(samples.shape[1])
-    if channels == 1:
-        return (0.5,), (0.5,)
-    if channels == 3:
-        return (0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)
-    return None
-
-
-def _channel_tensor(values, samples):
-    shape = [1, len(values)] + [1] * max(samples.dim() - 2, 0)
-    return torch.tensor(values, dtype=samples.dtype, device=samples.device).view(*shape)
-
-
-def _fgsm_step_and_clamp(samples, grad, epsilon):
-    normalization = _get_image_normalization_for_samples(samples)
-    if normalization is None:
-        return samples + epsilon * grad.sign()
-
-    mean, std = normalization
-    mean = _channel_tensor(mean, samples)
-    std = _channel_tensor(std, samples)
-
-    normalized_epsilon = float(epsilon) / std
-    lower = (0.0 - mean) / std
-    upper = (1.0 - mean) / std
-
-    x_adv = samples + normalized_epsilon * grad.sign()
-    x_adv = torch.max(torch.min(x_adv, samples + normalized_epsilon), samples - normalized_epsilon)
-    return torch.max(torch.min(x_adv, upper), lower)
-
-
-def fgsm_attack(model, samples, labels, epsilon=0.03):
-    """
-        Performs an FGSM (Fast Gradient Sign Method) adversarial attack on a batch of samples.
-
-        Args:
-            model (torch.nn.Module): The PyTorch model to attack.
-            samples (torch.Tensor): Input samples to perturb, shape (B, ...).
-            labels (torch.Tensor): True labels corresponding to the samples.
-            epsilon (float, optional): Maximum perturbation magnitude for the attack. Defaults to 0.03.
-
-        Returns:
-            torch.Tensor: Adversarially perturbed samples with the same shape as `samples`.
-    """
-    try:
-        device = next(model.parameters()).device
-    except Exception:
-        device = samples.device
-
-    samples = samples.clone().detach().to(device)
-    labels = labels.to(device)
-    samples.requires_grad = True
-
-    outputs = model(samples)
-    logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
-    loss = nn.CrossEntropyLoss()(logits, labels)
-    grad = torch.autograd.grad(loss, samples, only_inputs=True)[0]
-    x_adv = _fgsm_step_and_clamp(samples, grad, epsilon)
-
-    return x_adv.detach()
-
-def get_confidence_score(
-    model,
-    test_sample,
-    max_samples = 128,
-    use_true_label = True,
-):
-    """
-    Calculates the confidence score.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader (samples, labels).
-        max_samples (int): Max number of samples from the batch to use.
-        use_true_label (bool): Whether to compute confidence with respect to the true labels. Defaults to True.
-
-    Returns:
-        float: Confidence score.
-    """
-    try:
-        if not isinstance(model, torch.nn.Module):
-            logger.warning("Model is not a torch.nn.Module")
-            return 0.0
-
-        x, y = test_sample
-
-        if isinstance(x, torch.Tensor):
-            x = x[:max_samples]
-        if isinstance(y, torch.Tensor):
-            y = y[:max_samples]
-
-        try:
-            device = next(model.parameters()).device
-        except Exception:
-            device = torch.device("cpu")
-
-        model.eval()
-        with torch.no_grad():
-            x = x.to(device) if isinstance(x, torch.Tensor) else x
-            out = model(x)
-
-            logits = out[0] if isinstance(out, (tuple, list)) else out
-            probs = torch.softmax(logits, dim=1)
-
-            if use_true_label and isinstance(y, torch.Tensor):
-                if y.ndim > 1:
-                    y_idx = torch.argmax(y, dim=1)
-                else:
-                    y_idx = y
-                y_idx = y_idx.to(device)
-
-                true_probs = probs.gather(1, y_idx.view(-1, 1)).squeeze(1)
-                return float(true_probs.mean().detach().cpu().item())
-
-            msp = probs.max(dim=1).values
-            return float(msp.mean().detach().cpu().item())
-
-    except Exception as e:
-        logger.warning("Could not compute confidence score")
-        logger.warning(e)
-        return 0.0
-
-def attack_success_rate(model, test_sample,epsilon=0.03):
-    """
-    Calculates the ASR.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader (samples, labels).
-        epsilon (float): Maximum perturbation magnitude for the attacks.
-
-    Returns:
-        float: The ASR.
-    """
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.eval()
-    model.to(device)
-
-    images, labels = test_sample
-    images = images.to(device)
-    labels = labels.to(device)
-
-    with torch.no_grad():
-        outputs = model(images)
-        logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
-        preds = logits.argmax(dim=1)
-
-    correct_mask = preds.eq(labels)
-    num_correct = correct_mask.sum().item()
-
-    if num_correct == 0:
-        return 0.0
-
-    x_adv = fgsm_attack(model, images, labels, epsilon=epsilon)
-
-    with torch.no_grad():
-        outputs_adv = model(x_adv)
-        logits_adv = outputs_adv[0] if isinstance(outputs_adv, (tuple, list)) else outputs_adv
-        preds_adv = logits_adv.argmax(dim=1)
-
-    successful_attacks = (correct_mask & preds_adv.ne(labels)).sum().item()
-
-    asr = successful_attacks / num_correct
-
-    return asr
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 3f32e8b9e..5be3ee012 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -2,13 +2,22 @@
 import os
 import pandas as pd
 
-from nebula.addons.trustworthiness.calculation import (
+from nebula.addons.trustworthiness.helpers.csv_io import (
+    load_data_results_participant,
+    load_emissions_participant,
+)
+from nebula.addons.trustworthiness.helpers.data_distribution import (
+    get_all_data_entropy,
+    get_local_class_imbalance_score,
+    get_local_normalized_entropy,
+)
+from nebula.addons.trustworthiness.helpers.privacy import (
+    get_global_privacy_risk_dfl,
+)
+from nebula.addons.trustworthiness.helpers.scenario_metrics import (
     get_bytes_model,
     get_dp_local,
     get_elapsed_time,
-    get_global_privacy_risk_dfl,
-    get_local_class_imbalance_score,
-    get_local_normalized_entropy,
     get_underfitting_score_local,
 )
 from nebula.addons.trustworthiness.factsheet_common import (
@@ -24,7 +33,6 @@
     write_factsheet,
 )
 from nebula.addons.trustworthiness.factsheet_populators import populate_profile_metrics
-from nebula.addons.trustworthiness.utils import read_csv, get_all_data_entropy
 
 logger = logging.getLogger(__name__)
 
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/factsheet.py
index 7c23f20c2..0417efdc2 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/factsheet.py
@@ -4,16 +4,22 @@
 import numpy as np
 import pandas as pd
 
-from nebula.addons.trustworthiness.calculation import (
+from nebula.addons.trustworthiness.helpers.csv_io import read_csv
+from nebula.addons.trustworthiness.helpers.data_distribution import (
+    get_class_imbalance_score,
+    get_cv,
+)
+from nebula.addons.trustworthiness.helpers.factsheet_values import check_field_filled
+from nebula.addons.trustworthiness.helpers.privacy import (
+    get_global_privacy_risk,
+)
+from nebula.addons.trustworthiness.helpers.scenario_metrics import (
     get_avg_class_imbalance_model_size,
     get_avg_loss_accuracy,
     get_bytes_sent_recv,
-    get_class_imbalance_score,
-    get_cv,
     get_dp_global,
     get_elapsed_time,
     get_entropy_list,
-    get_global_privacy_risk,
     get_participant_loss_accuracy,
     get_underfitting_score,
 )
@@ -31,7 +37,6 @@
     write_factsheet,
 )
 from nebula.addons.trustworthiness.factsheet_populators import populate_profile_metrics
-from nebula.addons.trustworthiness.utils import read_csv, check_field_filled
 # from nebula.core.models.syscall.mlp import SyscallModelMLP
 
 logger = logging.getLogger(__name__)
diff --git a/nebula/addons/trustworthiness/factsheet_populators.py b/nebula/addons/trustworthiness/factsheet_populators.py
index d5cce3371..5ace6b034 100644
--- a/nebula/addons/trustworthiness/factsheet_populators.py
+++ b/nebula/addons/trustworthiness/factsheet_populators.py
@@ -2,23 +2,29 @@
 
 import logging
 
-from nebula.addons.trustworthiness.calculation import (
-    attack_success_rate,
-    compute_adversarial_accuracy_art,
-    get_clever_score,
-    get_coefficient_of_variation,
-    get_confidence_score,
-    get_empirical_robustness_score,
-    get_epsilon_star,
+from nebula.addons.trustworthiness.helpers.explainability import (
     get_explainability_metrics_summary,
+)
+from nebula.addons.trustworthiness.helpers.model_quality import (
+    get_coefficient_of_variation,
     get_generalized_entropy_index,
-    get_loss_sensitivity_score,
     get_macro_f1_score,
-    get_mia_auc,
     get_overfitting_score,
     get_theil_index,
     get_well_calibration_error,
 )
+from nebula.addons.trustworthiness.helpers.privacy import (
+    get_epsilon_star,
+    get_mia_auc,
+)
+from nebula.addons.trustworthiness.helpers.robustness import (
+    attack_success_rate,
+    compute_adversarial_accuracy_art,
+    get_clever_score,
+    get_confidence_score,
+    get_empirical_robustness_score,
+    get_loss_sensitivity_score,
+)
 
 logger = logging.getLogger(__name__)
 from nebula.addons.trustworthiness.factsheet_common import (
diff --git a/nebula/addons/trustworthiness/helpers/__init__.py b/nebula/addons/trustworthiness/helpers/__init__.py
new file mode 100644
index 000000000..4ef3ae023
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/__init__.py
@@ -0,0 +1 @@
+"""Small helper modules for trustworthiness calculations and persistence."""
diff --git a/nebula/addons/trustworthiness/helpers/csv_io.py b/nebula/addons/trustworthiness/helpers/csv_io.py
new file mode 100644
index 000000000..40bd7fda0
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/csv_io.py
@@ -0,0 +1,316 @@
+import csv
+import json
+import logging
+import os
+
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+# CSV schemas used by trustworthiness outputs. Keeping column order centralized
+# avoids subtle differences between append writes and full report exports.
+DATA_RESULTS_COLUMNS = [
+    "id",
+    "bytes_sent",
+    "bytes_recv",
+    "accuracy",
+    "loss",
+    "val_accuracy",
+    "dp_enabled",
+    "dp_epsilon",
+]
+
+CFL_DATA_RESULTS_COLUMNS = [
+    "id",
+    "bytes_sent",
+    "bytes_recv",
+    "accuracy",
+    "loss",
+    "class_imbalance",
+    "model_size",
+    "local_entropy",
+    "val_accuracy",
+    "dp_enabled",
+    "dp_epsilon",
+]
+
+EMISSIONS_COLUMNS = [
+    "id",
+    "role",
+    "energy_grid",
+    "emissions",
+    "workload",
+    "CPU_model",
+    "GPU_model",
+    "CPU_used",
+    "GPU_used",
+    "energy_consumed",
+    "sample_size",
+]
+
+
+def _logs_dir():
+    # Prefer the runtime logs directory; keep the historical app path as fallback.
+    return os.environ.get("NEBULA_LOGS_DIR") or os.path.join("nebula", "app", "logs")
+
+
+def _trustworthiness_dir(scenario_name: str) -> str:
+    # Every scenario stores trustworthiness artifacts in this subdirectory.
+    return os.path.join(_logs_dir(), scenario_name, "trustworthiness")
+
+
+def _trustworthiness_path(scenario_name: str, filename: str) -> str:
+    # Build a concrete artifact path for a scenario.
+    return os.path.join(_trustworthiness_dir(scenario_name), filename)
+
+
+def _ensure_parent_dir(file_path: str) -> None:
+    # Ensure CSV/JSON writes work even when the trust folder was not created yet.
+    directory = os.path.dirname(file_path)
+    if directory:
+        os.makedirs(directory, exist_ok=True)
+
+
+def _read_first_csv_row(file_path: str) -> dict:
+    # Return the first data row as a dict; read as UTF-8 to match the encoding _append_csv_row writes.
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    with open(file_path, "r", newline="", encoding="utf-8") as csv_file:
+        rows = list(csv.DictReader(csv_file))
+
+    if not rows:
+        raise ValueError(f"No rows found in {file_path}")
+
+    return rows[0]
+
+
+def _read_or_empty_dataframe(file_path: str, columns: list[str]) -> pd.DataFrame:
+    # Append flows start from the existing CSV or from an empty schema.
+    if os.path.exists(file_path):
+        return pd.read_csv(file_path)
+
+    return pd.DataFrame(columns=columns)
+
+
+def _append_csv_row(file_path: str, columns: list[str], row: dict) -> None:
+    # Preserve the declared schema and ignore any unexpected keys in row.
+    _ensure_parent_dir(file_path)
+    df = _read_or_empty_dataframe(file_path, columns)
+    new_row = pd.DataFrame([{column: row.get(column) for column in columns}])
+    pd.concat([df, new_row], ignore_index=True).to_csv(file_path, encoding="utf-8", index=False)
+
+
+def _write_csv_rows(file_path: str, fieldnames: list[str], rows: list[dict]) -> None:
+    # Replace the CSV in one write; UTF-8 matches _append_csv_row and the pd.read_csv default.
+    _ensure_parent_dir(file_path)
+    with open(file_path, "w", newline="", encoding="utf-8") as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def _to_bool(value) -> bool:
+    # DictReader returns strings, while some tests/builders may pass booleans.
+    return str(value).strip().lower() == "true"
+
+
+def read_csv(filename):
+    # Missing optional CSVs are represented as None for existing callers.
+    if os.path.exists(filename):
+        return pd.read_csv(filename)
+
+    return None
+
+
+def write_results_json(out_file, data):
+    # Trust metric evaluation appends one result object per evaluation call.
+    _ensure_parent_dir(out_file)
+    with open(out_file, "a", encoding="utf-8") as file:
+        json.dump(data, file, indent=4)
+
+
+def load_data_results_participant(experiment_name: str, participant_id: int | str):
+    # Load the DFL/SDFL participant training summary written by save_results_csv.
+    row = _read_first_csv_row(
+        _trustworthiness_path(experiment_name, f"data_results_{participant_id}.csv")
+    )
+
+    return (
+        int(float(row["bytes_sent"])),
+        int(float(row["bytes_recv"])),
+        float(row["accuracy"]),
+        float(row["loss"]),
+        float(row["val_accuracy"]),
+        _to_bool(row["dp_enabled"]),
+        float(row["dp_epsilon"]),
+    )
+
+
+def load_emissions_participant(experiment_name: str, participant_id: int | str):
+    # Load the DFL/SDFL participant CodeCarbon summary.
+    row = _read_first_csv_row(
+        _trustworthiness_path(experiment_name, f"emissions_{participant_id}.csv")
+    )
+
+    return (
+        str(row["role"]),
+        float(row["energy_grid"]),
+        float(row["emissions"]),
+        str(row["workload"]),
+        str(row["CPU_model"]),
+        str(row["GPU_model"]),
+        _to_bool(row["CPU_used"]),
+        _to_bool(row["GPU_used"]),
+        float(row["energy_consumed"]),
+        int(float(row["sample_size"])),
+    )
+
+
+def save_trustworthiness_reports_csv(
+    reports: dict,
+    experiment_name: str,
+) -> None:
+    # Server-side CFL flow exports one aggregate data CSV and one emissions CSV.
+    sorted_reports = sorted(reports.values(), key=lambda report: int(report["node_id"]))
+
+    data_rows = [
+        {
+            "id": report["node_id"],
+            "bytes_sent": report["bytes_sent"],
+            "bytes_recv": report["bytes_recv"],
+            "accuracy": report["accuracy"],
+            "loss": report["loss"],
+            "class_imbalance": report["class_imbalance"],
+            "model_size": report["model_size"],
+            "local_entropy": report["local_entropy"],
+            "val_accuracy": report["val_accuracy"],
+            "dp_enabled": report["dp_enabled"],
+            "dp_epsilon": report["dp_epsilon"],
+        }
+        for report in sorted_reports
+    ]
+    emissions_rows = [
+        {
+            "id": report["node_id"],
+            "role": report["role"],
+            "energy_grid": report["energy_grid"],
+            "emissions": report["emissions"],
+            "workload": report["workload"],
+            "CPU_model": report["cpu_model"],
+            "GPU_model": report["gpu_model"],
+            "CPU_used": report["cpu_used"],
+            "GPU_used": report["gpu_used"],
+            "energy_consumed": report["energy_consumed"],
+            "sample_size": report["sample_size"],
+        }
+        for report in sorted_reports
+    ]
+
+    data_results_path = _trustworthiness_path(experiment_name, "data_results.csv")
+    emissions_path = _trustworthiness_path(experiment_name, "emissions.csv")
+
+    _write_csv_rows(data_results_path, CFL_DATA_RESULTS_COLUMNS, data_rows)
+    _write_csv_rows(emissions_path, EMISSIONS_COLUMNS, emissions_rows)
+
+    logger.info(
+        "[TW SERVER] CSV files written correctly: %s, %s",
+        data_results_path,
+        emissions_path,
+    )
+
+
+def save_results_csv_cfl(
+    scenario_name: str,
+    id: int,
+    bytes_sent: int,
+    bytes_recv: int,
+    accuracy: float,
+    loss: float,
+    class_imbalance: float,
+    model_size: int,
+    local_entropy: float,
+    val_accuracy: float,
+    dp_enabled: bool,
+    dp_epsilon: float,
+):
+    # Append one participant to the centralized data-results CSV.
+    _append_csv_row(
+        _trustworthiness_path(scenario_name, "data_results.csv"),
+        CFL_DATA_RESULTS_COLUMNS,
+        {
+            "id": id,
+            "bytes_sent": bytes_sent,
+            "bytes_recv": bytes_recv,
+            "accuracy": accuracy,
+            "loss": loss,
+            "class_imbalance": class_imbalance,
+            "model_size": model_size,
+            "local_entropy": local_entropy,
+            "val_accuracy": val_accuracy,
+            "dp_enabled": dp_enabled,
+            "dp_epsilon": dp_epsilon,
+        },
+    )
+
+
+def save_emissions_csv_cfl(
+    scenario_name: str,
+    id: int,
+    role: str,
+    energy_grid: float,
+    emissions: float,
+    workload: str,
+    cpu_model: str,
+    gpu_model: str,
+    cpu_used: bool,
+    gpu_used: bool,
+    energy_consumed: float,
+    sample_size: int,
+):
+    # Append one participant to the centralized emissions CSV.
+    _append_csv_row(
+        _trustworthiness_path(scenario_name, "emissions.csv"),
+        EMISSIONS_COLUMNS,
+        {
+            "id": id,
+            "role": role,
+            "energy_grid": energy_grid,
+            "emissions": emissions,
+            "workload": workload,
+            "CPU_model": cpu_model,
+            "GPU_model": gpu_model,
+            "CPU_used": cpu_used,
+            "GPU_used": gpu_used,
+            "energy_consumed": energy_consumed,
+            "sample_size": sample_size,
+        },
+    )
+
+
+def save_results_csv(
+    scenario_name: str,
+    id: int,
+    bytes_sent: int,
+    bytes_recv: int,
+    accuracy: float,
+    loss: float,
+    val_accuracy: float,
+    dp_enabled: bool,
+    dp_epsilon: float,
+):
+    # Local DFL/SDFL nodes persist their own data-results CSV before exchange.
+    _append_csv_row(
+        _trustworthiness_path(scenario_name, f"data_results_{id}.csv"),
+        DATA_RESULTS_COLUMNS,
+        {
+            "id": id,
+            "bytes_sent": bytes_sent,
+            "bytes_recv": bytes_recv,
+            "accuracy": accuracy,
+            "loss": loss,
+            "val_accuracy": val_accuracy,
+            "dp_enabled": dp_enabled,
+            "dp_epsilon": dp_epsilon,
+        },
+    )
diff --git a/nebula/addons/trustworthiness/helpers/data_distribution.py b/nebula/addons/trustworthiness/helpers/data_distribution.py
new file mode 100644
index 000000000..6a118019b
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/data_distribution.py
@@ -0,0 +1,178 @@
+import json
+import os
+from collections import Counter
+
+import numpy as np
+from hashids import Hashids
+from scipy.stats import entropy
+
+hashids = Hashids()
+
+
+def _logs_dir():
+    # Return the base logs directory used to read and write trust artifacts.
+    return os.environ.get("NEBULA_LOGS_DIR") or os.path.join("nebula", "app", "logs")
+
+
+def _trustworthiness_dir(scenario_name: str) -> str:
+    # Return the trustworthiness directory for a scenario.
+    return os.path.join(_logs_dir(), scenario_name, "trustworthiness")
+
+
+def _trustworthiness_path(scenario_name: str, filename: str) -> str:
+    # Return the path of a trustworthiness artifact for a scenario.
+    return os.path.join(_trustworthiness_dir(scenario_name), filename)
+
+
+def _ensure_trustworthiness_dir(scenario_name: str) -> None:
+    # Create the scenario trustworthiness directory if it does not exist.
+    os.makedirs(_trustworthiness_dir(scenario_name), exist_ok=True)
+
+
+def _encode_class_id(class_id) -> str:
+    # Convert a numeric class ID into the hash used in persisted JSON files.
+    return hashids.encode(int(class_id))
+
+
+def _class_counts_from_counter(class_counter: Counter) -> dict:
+    # Return hashed class counts from an in-memory Counter.
+    return {
+        _encode_class_id(class_id): int(count)
+        for class_id, count in class_counter.items()
+    }
+
+
+def _write_json(scenario_name: str, filename: str, data: dict, indent=None) -> None:
+    # Write a JSON trust artifact inside the scenario trustworthiness directory.
+    _ensure_trustworthiness_dir(scenario_name)
+    with open(_trustworthiness_path(scenario_name, filename), "w") as file:
+        json.dump(data, file, indent=indent)
+
+
+def _iter_participant_class_counts(experiment_name: str):
+    # Yield (participant_id, class_counts) for IDs 0, 1, 2, ...; stops at the first ID whose file is missing.
+    participant_id = 0
+    while True:
+        file_path = get_class_count_file(experiment_name, participant_id)
+        if not os.path.exists(file_path):
+            break
+
+        yield participant_id, load_class_counts(experiment_name, participant_id)
+        participant_id += 1
+
+
+def get_class_count_file(scenario_name, participant_id):
+    # Return the class-count JSON path for one participant.
+    return _trustworthiness_path(scenario_name, f"{str(participant_id)}_class_count.json")
+
+
+def load_class_counts(scenario_name, participant_id):
+    # Load one participant's saved class-count dictionary.
+    with open(get_class_count_file(scenario_name, participant_id), "r") as file:
+        return json.load(file)
+
+
+def get_class_imbalance_from_counts(class_counts):
+    # Calculate class imbalance as the coefficient of variation of class counts.
+    return get_cv(list=list(class_counts.values()))
+
+
+def get_class_imbalance_score(class_imbalance):
+    # Convert class imbalance into a score where 1 means balanced classes.
+    return 1 / (1 + class_imbalance)
+
+
+def get_class_imbalance_local(participant_id, experiment_name):
+    # Return the raw class-imbalance value for one participant.
+    return get_class_imbalance_from_counts(load_class_counts(experiment_name, participant_id))
+
+
+def get_local_class_imbalance_score(scenario_name, participant_id):
+    # Return the trust-oriented class-imbalance score for one participant.
+    return get_class_imbalance_score(get_class_imbalance_local(participant_id, scenario_name))
+
+
+def get_entropy_from_class_counts(class_counts, normalize=False):
+    # Calculate entropy from a class-count dictionary, optionally normalized to [0, 1].
+    counts = np.array(list(class_counts.values()), dtype=float)
+    total = counts.sum()
+    if total <= 0:
+        return 0.0
+
+    probabilities = counts / total
+    entropy_value = entropy(probabilities, base=2)
+
+    if not normalize:
+        return round(float(entropy_value), 6)
+
+    class_count = len(probabilities)
+    if class_count <= 1:
+        return 0.0
+
+    normalized_entropy = float(entropy_value / np.log2(class_count))
+    return float(np.clip(normalized_entropy, 0.0, 1.0))
+
+
+def get_local_normalized_entropy(scenario_name, participant_id):
+    # Return normalized entropy for one participant's saved class counts.
+    return get_entropy_from_class_counts(
+        load_class_counts(scenario_name, participant_id),
+        normalize=True,
+    )
+
+
+def get_cv(list=None, std=None, mean=None):
+    # Return the coefficient of variation from either values or precomputed std/mean.
+    if std is not None and mean is not None:
+        return 0 if mean == 0 else std / mean
+
+    if list is None:
+        return 0
+
+    values = np.asarray(list, dtype=float)
+    mean_value = float(np.mean(values)) if values.size else 0.0
+    if mean_value == 0:
+        return 0
+
+    return float(np.std(values) / mean_value)
+
+
+def get_participation_variation_score(participation_counts):
+    # Convert participation-count dispersion into a score where 1 means equal participation.
+    if not participation_counts:
+        return 1.0
+
+    counts = np.asarray(participation_counts, dtype=float)
+    mean_count = float(np.mean(counts))
+    if mean_count <= 0:
+        return 0.0
+
+    cv = get_cv(list=counts)
+    if not np.isfinite(cv):
+        return 0.0
+
+    return float(1 / (1 + cv))
+
+
+def save_class_count_per_participant(experiment_name, class_counter: Counter, idx):
+    # Save one participant's class-count dictionary as <participant>_class_count.json.
+    _write_json(
+        experiment_name,
+        f"{str(idx)}_class_count.json",
+        _class_counts_from_counter(class_counter),
+    )
+
+
+def get_all_data_entropy(experiment_name):
+    # Compute entropy for every participant class-count file and write entropy.json.
+    entropy_per_participant = {
+        str(participant_id): round(get_entropy_from_class_counts(class_count), 6)
+        for participant_id, class_count in _iter_participant_class_counts(experiment_name)
+    }
+
+    _write_json(experiment_name, "entropy.json", entropy_per_participant, indent=2)
+
+
+def get_local_entropy(id, experiment_name):
+    # Return non-normalized entropy for one participant's saved class counts.
+    return get_entropy_from_class_counts(load_class_counts(experiment_name, id))
diff --git a/nebula/addons/trustworthiness/helpers/explainability.py b/nebula/addons/trustworthiness/helpers/explainability.py
new file mode 100644
index 000000000..ce9809c3e
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/explainability.py
@@ -0,0 +1,407 @@
+import copy
+import gc
+import logging
+import math
+
+import numpy as np
+import shap
+import torch
+from scipy.spatial.distance import jensenshannon
+from scipy.stats import entropy, variation
+
+logger = logging.getLogger(__name__)
+
+def get_feature_importance_cv(model, test_sample):
+    """
+    Calculates the coefficient of variation of the strictly positive feature importances.
+
+    Args:
+        model (object): The model.
+        test_sample (object): Test batch forwarded unchanged to _get_feature_importances.
+
+    Returns:
+        float: The CV (>= 0); 0.0 if fewer than two positive importances remain, 1.0 if the CV is NaN/inf or an error occurs.
+    """
+
+    try:
+        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
+        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
+        vals = vals[vals > 0]
+
+        if len(vals) <= 1:
+            return 0.0
+
+        cv = float(variation(vals))
+        if math.isnan(cv) or math.isinf(cv):
+            return 1.0
+        return max(0.0, cv)
+    except Exception as exc:
+        logger.warning("Could not compute feature importance CV with shap")
+        logger.warning(exc)
+        return 1.0
+
+
+def _get_feature_importances(model, test_sample):
+    """
+    Computes global feature importances from SHAP values.
+
+    Args:
+        model (object): The model.
+        test_sample (object): One test sample batch.
+
+    Returns:
+        np.ndarray: Global importances per feature.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return np.array([])
+
+    def _clone_model(model_ref, device):
+        optimizer_attrs = ("_optimizer", "_optimizer_override")
+        optimizer_state = {}
+        try:
+            for attr in optimizer_attrs:
+                if hasattr(model_ref, attr):
+                    optimizer_state[attr] = getattr(model_ref, attr)
+                    setattr(model_ref, attr, None)
+
+            model_clone = copy.deepcopy(model_ref)
+            for attr in optimizer_attrs:
+                if hasattr(model_clone, attr):
+                    setattr(model_clone, attr, None)
+
+            model_clone.to(device)
+            model_clone.eval()
+            return model_clone
+        except Exception as exc:
+            logger.warning("Could not clone model for SHAP, using original model")
+            logger.warning(exc)
+            model_ref.eval()
+            return model_ref
+        finally:
+            for attr, value in optimizer_state.items():
+                setattr(model_ref, attr, value)
+
+    def _prepare_shap_inputs(sample):
+        if not (isinstance(sample, (tuple, list)) and len(sample) >= 1):
+            return None, None, None
+
+        batched_data = sample[0]
+        if not torch.is_tensor(batched_data) or batched_data.ndim == 0 or batched_data.size(0) == 0:
+            return None, None, None
+
+        if not torch.is_floating_point(batched_data):
+            batched_data = batched_data.float()
+
+        batch_size = int(batched_data.size(0))
+        input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
+
+        if batch_size == 1:
+            return batched_data[:1], batched_data[:1], input_shape
+
+        background_size = min(max(8, batch_size // 4), 32, batch_size - 1)
+        explainable = batch_size - background_size
+        explain_size = min(max(4, explainable), 32, explainable)
+
+        background = batched_data[:background_size]
+        test_data = batched_data[background_size:background_size + explain_size]
+
+        if test_data.size(0) == 0:
+            test_data = batched_data[: min(batch_size, 32)]
+
+        return background, test_data, input_shape
+
+    def _compute_shap_values(model_ref, background, test_data):
+        explainer_errors = []
+
+        for explainer_name in ("DeepExplainer", "GradientExplainer"):
+            explainer = None
+            try:
+                if explainer_name == "DeepExplainer":
+                    explainer = shap.DeepExplainer(model_ref, background)
+                    return explainer.shap_values(test_data, check_additivity=False)
+
+                explainer = shap.GradientExplainer(model_ref, background)
+                return explainer.shap_values(test_data)
+            except Exception as exc:
+                explainer_errors.append(f"{explainer_name}: {exc}")
+            finally:
+                # SHAP explainers may register autograd hooks. If we explain on the
+                # original model, those hooks can leak into later ART metrics.
+                del explainer
+                gc.collect()
+
+        raise RuntimeError("; ".join(explainer_errors))
+
+    def _compute_gradient_importances(model_ref, test_data):
+        was_training = bool(getattr(model_ref, "training", False))
+        model_ref.eval()
+
+        try:
+            inputs = test_data.detach().clone().requires_grad_(True)
+            model_ref.zero_grad(set_to_none=True)
+
+            outputs = model_ref(inputs)
+            if isinstance(outputs, (tuple, list)):
+                outputs = outputs[0]
+
+            if outputs.ndim == 1:
+                score = outputs.sum()
+            else:
+                score = outputs.reshape(outputs.shape[0], -1).max(dim=1).values.sum()
+
+            score.backward()
+            if inputs.grad is None:
+                return np.array([])
+
+            importances = torch.abs(inputs.grad * inputs).mean(dim=0)
+            importances = importances.detach().cpu().numpy().reshape(-1)
+            importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
+            return np.maximum(importances, 0.0)
+        finally:
+            if was_training:
+                model_ref.train()
+
+    def _feature_axes_from_shape(arr_shape, input_shape, n_samples):  # returns the indices of the axes of arr_shape that equal input_shape, or None
+        input_shape = tuple(input_shape)
+        input_rank = len(input_shape)
+
+        if input_rank == 0 or len(arr_shape) < input_rank:  # nothing to match, or too few axes to contain input_shape
+            return None
+
+        if len(arr_shape) >= input_rank + 1 and tuple(arr_shape[1:1 + input_rank]) == input_shape:  # input_shape found right after axis 0
+            return tuple(range(1, 1 + input_rank))
+
+        if len(arr_shape) >= input_rank + 2 and arr_shape[1] == n_samples and tuple(arr_shape[2:2 + input_rank]) == input_shape:  # axis 1 equals n_samples and input_shape follows it
+            return tuple(range(2, 2 + input_rank))
+
+        candidates = []  # every start index where a contiguous window of arr_shape equals input_shape
+        for start in range(len(arr_shape) - input_rank + 1):
+            if tuple(arr_shape[start:start + input_rank]) == input_shape:
+                candidates.append(start)
+
+        if not candidates:
+            return None
+
+        # Prefer matches that do not consume the leading sample/output axes.
+        non_leading = [start for start in candidates if start > 0]
+        if non_leading:
+            candidates = non_leading
+
+        if len(arr_shape) > 1 and arr_shape[1] == n_samples:  # axis 1 looks like the sample axis, so prefer windows starting after it
+            non_output_sample = [start for start in candidates if start > 1]
+            if non_output_sample:
+                candidates = non_output_sample
+
+        start = candidates[0]  # the earliest surviving match wins
+        return tuple(range(start, start + input_rank))
+
+    try:
+        try:
+            device = next(model.parameters()).device
+        except Exception:
+            device = torch.device("cpu")
+
+        background, test_data, input_shape = _prepare_shap_inputs(test_sample)
+        if background is None or test_data is None or input_shape is None:
+            return np.array([])
+
+        background = background.to(device)
+        test_data = test_data.to(device)
+
+        shap_model = _clone_model(model, device)
+        try:
+            shap_values = _compute_shap_values(shap_model, background, test_data)
+        except Exception as exc:
+            logger.debug("Could not compute feature importances with SHAP, using gradient fallback: %s", exc)
+            shap_model = None
+            gc.collect()
+
+            gradient_model = _clone_model(model, device)
+            try:
+                return _compute_gradient_importances(gradient_model, test_data)
+            except Exception as fallback_exc:
+                logger.debug("Could not compute feature importances with gradient fallback: %s", fallback_exc)
+                return np.array([])
+            finally:
+                del gradient_model
+                gc.collect()
+        finally:
+            if shap_model is not None:
+                del shap_model
+            gc.collect()
+
+        if shap_values is None:
+            return np.array([])
+
+        if isinstance(shap_values, (list, tuple)):
+            arrays = [np.asarray(val, dtype=float) for val in shap_values if val is not None]
+            if not arrays:
+                return np.array([])
+            shap_arr = np.stack(arrays, axis=0)
+        else:
+            shap_arr = np.asarray(shap_values, dtype=float)
+
+        if shap_arr.size == 0:
+            return np.array([])
+
+        shap_arr = np.nan_to_num(shap_arr, nan=0.0, posinf=0.0, neginf=0.0)
+        feature_axes = _feature_axes_from_shape(tuple(shap_arr.shape), input_shape, int(test_data.size(0)))
+
+        if feature_axes is None:
+            # Conservative fallback: treat the first axis as samples when possible and
+            # flatten the remaining dimensions into features.
+            if shap_arr.ndim == 1:
+                importances = np.abs(shap_arr)
+            else:
+                aggregate_axes = (0,)
+                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
+        else:
+            aggregate_axes = tuple(idx for idx in range(shap_arr.ndim) if idx not in feature_axes)
+            if aggregate_axes:
+                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
+            else:
+                importances = np.abs(shap_arr)
+
+        importances = np.asarray(importances, dtype=float).reshape(-1)
+        importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
+        return np.maximum(importances, 0.0)
+    except Exception as exc:
+        logger.debug("Could not compute feature importances")
+        logger.debug(exc)
+        return np.array([])
+
+
+def get_alpha_score(model, test_sample, alpha=0.8):
+    """
+    Computes alpha score: the fraction of features, taken in descending importance, needed to reach ``alpha`` of the total importance (1.0 when it cannot be computed).
+    """
+    try:
+        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
+        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
+        vals = np.maximum(vals, 0.0)
+        total_features = len(vals)
+        if total_features == 0 or np.sum(vals) <= 1e-12:  # no features, or no importance mass to distribute
+            return 1.0
+
+        try:
+            alpha = float(alpha)
+        except Exception:
+            alpha = 0.8  # a non-numeric alpha falls back to 0.8
+        alpha = min(max(alpha, 0.0), 1.0)  # clamp to [0, 1]
+
+        vals_sorted = np.sort(vals)[::-1]  # descending importance
+        cum_sum = np.cumsum(vals_sorted)
+        threshold = float(alpha) * np.sum(vals_sorted)
+        idx = np.searchsorted(cum_sum, threshold)  # first position whose cumulative importance is >= threshold
+        return float(min(total_features, idx + 1) / total_features)  # idx is 0-based, hence +1; capped at the feature count
+    except Exception as exc:
+        logger.warning("Could not compute alpha score")
+        logger.warning(exc)
+        return 1.0
+
+
+def _get_spread_base(model, test_sample, divergence=True):
+    """Core of the spread metrics: ``jensenshannon`` vs. uniform weights, or the entropy ratio."""
+    importances = _get_feature_importances(model, test_sample)
+    tol = 1e-8
+    fallback = 0.0 if divergence else 1.0
+
+    if len(importances) == 0 or np.sum(importances) < tol or len(importances) == 1:
+        return fallback
+
+    uniform = np.ones(len(importances)) / len(importances)
+    weights = importances / np.sum(importances)
+
+    if divergence:
+        metric = jensenshannon(weights, uniform, base=2)
+    else:
+        denom = entropy(uniform)
+        metric = 0.0 if denom <= tol else entropy(weights) / denom
+
+    if math.isnan(metric) or math.isinf(metric):
+        return fallback
+    return float(np.clip(metric, 0.0, 1.0))
+
+
+def get_spread_ratio(model, test_sample):
+    """
+    Returns the entropy-ratio spread of the global feature importances (1.0 on failure).
+    """
+    try:
+        return _get_spread_base(model, test_sample, False)
+    except Exception as err:
+        logger.warning("Could not compute spread ratio")
+        logger.warning(err)
+        return 1.0
+
+
+def get_spread_divergence(model, test_sample):
+    """
+    Returns the divergence-based spread of the global feature importances (0.0 on failure).
+    """
+    try:
+        return _get_spread_base(model, test_sample, True)
+    except Exception as err:
+        logger.warning("Could not compute spread divergence")
+        logger.warning(err)
+        return 0.0
+
+
+def get_explainability_metrics_summary(model, test_dataloader, max_batches=4):
+    """
+    Averages the explainability metrics over the first batches of a dataloader.
+
+    Args:
+        model (object): The model.
+        test_dataloader: Iterable of test batches; ``None`` yields the defaults.
+        max_batches (int): Upper bound on evaluated batches (at least 1; 4 if
+            the value cannot be converted to ``int``).
+
+    Returns:
+        dict: Mean feature_importance_cv, alpha_score, spread_ratio and
+        spread_divergence; a metric with no collected value keeps its default.
+    """
+    # Defaults reported when a metric could not be evaluated on any batch.
+    summary = {
+        "feature_importance_cv": 1.0,
+        "alpha_score": 1.0,
+        "spread_ratio": 1.0,
+        "spread_divergence": 0.0,
+    }
+
+    if test_dataloader is None:
+        return summary
+
+    try:
+        batch_limit = max(1, int(max_batches))
+    except Exception:
+        batch_limit = 4
+
+    # Evaluated in this order for every batch.
+    metric_fns = (
+        ("feature_importance_cv", get_feature_importance_cv),
+        ("alpha_score", get_alpha_score),
+        ("spread_ratio", get_spread_ratio),
+        ("spread_divergence", get_spread_divergence),
+    )
+    collected = {name: [] for name, _ in metric_fns}
+
+    try:
+        for batch_idx, test_sample in enumerate(test_dataloader):
+            if batch_idx >= batch_limit:
+                break
+
+            for name, metric_fn in metric_fns:
+                collected[name].append(float(metric_fn(model, test_sample)))
+    except Exception as err:
+        # Values gathered before the failure are still averaged below.
+        logger.warning("Could not compute explainability metrics summary")
+        logger.warning(err)
+
+    # Overwrite a default only when at least one value was collected.
+    for name, values in collected.items():
+        if values:
+            summary[name] = float(np.mean(values))
+
+    return summary
diff --git a/nebula/addons/trustworthiness/helpers/factsheet_values.py b/nebula/addons/trustworthiness/helpers/factsheet_values.py
new file mode 100644
index 000000000..ee42940af
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/factsheet_values.py
@@ -0,0 +1,108 @@
+import logging
+import math
+
+from nebula.addons.trustworthiness.helpers.privacy import (
+    get_global_privacy_risk,
+    get_global_privacy_risk_dfl,
+)
+from nebula.addons.trustworthiness.helpers.scenario_metrics import comm_efficiency
+from nebula.addons.trustworthiness.helpers.scoring import (
+    check_properties,
+    get_value,
+)
+
+logger = logging.getLogger(__name__)
+
+OPERATIONS = {  # operation name -> callable; get_input_value() looks the name up here and calls it with the collected inputs
+    "check_properties": check_properties,
+    "comm_efficiency": comm_efficiency,
+    "get_global_privacy_risk": get_global_privacy_risk,
+    "get_global_privacy_risk_dfl": get_global_privacy_risk_dfl,
+    "get_value": get_value,
+}
+
+def check_field_filled(factsheet_dict, factsheet_path, value, empty=""):
+    """
+    Return the value to store in a factsheet field without overwriting a filled one.
+
+    Args:
+        factsheet_dict (dict): The factsheet dict.
+        factsheet_path (list): Two keys locating the field: ``[section, field]``.
+        value (object): Candidate value; a numeric NaN is stored as 0.
+        empty (string): Returned when ``value`` is None, "" or "nan".
+
+    Returns:
+        object: The existing field value if truthy, otherwise the sanitised ``value`` or ``empty``.
+
+    """
+    current = factsheet_dict[factsheet_path[0]][factsheet_path[1]]
+    if current:
+        return current  # an already-filled field is never overwritten
+    if value is None or value == "" or value == "nan":
+        return empty
+    if isinstance(value, (str, list)):
+        return value
+    try:
+        # NaN is replaced by 0 before it reaches the factsheet.
+        return 0 if math.isnan(value) else value
+    except TypeError:  # dicts, tuples, ...: not numeric, keep unchanged
+        return value
+
+
+def get_input_value(input_docs, inputs, operation):
+    """
+    Gets the input values from the input documents and applies the metric operation to them.
+
+    Args:
+        input_docs (map): The input document map, keyed by source name.
+        inputs (list): Input specs, each a dict with "source" and "field_path".
+        operation (string): Name of the metric operation in ``OPERATIONS``.
+
+    Returns:
+        float: The metric value, or None when the operation is unknown or fails.
+
+    """
+
+    args = []
+    for spec in inputs:
+        source = spec.get("source", "")
+        input_doc = input_docs.get(source)
+        if input_doc is None:
+            # The input is skipped, so the operation receives fewer arguments.
+            logger.warning("%s is null", source)
+            continue
+        args.append(get_value_from_path(input_doc, spec.get("field_path", "")))
+
+    try:
+        operation_fn = OPERATIONS[operation]
+    except (KeyError, TypeError):
+        logger.warning("%s is not valid", operation)
+        return None
+    try:
+        return operation_fn(*args)
+    except (KeyError, TypeError) as exc:
+        logger.warning("%s could not be evaluated: %s", operation, exc)
+        return None
+
+
+def get_value_from_path(input_doc, path):
+    """
+    Resolves a "/"-separated key path inside a nested dict.
+
+    Args:
+        input_doc (map): The input document map.
+        path (string): Keys joined by "/", outermost first.
+
+    Returns:
+        object: First non-dict value met on the path (None for a missing key or a dict leaf).
+
+    """
+
+    node = input_doc
+    for key in path.split("/"):
+        child = node.get(key)
+        if not isinstance(child, dict):
+            # Scalars, lists and missing keys (None) all end the walk here.
+            return child
+        node = child
+    return None
diff --git a/nebula/addons/trustworthiness/helpers/model_quality.py b/nebula/addons/trustworthiness/helpers/model_quality.py
new file mode 100644
index 000000000..0b87937fe
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/model_quality.py
@@ -0,0 +1,371 @@
+import logging
+import math
+
+import numpy as np
+import torch
+from sklearn.metrics import f1_score
+
+logger = logging.getLogger(__name__)
+
+def _get_model_accuracy(model, dataloader):
+    """
+    Calculates model accuracy over a dataloader.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate; it is switched to eval mode.
+        dataloader (DataLoader): Dataloader with (x, y) batches; y may be class indices, an (N, 1) column or one-hot rows.
+
+    Returns:
+        float: Accuracy in [0, 1]; 0.0 for a non-Module model or an empty dataloader.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return 0.0
+
+    try:
+        device = next(model.parameters()).device
+    except Exception:
+        device = torch.device("cpu")  # parameter-less models are evaluated on CPU
+
+    model.eval()
+    correct = 0
+    total = 0
+
+    with torch.no_grad():
+        for x, y in dataloader:
+            x = x.to(device)
+            y = _prepare_class_targets(y).to(device)  # flatten to 1-D indices so `preds == y` cannot broadcast
+
+            out = model(x)
+            logits = out[0] if isinstance(out, (tuple, list)) else out
+            preds = logits.argmax(dim=1)
+
+            correct += (preds == y).sum().item()
+            total += y.size(0)
+
+    return correct / total if total > 0 else 0.0
+
+
+def get_macro_f1_score(model, dataloader):
+    """
+    Calculates macro F1 score over a dataloader.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate; it is switched to eval mode.
+        dataloader (DataLoader): Dataloader with (x, y) batches; y may be class indices, an (N, 1) column or one-hot rows.
+
+    Returns:
+        float: Macro F1 score in [0, 1]; 0.0 for a non-Module model or an empty dataloader.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return 0.0
+
+    try:
+        device = next(model.parameters()).device
+    except Exception:
+        device = torch.device("cpu")  # parameter-less models are evaluated on CPU
+
+    model.eval()
+    y_true = []
+    y_pred = []
+
+    with torch.no_grad():
+        for x, y in dataloader:
+            x = x.to(device)
+            y = _prepare_class_targets(y).to(device)  # flat class indices, so f1_score never receives nested label lists
+
+            out = model(x)
+            logits = out[0] if isinstance(out, (tuple, list)) else out
+            preds = logits.argmax(dim=1)
+
+            y_true.extend(y.detach().cpu().numpy().tolist())
+            y_pred.extend(preds.detach().cpu().numpy().tolist())
+
+    if not y_true:
+        return 0.0
+
+    return float(f1_score(y_true, y_pred, average="macro", zero_division=0))
+
+
+def _extract_model_logits(model_output):
+    """
+    Return the logits carried by a raw forward-pass result.
+    Tuple/list outputs are reduced to their first element; anything else is returned as-is.
+    """
+    if isinstance(model_output, (tuple, list)):
+        return model_output[0]
+    return model_output
+
+
+def _prepare_class_targets(y):
+    """
+    Flatten labels of any supported layout into a 1-D int64 class-index tensor.
+
+    Non-tensor inputs are first converted with ``torch.as_tensor``.
+    """
+    targets = y if torch.is_tensor(y) else torch.as_tensor(y)
+
+    if targets.ndim > 1:
+        # A trailing axis wider than one is reduced with argmax; width one is flattened.
+        wide_last_axis = targets.size(-1) > 1
+        targets = targets.argmax(dim=-1) if wide_last_axis else targets.view(-1)
+
+    return targets.long().view(-1)
+
+
+def _logits_to_probabilities(logits):
+    """
+    Convert model outputs into a probability matrix of shape (N, C).
+
+    Supports:
+    - multiclass logits/log-probabilities with shape (N, C)
+    - binary logits with shape (N,) or (N, 1)
+    - already-normalized probability matrices
+    """
+    if not torch.is_tensor(logits):
+        logits = torch.as_tensor(logits)
+
+    if logits.ndim == 0:  # a scalar becomes one sample with one output
+        logits = logits.view(1, 1)
+    elif logits.ndim == 1:  # 1-D input is read as N single-output scores
+        logits = logits.view(-1, 1)
+    elif logits.ndim > 2:  # all trailing axes are flattened into the class axis
+        logits = logits.reshape(logits.shape[0], -1)
+
+    if logits.size(1) == 1:  # single output: sigmoid gives column 1, its complement gives column 0
+        pos_prob = torch.sigmoid(logits[:, 0])
+        probs = torch.stack([1.0 - pos_prob, pos_prob], dim=1)
+    else:
+        row_sums = logits.sum(dim=1)
+        looks_like_probs = (  # heuristic: all entries in [0, 1] and every row sums to ~1
+            torch.all(logits >= 0)
+            and torch.all(logits <= 1.0 + 1e-6)
+            and torch.allclose(row_sums, torch.ones_like(row_sums), atol=1e-4, rtol=1e-4)
+        )
+        probs = logits if looks_like_probs else torch.softmax(logits, dim=1)
+
+    probs = torch.clamp(probs, min=0.0, max=1.0)
+    probs = probs / probs.sum(dim=1, keepdim=True).clamp_min(1e-12)  # renormalise rows; the floor avoids division by zero
+    return probs
+
+
+def _collect_classification_statistics(model, dataloader):
+    """
+    Collect prediction statistics required by calibration and inequality metrics.
+
+    Returns:
+        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        predicted labels, true labels, prediction confidences, correctness flags,
+        and probability assigned to the true class.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        empty = np.array([], dtype=float)
+        return empty, empty, empty, empty, empty  # note: all five are float arrays here, unlike the int label arrays below
+
+    try:
+        device = next(model.parameters()).device
+    except Exception:
+        device = torch.device("cpu")
+
+    preds_all = []
+    targets_all = []
+    confidences_all = []
+    correct_all = []
+    true_probs_all = []
+
+    model.eval()
+    with torch.no_grad():
+        for batch in dataloader:
+            if not isinstance(batch, (tuple, list)) or len(batch) < 2:  # batches without an (x, y) pair are skipped
+                continue
+
+            x, y = batch[0], batch[1]
+            if not (torch.is_tensor(x) and torch.is_tensor(y)):
+                continue
+
+            x = x.to(device)
+            y = _prepare_class_targets(y).to(device)
+
+            out = model(x)
+            logits = _extract_model_logits(out)
+            probs = _logits_to_probabilities(logits)
+
+            if probs.ndim != 2 or probs.size(0) == 0:
+                continue
+
+            if y.numel() != probs.size(0):  # label/prediction count mismatch: keep only the common prefix
+                n = min(int(y.numel()), int(probs.size(0)))
+                if n == 0:
+                    continue
+                y = y[:n]
+                probs = probs[:n]
+
+            valid_mask = (y >= 0) & (y < probs.size(1))  # drop labels outside the model's class range
+            if not torch.any(valid_mask):
+                continue
+
+            y = y[valid_mask]
+            probs = probs[valid_mask]
+
+            conf, preds = probs.max(dim=1)  # confidence is the highest class probability
+            true_probs = probs.gather(1, y.view(-1, 1)).squeeze(1)  # probability assigned to the true class
+            correct = preds.eq(y).float()
+
+            preds_all.extend(preds.detach().cpu().numpy().tolist())
+            targets_all.extend(y.detach().cpu().numpy().tolist())
+            confidences_all.extend(conf.detach().cpu().numpy().tolist())
+            correct_all.extend(correct.detach().cpu().numpy().tolist())
+            true_probs_all.extend(true_probs.detach().cpu().numpy().tolist())
+
+    return (
+        np.asarray(preds_all, dtype=int),
+        np.asarray(targets_all, dtype=int),
+        np.asarray(confidences_all, dtype=float),
+        np.asarray(correct_all, dtype=float),
+        np.asarray(true_probs_all, dtype=float),
+    )
+
+
+def get_overfitting_score(model, train_dataloader, test_accuracy):
+    """
+    Measures overfitting as how far training accuracy exceeds test accuracy.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate on training data.
+        train_dataloader (DataLoader): Training dataloader.
+        test_accuracy (float): Test accuracy in [0, 1].
+
+    Returns:
+        float: Train minus test accuracy, floored at 0.0 (0.0 on failure).
+    """
+    try:
+        gap = float(_get_model_accuracy(model, train_dataloader)) - float(test_accuracy)
+        return max(0.0, gap)
+    except Exception as err:
+        logger.warning("Could not compute overfitting score")
+        logger.warning(err)
+        return 0.0
+
+
+def get_well_calibration_error(model, test_dataloader, n_bins=10):
+    """
+    Calculates a well-calibration error style metric using prediction confidence.
+
+    Confidence is the highest class probability of each prediction and
+    the observed outcome is whether the prediction is correct.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        test_dataloader (DataLoader): Test dataloader.
+        n_bins (int): Number of equal-width confidence bins over [0, 1] (at least 2; 10 if not convertible to int).
+
+    Returns:
+        float: Calibration error in [0, 1]; 0.0 for a non-Module model or when no predictions were collected.
+    """
+    if not isinstance(model, torch.nn.Module):
+        logger.warning("Model is not a torch.nn.Module")
+        return 0.0
+
+    try:
+        n_bins = max(2, int(n_bins))
+    except Exception:
+        n_bins = 10
+
+    _, _, confidences, correct, _ = _collect_classification_statistics(model, test_dataloader)
+
+    if len(confidences) == 0 or len(correct) == 0:
+        return 0.0
+
+    confidences = np.clip(np.asarray(confidences, dtype=float), 0.0, 1.0)
+    correct = np.clip(np.asarray(correct, dtype=float), 0.0, 1.0)
+
+    bin_edges = np.linspace(0.0, 1.0, n_bins + 1)  # equal-width edges, not quantiles
+    ece = 0.0
+    total = float(len(confidences))
+
+    for idx in range(n_bins):
+        left = bin_edges[idx]
+        right = bin_edges[idx + 1]
+        if idx == n_bins - 1:  # the last bin is closed on the right so confidence 1.0 is counted
+            mask = (confidences >= left) & (confidences <= right)
+        else:
+            mask = (confidences >= left) & (confidences < right)
+
+        if not np.any(mask):
+            continue
+
+        bin_weight = float(mask.sum()) / total  # share of all samples that fall in this bin
+        bin_accuracy = float(correct[mask].mean())
+        bin_confidence = float(confidences[mask].mean())
+        ece += bin_weight * abs(bin_accuracy - bin_confidence)
+
+    return float(np.clip(ece, 0.0, 1.0))
+
+
+def get_generalized_entropy_index(model, test_dataloader, alpha=2):
+    """
+    Calculates generalized entropy index from the probabilities the model assigns to the true classes.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        test_dataloader (DataLoader): Test dataloader.
+        alpha (float): GEI alpha parameter; 0, 1 and 2 use dedicated formulas.
+
+    Returns:
+        float: Generalized entropy index value, floored at 0.0 (0.0 when nothing was collected or on failure).
+    """
+    try:
+        _, _, _, _, true_class_probs = _collect_classification_statistics(model, test_dataloader)
+        if len(true_class_probs) == 0:
+            return 0.0
+
+        # Use the probability assigned to the true class as a continuous, positive
+        # benefit. This works consistently for multiclass neural models on both
+        # images and tabular data, and avoids collapsing the metric to a coarse
+        # correct/incorrect indicator.
+        eps = 1e-12
+        b = np.clip(np.asarray(true_class_probs, dtype=float), eps, 1.0)  # benefits kept strictly positive for the logs below
+        mu = float(np.mean(b))
+        if mu <= 0:
+            return 0.0
+
+        ratio = np.clip(b / mu, eps, None)  # each benefit relative to the mean benefit
+
+        if alpha == 0:
+            val = float(np.mean(-np.log(ratio)))
+        elif alpha == 1:
+            val = float(np.mean(ratio * np.log(ratio)))
+        elif alpha == 2:
+            val = float(np.mean((ratio - 1.0) ** 2) / 2.0)
+        else:
+            val = float(np.mean(ratio**alpha - 1.0) / (alpha * (alpha - 1.0)))
+
+        if math.isnan(val) or math.isinf(val):
+            return 0.0
+        return max(0.0, val)
+    except Exception as exc:
+        logger.warning("Could not compute generalized entropy index")
+        logger.warning(exc)
+        return 0.0
+
+
+def get_theil_index(model, test_dataloader):
+    """
+    Returns the generalized entropy index evaluated at alpha=1.
+    """
+    return get_generalized_entropy_index(model, test_dataloader, 1)
+
+
+def get_coefficient_of_variation(model, test_dataloader):
+    """
+    Returns sqrt(2 * GEI(alpha=2)) as the coefficient of variation (0.0 on failure).
+    """
+    try:
+        gei_alpha_two = get_generalized_entropy_index(model, test_dataloader, alpha=2)
+        return float(np.sqrt(2 * gei_alpha_two))
+    except Exception as err:
+        logger.warning("Could not compute coefficient of variation")
+        logger.warning(err)
+        return 0.0
diff --git a/nebula/addons/trustworthiness/helpers/privacy.py b/nebula/addons/trustworthiness/helpers/privacy.py
new file mode 100644
index 000000000..f6ed327c1
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/privacy.py
@@ -0,0 +1,209 @@
+import logging
+import math
+import numbers
+from math import e
+
+import numpy as np
+import torch
+from sklearn.metrics import roc_auc_score, roc_curve
+from torch import nn
+
+logger = logging.getLogger(__name__)
+
+def get_global_privacy_risk(dp, epsilon, n):
+    """
+    Calculates the global privacy risk by epsilon and the number of clients.
+
+    Args:
+        dp (bool): Indicates if differential privacy is used or not.
+        epsilon (float): The epsilon value (anything ``float()`` accepts).
+        n (int): The number of clients in the scenario.
+
+    Returns:
+        float: 1 / (1 + (n - 1) * e^-epsilon) when dp is True; 1 (maximum risk) otherwise or on invalid input.
+    """
+
+    try:
+        epsilon = float(epsilon)
+        n = float(n)
+        # pow() overflows for very negative epsilon; the sum can reach 0 when n < 1.
+        denominator = 1 + (n - 1) * math.pow(e, -epsilon)
+    except (TypeError, ValueError, OverflowError):
+        return 1
+    if dp is True and denominator > 0:  # NaN and non-positive denominators fall through to 1
+        return 1 / denominator
+    return 1
+
+
+def get_global_privacy_risk_dfl(dp, epsilon, n):
+    """
+    Calculates the global privacy risk by epsilon and the number of neighbours.
+
+    Args:
+        dp (bool): Indicates if differential privacy is used or not.
+        epsilon (float): The epsilon value (anything ``float()`` accepts).
+        n (int): The number of neighbours.
+
+    Returns:
+        float: 1 / (1 + (n + 1) * e^-epsilon) when dp is True; 1 (maximum risk) otherwise or on invalid input.
+    """
+
+    try:
+        epsilon = float(epsilon)
+        n = float(n)
+        # pow() overflows for very negative epsilon; the sum can reach 0 when n < -1.
+        denominator = 1 + (n + 1) * math.pow(e, -epsilon)
+    except (TypeError, ValueError, OverflowError):
+        return 1
+    if dp is True and denominator > 0:  # NaN and non-positive denominators fall through to 1
+        return 1 / denominator
+    return 1
+
+
+def _collect_per_sample_losses(model, dataloader, max_samples=5000):
+    """
+    Compute per-sample cross-entropy losses for a dataloader.
+
+    Args:
+        model (torch.nn.Module): The model to evaluate; it is switched to eval mode.
+        dataloader: DataLoader providing (samples, labels); labels may be indices, an (N, 1) column or one-hot rows.
+        max_samples (int): Maximum number of samples to process.
+
+    Returns:
+        np.ndarray: Losses per sample (empty for a non-Module model, a None dataloader or no usable batch).
+    """
+    if not isinstance(model, torch.nn.Module) or dataloader is None:
+        return np.array([])
+
+    try:
+        device = next(model.parameters()).device
+    except Exception:
+        device = torch.device("cpu")
+
+    criterion = nn.CrossEntropyLoss(reduction="none")
+    losses = []
+    collected = 0
+
+    model.eval()
+    with torch.no_grad():
+        for batch in dataloader:
+            if not isinstance(batch, (tuple, list)) or len(batch) < 2:
+                continue
+
+            samples, labels = batch[0], batch[1]
+            if not torch.is_tensor(samples) or not torch.is_tensor(labels):
+                continue
+
+            remaining = max_samples - collected
+            if remaining <= 0:
+                break
+
+            samples = samples[:remaining].to(device)
+            labels = labels[:remaining]
+            if labels.ndim > 1:
+                # An (N, 1) column already holds class indices; argmax over its
+                # single entry would turn every label into class 0.
+                is_column = labels.numel() == labels.size(0)
+                labels = labels.reshape(-1) if is_column else torch.argmax(labels, dim=1)
+            labels = labels.long().to(device)
+            outputs = model(samples)
+            logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
+            batch_losses = criterion(logits, labels)
+
+            losses.append(batch_losses.detach().cpu().numpy())
+            collected += int(batch_losses.shape[0])
+
+    if not losses:
+        return np.array([])
+
+    return np.concatenate(losses, axis=0)
+
+
+def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000):
+    """
+    Compute empirical epsilon* from train/test loss distributions.
+
+    This follows the same core structure as privacy_metrics_core.epsilon_star,
+    adapted to PyTorch models and DataLoaders used in Nebula.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        train_dataloader: Training DataLoader.
+        test_dataloader: Test DataLoader.
+        max_samples (int): Maximum samples to evaluate per split.
+
+    Returns:
+        float: Empirical epsilon* value, never negative. Returns 0.0 on failure or when either split yields no losses.
+    """
+    try:
+        loss_train = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
+        loss_test = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)
+
+        if loss_train.size == 0 or loss_test.size == 0:
+            return 0.0
+
+        scores = np.concatenate([-loss_train, -loss_test])  # negated loss is the membership score
+        y_true = np.concatenate([np.ones(len(loss_train)), np.zeros(len(loss_test))])  # 1 = training sample, 0 = test sample
+
+        fpr, tpr, _ = roc_curve(y_true, scores)
+
+        fpr = np.clip(fpr, 1e-10, 1 - 1e-10)  # keep rates strictly inside (0, 1) so the ratios below never divide by zero
+        tpr = np.clip(tpr, 1e-10, 1 - 1e-10)
+        fnr = 1 - tpr
+
+        delta = 1.0 / len(loss_train) if len(loss_train) > 0 else 1e-5  # the else branch is unreachable: empty splits returned above
+
+        m1 = (1 - delta - fnr) / fpr
+        m2 = (1 - delta - fpr) / fnr
+        m3 = (fnr - delta) / (1 - fpr)
+        m4 = (fpr - delta) / (1 - fnr)
+
+        epsilon_star_val = np.log(  # the all-ones candidate floors the maximum at 1, so the log is >= 0
+            np.nanmax(np.maximum.reduce([m1, m2, m3, m4, np.ones_like(m1)]))
+        )
+
+        if np.isnan(epsilon_star_val) or np.isinf(epsilon_star_val):
+            return 0.0
+
+        return float(max(0.0, epsilon_star_val))
+    except Exception as exc:
+        logger.warning("Could not compute epsilon_star")
+        logger.warning(exc)
+        return 0.0
+
+
+def get_mia_auc(model, train_dataloader, test_dataloader, max_samples=5000):
+    """
+    Scores a loss-threshold membership inference attack by its ROC-AUC.
+
+    Training samples are labelled as members and test samples as non-members;
+    the attack score of a sample is its negated per-sample loss.
+
+    Args:
+        model (torch.nn.Module): Model to evaluate.
+        train_dataloader: Training DataLoader.
+        test_dataloader: Test DataLoader.
+        max_samples (int): Maximum samples to evaluate per split.
+
+    Returns:
+        float: ROC-AUC clipped to [0, 1]; 0.5 when a split is empty or on failure.
+    """
+    try:
+        member_losses = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
+        non_member_losses = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)
+
+        if member_losses.size == 0 or non_member_losses.size == 0:
+            return 0.5
+
+        attack_scores = np.concatenate([-member_losses, -non_member_losses])
+        membership = np.concatenate([np.ones(len(member_losses)), np.zeros(len(non_member_losses))])
+        auc = roc_auc_score(membership, attack_scores)
+
+        if not np.isfinite(auc):
+            return 0.5
+        return float(np.clip(auc, 0.0, 1.0))
+    except Exception as err:
+        logger.warning("Could not compute mia_auc")
+        logger.warning(err)
+        return 0.5
+
diff --git a/nebula/addons/trustworthiness/helpers/robustness.py b/nebula/addons/trustworthiness/helpers/robustness.py
new file mode 100644
index 000000000..13611842b
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/robustness.py
@@ -0,0 +1,413 @@
+import logging
+import math
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from art.estimators.classification import PyTorchClassifier
+from art.metrics import clever_u, empirical_robustness, loss_sensitivity
+from torch import nn, optim
+
+logger = logging.getLogger(__name__)
+
+R_L2 = 2  # Fifth positional argument of clever_u in get_clever_score (the radius in ART's signature - confirm).
+
+def _build_art_classifier(model, input_shape, nb_classes, learning_rate):
+    """Wrap `model` in an ART PyTorchClassifier (cross-entropy loss, Adam optimizer)."""
+    adam = optim.Adam(model.parameters(), learning_rate)
+
+    return PyTorchClassifier(
+        model=model,
+        loss=nn.CrossEntropyLoss(),
+        optimizer=adam,
+        input_shape=tuple(input_shape),
+        nb_classes=nb_classes,
+    )
+
+
+def _validate_test_sample_tensors(test_sample):
+    # Return (samples, labels) from a batch, or raise ValueError if it is malformed/empty.
+    if not isinstance(test_sample, (tuple, list)) or len(test_sample) < 2:
+        raise ValueError("`test_sample` must contain samples and labels.")
+    samples, labels = test_sample[:2]
+    tensors_ok = torch.is_tensor(samples) and torch.is_tensor(labels)
+    if not tensors_ok or samples.shape[0] <= 0:
+        raise ValueError("`test_sample` must contain non-empty tensors for samples and labels.")
+    return samples, labels
+
+
+def _coerce_max_samples(max_samples, default=8):
+    try:
+        return max(1, int(max_samples))  # never hand back fewer than one sample
+    except Exception:
+        return default  # int() rejected the value (e.g. None or a non-numeric string)
+
+
+def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
+    """
+    Average the untargeted CLEVER score over the first samples of a batch.
+
+    Samples whose score cannot be computed (exception, None or NaN) are skipped.
+
+    Args:
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        nb_classes (int): The nb_classes of the model.
+        learning_rate (float): Learning rate for the optimizer of the ART wrapper.
+        max_samples (int): Maximum number of samples from the batch to evaluate.
+
+    Returns:
+        float: Mean CLEVER score across the scored samples, or 0.0 if none was scored.
+
+    Raises:
+        ValueError: If `test_sample` is not a batch of non-empty tensors.
+    """
+    samples, _ = _validate_test_sample_tensors(test_sample)
+
+    if samples.dim() >= 2:
+        input_shape = tuple(samples.shape[1:])
+    else:
+        input_shape = tuple(samples.shape)
+
+    limit = min(int(samples.shape[0]), _coerce_max_samples(max_samples))
+
+    # A single ART classifier is shared by every evaluated sample.
+    art_classifier = _build_art_classifier(model, input_shape, nb_classes, learning_rate)
+
+    collected = []
+    for position, tensor in enumerate(samples[:limit]):
+        sample_np = tensor.detach().cpu().numpy()
+        try:
+            raw = clever_u(
+                art_classifier,
+                sample_np,
+                10, 5, R_L2,
+                norm=2, pool_factor=3, verbose=False,
+            )
+            if raw is not None and not math.isnan(float(raw)):
+                collected.append(float(raw))
+        except Exception as exc:
+            logger.warning("Could not compute CLEVER score for sample index %s", position)
+            logger.warning(exc)
+
+    if collected:
+        return float(np.mean(collected))
+    return 0.0
+
+def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
+    """
+    Average ART's loss sensitivity over the first samples of a batch.
+
+    Each sample is scored on its own (batch of one, one-hot label); samples whose
+    score raises, is None or is NaN are skipped.
+
+    Args:
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        nb_classes (int): The nb_classes of the model.
+        learning_rate (float): Learning rate for the optimizer of the ART wrapper.
+        max_samples (int): Maximum number of samples from the batch to evaluate.
+
+    Returns:
+        float: Mean loss sensitivity across the scored samples, or 0.0 if none was scored.
+    """
+    samples, labels = _validate_test_sample_tensors(test_sample)
+
+    limit = min(int(samples.shape[0]), _coerce_max_samples(max_samples))
+
+    # A single ART classifier is shared by every evaluated sample.
+    art_classifier = _build_art_classifier(model, samples.shape[1:], nb_classes, learning_rate)
+
+    collected = []
+    for position in range(limit):
+        single_x = samples[position].detach().cpu().unsqueeze(0)
+        single_y = labels[position].detach().cpu().unsqueeze(0)
+        one_hot_y = F.one_hot(single_y, num_classes=nb_classes).float()
+
+        try:
+            raw = loss_sensitivity(
+                art_classifier,
+                single_x.numpy(),
+                one_hot_y.numpy(),
+            )
+            if raw is not None and not math.isnan(float(raw)):
+                collected.append(float(raw))
+        except Exception as exc:
+            logger.warning("Could not compute loss sensitivity for sample index %s", position)
+            logger.warning(exc)
+
+    if collected:
+        return float(np.mean(collected))
+    return 0.0
+
+
+def compute_adversarial_accuracy_art(
+    model,
+    test_loader,
+    nb_classes,
+    learning_rate,
+    epsilon=0.03
+):
+    """
+    Measure accuracy on FGSM-perturbed versions of every batch in a loader.
+
+    The model is switched to eval mode and moved to CUDA when available (CPU
+    otherwise) before the loader is traversed.
+
+    Args:
+        model (object): The model.
+        test_loader (DataLoader): DataLoader yielding (samples, labels) batches.
+        nb_classes (int): Unused by this implementation.
+        learning_rate (float): Unused by this implementation.
+        epsilon (float): Maximum perturbation magnitude passed to `fgsm_attack`.
+
+    Returns:
+        float: Fraction of adversarial samples classified correctly, or 0.0 when
+        the loader yields no samples.
+    """
+    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.eval()
+    model.to(target_device)
+
+    hits, seen = 0, 0
+    for batch_x, batch_y in test_loader:
+        batch_x, batch_y = batch_x.to(target_device), batch_y.to(target_device)
+        adversarial = fgsm_attack(model, batch_x, batch_y, epsilon=epsilon)
+
+        with torch.no_grad():
+            raw_out = model(adversarial)
+            # Models may return (logits, ...) tuples; only the first entry is used.
+            scores = raw_out[0] if isinstance(raw_out, (tuple, list)) else raw_out
+            predicted = scores.argmax(dim=1)
+
+        hits += (predicted == batch_y).sum().item()
+        seen += batch_y.size(0)
+
+    return hits / seen if seen > 0 else 0.0
+
+
+def get_empirical_robustness_score(
+    model,
+    test_sample,
+    nb_classes,
+    learning_rate,
+    attack_name="fgsm",
+    attack_params=None,
+    max_samples=128,
+):
+    """
+    Calculates the Empirical Robustness score using Adversarial Robustness Toolbox (ART).
+
+    Args:
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        nb_classes (int): The nb_classes of the model.
+        learning_rate (float): The learning rate of the model.
+        attack_name (str): Attack key supported by ART empirical_robustness.
+        attack_params (dict | None): Optional attack parameters.
+        max_samples (int): Max number of samples from the batch to use. Values that
+            are not integers fall back to 128 and values below 1 are raised to 1.
+
+    Returns:
+        float: Empirical robustness score. Returns 0.0 when the score is None or NaN,
+        or when any step raises.
+    """
+    try:
+        samples, _ = _validate_test_sample_tensors(test_sample)
+
+        # Sanitise the cap the same way the other ART metrics in this module do, so
+        # None / 0 / negative values cannot produce an empty or mis-sliced batch.
+        n = min(_coerce_max_samples(max_samples, default=128), int(samples.shape[0]))
+        x = samples[:n].detach().cpu().numpy()
+
+        classifier = _build_art_classifier(model, samples.shape[1:], nb_classes, learning_rate)
+        score = empirical_robustness(
+            classifier=classifier,
+            x=x,
+            attack_name=attack_name,
+            attack_params=attack_params,
+        )
+        if score is None:
+            return 0.0
+
+        # np.mean also collapses per-sample arrays; float() normalises NumPy scalars
+        # (e.g. np.float32) so the NaN check below cannot be bypassed by their type.
+        score = float(np.mean(score))
+        return 0.0 if math.isnan(score) else score
+    except Exception as exc:
+        logger.warning("Could not compute empirical robustness (ART). Returning 0.0")
+        logger.warning(exc)
+        return 0.0
+
+
+def _get_image_normalization_for_samples(samples):
+    # Guess (mean, std) from the channel count of a >=4-D batch; None when unknown.
+    if not (isinstance(samples, torch.Tensor) and samples.ndim >= 4):
+        return None
+
+    known_stats = {
+        1: ((0.5,), (0.5,)),
+        3: ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
+    }
+    return known_stats.get(int(samples.shape[1]))
+
+
+def _channel_tensor(values, samples):
+    trailing_ones = (1,) * max(samples.dim() - 2, 0)  # one singleton per dim after (batch, channel)
+    return torch.tensor(values, dtype=samples.dtype, device=samples.device).view(1, len(values), *trailing_ones)
+
+
+def _fgsm_step_and_clamp(samples, grad, epsilon):
+    # One signed-gradient step; image-like batches are stepped and clipped in normalized space.
+    stats = _get_image_normalization_for_samples(samples)
+    if stats is None:
+        return samples + epsilon * grad.sign()
+
+    mean = _channel_tensor(stats[0], samples)
+    std = _channel_tensor(stats[1], samples)
+
+    # epsilon and the [0, 1] bounds are divided by std, i.e. expressed in normalized units.
+    step = float(epsilon) / std
+    floor, ceiling = (0.0 - mean) / std, (1.0 - mean) / std
+
+    moved = samples + step * grad.sign()
+    moved = torch.max(torch.min(moved, samples + step), samples - step)
+    return torch.max(torch.min(moved, ceiling), floor)
+
+
+def fgsm_attack(model, samples, labels, epsilon=0.03):
+    """
+    Craft FGSM (Fast Gradient Sign Method) adversarial examples for a batch.
+
+    Args:
+        model (torch.nn.Module): The PyTorch model to attack.
+        samples (torch.Tensor): Input samples to perturb, shape (B, ...).
+        labels (torch.Tensor): True labels corresponding to the samples.
+        epsilon (float, optional): Maximum perturbation magnitude. Defaults to 0.03.
+
+    Returns:
+        torch.Tensor: Detached perturbed samples with the same shape as `samples`,
+        placed on the device of the model's first parameter when it has one.
+    """
+    try:
+        target_device = next(model.parameters()).device
+    except Exception:
+        # No usable parameters: stay on the device the inputs already live on.
+        target_device = samples.device
+
+    inputs = samples.clone().detach().to(target_device)
+    inputs.requires_grad = True
+    targets = labels.to(target_device)
+
+    raw_out = model(inputs)
+    scores = raw_out[0] if isinstance(raw_out, (tuple, list)) else raw_out
+    loss = nn.CrossEntropyLoss()(scores, targets)
+    (input_grad,) = torch.autograd.grad(loss, inputs, only_inputs=True)
+    return _fgsm_step_and_clamp(inputs, input_grad, epsilon).detach()
+
+
+def get_confidence_score(
+    model,
+    test_sample,
+    max_samples=128,
+    use_true_label=True,
+):
+    """
+    Compute the mean softmax confidence of the model on (part of) a batch.
+
+    With `use_true_label` and tensor labels, the confidence is the probability
+    assigned to the true class (one-hot labels are reduced with argmax); otherwise
+    it is the maximum softmax probability of each sample.
+
+    Args:
+        model (object): The model; must be a torch.nn.Module.
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        max_samples (int): Max number of samples from the batch to use.
+        use_true_label (bool): Whether to score against the true labels. Defaults to True.
+
+    Returns:
+        float: Mean confidence, or 0.0 if the model is not a torch.nn.Module or
+        an exception is raised.
+    """
+    try:
+        if not isinstance(model, torch.nn.Module):
+            logger.warning("Model is not a torch.nn.Module")
+            return 0.0
+
+        inputs, targets = test_sample
+        if isinstance(inputs, torch.Tensor):
+            inputs = inputs[:max_samples]
+        has_tensor_targets = isinstance(targets, torch.Tensor)
+        if has_tensor_targets:
+            targets = targets[:max_samples]
+
+        try:
+            target_device = next(model.parameters()).device
+        except Exception:
+            target_device = torch.device("cpu")
+
+        model.eval()
+        with torch.no_grad():
+            if isinstance(inputs, torch.Tensor):
+                inputs = inputs.to(target_device)
+            raw_out = model(inputs)
+            scores = raw_out[0] if isinstance(raw_out, (tuple, list)) else raw_out
+            probabilities = torch.softmax(scores, dim=1)
+
+            if not (use_true_label and has_tensor_targets):
+                top_probs = probabilities.max(dim=1).values
+                return float(top_probs.mean().detach().cpu().item())
+
+            class_idx = torch.argmax(targets, dim=1) if targets.ndim > 1 else targets
+            class_idx = class_idx.to(target_device).view(-1, 1)
+            picked = probabilities.gather(1, class_idx).squeeze(1)
+            return float(picked.mean().detach().cpu().item())
+    except Exception as e:
+        logger.warning("Could not compute confidence score")
+        logger.warning(e)
+        return 0.0
+
+
+def attack_success_rate(model, test_sample, epsilon=0.03):
+    """
+    Compute the FGSM attack success rate (ASR) on a single batch.
+
+    Only samples the model classifies correctly before the attack are counted;
+    the ASR is the share of those that are misclassified after the attack.
+    The model is switched to eval mode and moved to CUDA when available.
+
+    Args:
+        model (object): The model.
+        test_sample (object): A batch from the test dataloader (samples, labels).
+        epsilon (float): Maximum perturbation magnitude for the attacks.
+
+    Returns:
+        float: The ASR, or 0.0 when no sample is classified correctly beforehand.
+    """
+    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.eval()
+    model.to(target_device)
+
+    def _predict(batch):
+        # Class predictions without tracking gradients; tuple outputs use entry 0.
+        with torch.no_grad():
+            raw_out = model(batch)
+            scores = raw_out[0] if isinstance(raw_out, (tuple, list)) else raw_out
+            return scores.argmax(dim=1)
+
+    inputs, targets = test_sample
+    inputs = inputs.to(target_device)
+    targets = targets.to(target_device)
+
+    initially_right = _predict(inputs).eq(targets)
+    n_right = initially_right.sum().item()
+
+    if n_right == 0:
+        return 0.0
+
+    # Attack the whole batch, then look only at the initially-correct samples.
+    adversarial = fgsm_attack(model, inputs, targets, epsilon=epsilon)
+    now_wrong = _predict(adversarial).ne(targets)
+    flipped = (initially_right & now_wrong).sum().item()
+
+    return flipped / n_right
diff --git a/nebula/addons/trustworthiness/helpers/scenario_metrics.py b/nebula/addons/trustworthiness/helpers/scenario_metrics.py
new file mode 100644
index 000000000..d714e8523
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/scenario_metrics.py
@@ -0,0 +1,350 @@
+import io
+import logging
+import os
+import statistics
+from datetime import datetime
+from os.path import exists
+
+import pandas as pd
+import torch
+from codecarbon import EmissionsTracker
+
+from nebula.addons.trustworthiness.helpers.csv_io import read_csv
+
+logger = logging.getLogger(__name__)
+
+def get_elapsed_time(start_time, end_time):
+    """
+    Calculates the elapsed time of the scenario execution, in minutes.
+
+    Args:
+        start_time (str): Start timestamp formatted as "%d/%m/%Y %H:%M:%S".
+        end_time (str): End timestamp formatted as "%d/%m/%Y %H:%M:%S".
+
+    Returns:
+        float: Minutes elapsed between both timestamps (negative if end precedes start).
+
+    Raises:
+        ValueError: If a timestamp does not match the expected format.
+    """
+    timestamp_format = "%d/%m/%Y %H:%M:%S"
+    began, finished = (datetime.strptime(stamp, timestamp_format) for stamp in (start_time, end_time))
+    return (finished - began).total_seconds() / 60
+
+
+def _trustworthiness_dir(scenario_name):  # $NEBULA_LOGS_DIR/{scenario_name}/trustworthiness
+    return os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness")  # NOTE(review): os.environ.get yields None when the variable is unset, which makes os.path.join raise TypeError
+
+
+def _global_data_results_path(scenario_name):  # path of data_results.csv inside the scenario's trustworthiness directory
+    return os.path.join(_trustworthiness_dir(scenario_name), "data_results.csv")
+
+
+def _participant_data_results_path(scenario_name, participant_id):  # path of data_results_{participant_id}.csv in the same directory
+    return os.path.join(_trustworthiness_dir(scenario_name), f"data_results_{participant_id}.csv")
+
+
+def _read_global_results(scenario_name):  # loads data_results.csv through the project's read_csv helper
+    return read_csv(_global_data_results_path(scenario_name))
+
+
+def _read_participant_results(scenario_name, participant_id):  # loads the per-participant CSV through the project's read_csv helper
+    return read_csv(_participant_data_results_path(scenario_name, participant_id))
+
+
+def _find_participant_row(data, participant_id, source_name):
+    # First row whose "id" equals participant_id, retrying with int(participant_id)
+    # so that e.g. the string "3" still matches an integer id column.
+    matches = data[data["id"] == participant_id]
+    if matches.empty:
+        try:
+            matches = data[data["id"] == int(participant_id)]
+        except (TypeError, ValueError):
+            matches = data.iloc[0:0]
+
+    if not matches.empty:
+        return matches.iloc[0]
+    raise ValueError(f"Participant {participant_id} not found in {source_name}")
+
+
+def get_bytes_model(model):
+    """
+    Measures how many bytes the model's state_dict occupies once serialized.
+
+    The state_dict is written with torch.save into an in-memory buffer (no disk I/O).
+
+    Args:
+        model (nn.Module): PyTorch model.
+
+    Returns:
+        int: Number of bytes written to the buffer.
+    """
+    with io.BytesIO() as sink:
+        torch.save(model.state_dict(), sink)
+        return sink.tell()
+
+
+def get_bytes_sent_recv(scenario_name):
+    """
+    Calculates the total and per-row mean bytes sent and received by the nodes.
+
+    Reads the "bytes_sent" and "bytes_recv" columns of the scenario's global
+    results CSV, where each row is counted as one node.
+
+    Args:
+        scenario_name (str): Scenario name.
+
+    Returns:
+        tuple: (total bytes sent, total bytes received, mean bytes sent,
+        mean bytes received). Both means are 0.0 when the CSV has no rows.
+    """
+    data = _read_global_results(scenario_name)
+
+    total_upload_bytes = int(data["bytes_sent"].sum())
+    total_download_bytes = int(data["bytes_recv"].sum())
+
+    # Guard the divisor: an empty results file used to raise ZeroDivisionError.
+    node_count = max(1, len(data))
+    return total_upload_bytes, total_download_bytes, total_upload_bytes / node_count, total_download_bytes / node_count
+
+
+def get_avg_loss_accuracy(scenario_name):
+    """
+    Calculates the mean loss, mean accuracy and accuracy spread of the nodes.
+
+    The sums of the "loss" and "accuracy" columns are divided by (rows - 1),
+    floored at 1, while the standard deviation is taken over every row.
+    NOTE(review): the "- 1" presumably discounts the server row - confirm.
+
+    Args:
+        scenario_name (str): Scenario name.
+
+    Returns:
+        tuple: (mean loss, mean accuracy, sample standard deviation of the
+        accuracies, which is 0.0 when there are fewer than two rows).
+    """
+    data = _read_global_results(scenario_name)
+    row_count = len(data)
+    divisor = max(1, row_count - 1)
+
+    mean_loss = data["loss"].sum() / divisor
+    accuracies = data["accuracy"]
+    mean_accuracy = accuracies.sum() / divisor
+    spread = statistics.stdev(accuracies) if row_count > 1 else 0.0
+    return mean_loss, mean_accuracy, spread
+
+
+def get_underfitting_score(scenario_name, id):
+    """
+    Calculates the mean validation accuracy of the nodes.
+
+    The "val_accuracy" column is summed and divided by (rows - 1), floored at 1.
+    The `id` argument is accepted but not used.
+
+    Returns:
+        The resulting mean validation accuracy.
+    """
+    data = _read_global_results(scenario_name)
+    divisor = max(1, len(data) - 1)
+    return data["val_accuracy"].sum() / divisor
+
+
+def get_participant_loss_accuracy(scenario_name, participant_id):
+    """
+    Looks up the loss and accuracy of one participant in the global results CSV.
+
+    Args:
+        scenario_name (str): Scenario name.
+        participant_id (int | str): Participant identifier.
+
+    Returns:
+        tuple[float, float]: (loss, accuracy)
+
+    Raises:
+        ValueError: If no row matches the participant.
+    """
+    source = _global_data_results_path(scenario_name)
+    entry = _find_participant_row(read_csv(source), participant_id, source)
+    return float(entry["loss"]), float(entry["accuracy"])
+
+def get_underfitting_score_local(scenario_name, id):
+    """
+    Gets the local validation accuracy of one participant.
+
+    Args:
+        scenario_name (str): Scenario name.
+        id (int | str): Participant identifier used in the per-participant CSV name.
+
+    Returns:
+        float: "val_accuracy" value of the first row of that CSV.
+    """
+    column = _read_participant_results(scenario_name, id)["val_accuracy"]
+    return float(column.iloc[0])
+
+
+def get_dp_local(scenario_name, id):
+    """
+    Gets the differential-privacy (DP) values recorded for one participant.
+
+    Args:
+        scenario_name (str): Scenario name.
+        id (int | str): Participant identifier used in the per-participant CSV name.
+
+    Returns:
+        tuple: ("dp_enabled" as stored, "dp_epsilon" as float), both from the first row.
+    """
+    results = _read_participant_results(scenario_name, id)
+    return results["dp_enabled"].iloc[0], float(results["dp_epsilon"].iloc[0])
+
+
+def get_dp_global(scenario_name):
+    """
+    Gets the aggregated differential-privacy (DP) metrics from the global results CSV.
+
+    DP is considered disabled when the first row's "dp_enabled" equals False.
+    Otherwise the "dp_epsilon" column is summed and divided by (rows - 1),
+    floored at 1 (per the original note, this excludes the server node).
+
+    Args:
+        scenario_name (str): Scenario name.
+
+    Returns:
+        tuple[bool, float]: (False, 0.0) when DP is disabled, else
+        (True, average epsilon).
+    """
+    data = _read_global_results(scenario_name)
+    first_flag = data["dp_enabled"].iloc[0]
+    if first_flag == False:  # noqa: E712 - equality kept so NumPy booleans compare by value
+        return False, 0.0
+
+    return True, data["dp_epsilon"].sum() / max(1, len(data) - 1)
+
+def get_avg_class_imbalance_model_size(scenario_name):
+    """
+    Calculates the mean class imbalance and the mean model size of the nodes.
+
+    Both values are the sums of the "class_imbalance" and "model_size" columns
+    of the global results CSV divided by its number of rows.
+    NOTE(review): there is no guard for a CSV without rows (division by zero).
+
+    Args:
+        scenario_name (str): Scenario name.
+
+    Returns:
+        tuple: (mean class imbalance, mean model size).
+    """
+    data = _read_global_results(scenario_name)
+    row_count = len(data)
+
+    mean_imbalance = data["class_imbalance"].sum() / row_count
+    mean_model_size = data["model_size"].sum() / row_count
+
+    return mean_imbalance, mean_model_size
+
+
+def get_entropy_list(scenario_name):
+    """
+    Gets the entropy values of all the nodes as a list.
+
+    The values are read from the "local_entropy" column of the global
+    results CSV, one entry per row.
+
+    Args:
+        scenario_name (str): Scenario name.
+
+    Returns:
+        list: The entropy values.
+    """
+    entropy_column = _read_global_results(scenario_name)["local_entropy"]
+    return entropy_column.tolist()
+
+def stop_emissions_tracking_and_save(
+    tracker: EmissionsTracker,
+    outdir: str,
+    emissions_file: str,
+    role: str,
+    workload: str,
+    sample_size: int = 0,
+    participant_idx=None,
+):
+    """
+    Stops a CodeCarbon tracker and appends its final measurements to a CSV file.
+
+    The CSV at `outdir`/`emissions_file` is loaded when it already exists (or an
+    empty table is started), one row describing this run is appended and the
+    whole table is written back. Failures while building or writing the row are
+    logged as warnings and otherwise ignored.
+
+    Args:
+        tracker (object): The emissions tracker object holding information.
+        outdir (str): The path of the output directory of the experiment.
+        emissions_file (str): File name of the emissions CSV, relative to `outdir`.
+        role (str): Either client or server depending on the role.
+        workload (str): Either aggregation or training depending on the workload.
+        sample_size (int): The number of samples used for training, if aggregation 0.
+        participant_idx: Value stored in the "id" column of the appended row.
+    """
+    tracker.stop()
+
+    csv_path = os.path.join(outdir, emissions_file)
+    if exists(csv_path):
+        history = pd.read_csv(csv_path)
+    else:
+        base_columns = [
+            "id",
+            "role",
+            "energy_grid",
+            "emissions",
+            "workload",
+            "CPU_model",
+            "GPU_model",
+        ]
+        history = pd.DataFrame(columns=base_columns)
+
+    try:
+        final = tracker.final_emissions_data
+        # Emissions per unit of consumed energy, scaled by 1000.
+        energy_grid = (final.emissions / final.energy_consumed) * 1000
+
+        new_row = pd.DataFrame({
+            "id": participant_idx,
+            "role": role,
+            "energy_grid": [energy_grid],
+            "emissions": [final.emissions],
+            "workload": workload,
+            "CPU_model": final.cpu_model if final.cpu_model else "None",
+            "GPU_model": final.gpu_model if final.gpu_model else "None",
+            "CPU_used": bool(final.cpu_energy),
+            "GPU_used": bool(final.gpu_energy),
+            "energy_consumed": final.energy_consumed,
+            "sample_size": sample_size,
+        })
+
+        # Scalar entries ("id", "role", ...) are broadcast against the one-element lists.
+        history = pd.concat([history, new_row], ignore_index=True)
+        history.to_csv(csv_path, encoding="utf-8", index=False)
+    except Exception as e:
+        logger.warning(e)
+
+
+def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: float = 1e-12) -> float:
+    """
+    Communication efficiency = total_bytes / accuracy.
+    Lower is better.
+
+    Args:
+        bytes_up: total uploaded bytes
+        bytes_down: total downloaded bytes
+        test_acc_avg: average test accuracy; values below `eps` are
+            replaced by `eps` before dividing
+        eps: small constant to avoid division by zero
+
+    Returns:
+        float: transferred bytes per unit of accuracy
+    """
+    transferred = float(bytes_up) + float(bytes_down)
+
+    # Clamp from below so that a zero (or negative) accuracy cannot divide by zero.
+    accuracy = max(float(test_acc_avg), eps)
+
+    return transferred / accuracy
diff --git a/nebula/addons/trustworthiness/helpers/scoring.py b/nebula/addons/trustworthiness/helpers/scoring.py
new file mode 100644
index 000000000..5103626c8
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/scoring.py
@@ -0,0 +1,190 @@
+import logging
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+def get_mapped_score(score_key, score_map):
+    """
+    Looks up score_key in the min-max normalized version of score_map.
+
+    Args:
+        score_key (string): The key to look up in the score_map.
+        score_map (dict): The score map defined in the eval_metrics.json file.
+
+    Returns:
+        float: The normalized score in [0, 1]; NaN when the key is not in the map
+        and 0 when the map itself is missing.
+    """
+    if score_map is None:
+        logger.warning("Score map is missing")
+        return 0
+
+    # Normalize all configured values together, then re-attach them to their keys
+    # (dicts preserve insertion order, so keys and values stay aligned).
+    raw_values = list(score_map.values())
+    normalized_values = get_normalized_scores(raw_values)
+    normalized_by_key = dict(zip(score_map.keys(), normalized_values, strict=False))
+    return normalized_by_key.get(score_key, np.nan)
+
+
+def get_normalized_scores(scores):
+    """
+    Min-max normalizes a list of values into [0, 1].
+
+    Args:
+        scores (list): The values that will be normalized.
+
+    Returns:
+        list: The normalized values; an empty list for None or empty input, and
+        all 1.0 when every value is identical.
+    """
+    if scores is None or len(scores) == 0:
+        return []
+
+    lowest, highest = np.min(scores), np.max(scores)
+    if highest == lowest:
+        return [1.0] * len(scores)
+
+    span = highest - lowest
+    return [(value - lowest) / span for value in scores]
+
+
+def get_range_score(value, ranges, direction="asc"):
+    """
+    Maps the value to a range and gets the score by the range and direction.
+
+    Args:
+        value (int | float): The input score. NumPy integer and floating scalars
+            are accepted too; booleans and any other type score 0.
+        ranges (list): The bin edges handed to np.digitize (right edge inclusive).
+        direction (string): Asc means the higher the range the higher the score, desc means otherwise.
+
+    Returns:
+        float: The normalized score of [0, 1]; 0 when value is not a number or ranges is None.
+    """
+    # type(value) == float rejected NumPy scalars such as np.float64, so metrics
+    # computed with NumPy were silently scored 0. bool stays excluded as before.
+    is_number = isinstance(value, (int, float, np.integer, np.floating)) and not isinstance(value, bool)
+    if not is_number:
+        logger.warning("Input value is not a number")
+        logger.warning(f"{value}")
+        return 0
+    if ranges is None:
+        logger.warning("Score ranges are missing")
+        return 0
+    bin_index = np.digitize(value, ranges, right=True)
+    share = bin_index / (len(ranges) + 1)
+    return 1 - share if direction == "desc" else share
+
+
+def get_map_value_score(score_key, score_map):
+    """
+    Returns the raw value stored under score_key in the score_map.
+
+    Args:
+        score_key (string): The key to look up in the score_map.
+        score_map (dict): The score map defined in the eval_metrics.json file.
+
+    Returns:
+        float: The value found in the score_map, or 0 when the map is None.
+        A key that is not in the map raises KeyError.
+    """
+    if score_map is not None:
+        return score_map[score_key]
+
+    logger.warning("Score map is missing")
+    return 0
+
+
+def get_true_score(value, direction):
+    """
+    Converts a boolean or an already-normalized number into a score.
+
+    Args:
+        value (bool | int | float): The input score. NumPy booleans and numeric
+            scalars are handled like their built-in counterparts.
+        direction (string): Asc keeps a numeric value as is, desc returns 1 - value.
+            Booleans ignore the direction.
+
+    Returns:
+        float: 1 / 0 for True / False, value or 1 - value for numbers, and 0 for
+        any other type.
+    """
+    # `value is True` and type(value) == float miss NumPy scalars (np.bool_,
+    # np.float64, ...), which used to be scored 0; check them by type instead.
+    if isinstance(value, (bool, np.bool_)):
+        return 1 if value else 0
+    if not isinstance(value, (int, float, np.integer, np.floating)):
+        logger.warning("Input value is not a number")
+        logger.warning(f"{value}.")
+        return 0
+
+    if direction == "desc":
+        return 1 - value
+    return value
+
+
+def get_scaled_score(value, scale: list, direction: str):
+    """
+    Linearly rescales a value from [scale[0], scale[1]] into [0, 1].
+
+    Values outside the scale are clipped to 0 or 1, and the result is flipped
+    (1 - score) when direction is "desc".
+
+    Args:
+        value (int or float): The raw value of the metric.
+        scale (list): List containing the minimum and maximum value the value can fall in between.
+            Falls back to [0, 1] when the two bounds cannot be read.
+        direction (str): "desc" inverts the score; any other value leaves it as is.
+
+    Returns:
+        float: The normalized score of [0, 1]; 0 when value is None or an empty string.
+    """
+    try:
+        lower_bound, upper_bound = scale[0], scale[1]
+    except Exception:
+        logger.warning("Score minimum or score maximum is missing. The minimum has been set to 0 and the maximum to 1")
+        lower_bound, upper_bound = 0, 1
+
+    if value is None or value == "":
+        logger.warning("Score value is missing. Set value to zero")
+        return 0
+
+    if value >= upper_bound:
+        scaled = 1
+    elif value <= lower_bound:
+        scaled = 0
+    else:
+        scaled = (float(value) - lower_bound) * (1.0 / (upper_bound - lower_bound)) + 0
+
+    return 1 - scaled if direction == "desc" else scaled
+
+
+def get_value(value):
+    """
+    Returns the metric value unchanged (identity scoring function).
+
+    Args:
+        value (float): The value of the metric.
+
+    Returns:
+        float: The same object that was passed in.
+    """
+
+    return value
+
+
+def check_properties(*args):
+    """
+    Computes the fraction of the arguments that carry a value.
+
+    Args:
+        args (list): All the arguments.
+
+    Returns:
+        float: Share of arguments that are neither None nor an empty string
+        (the mean of an empty list when called without arguments).
+    """
+    filled = [item is not None and item != "" for item in args]
+    return np.mean(filled)
diff --git a/nebula/addons/trustworthiness/helpers/trust_reports.py b/nebula/addons/trustworthiness/helpers/trust_reports.py
new file mode 100644
index 000000000..08d1798ec
--- /dev/null
+++ b/nebula/addons/trustworthiness/helpers/trust_reports.py
@@ -0,0 +1,197 @@
+import copy
+import json
+import os
+
+def load_trust_report_json_dumped(scenario_name: str, participant_id: int | str) -> str:
+    """
+    Read a participant trustworthiness JSON file and return it
+    serialized as a string with json.dumps(...).
+
+    Args:
+        scenario_name (str): Scenario/experiment name.
+        participant_id (int | str): Participant ID used in the file name.
+
+    Returns:
+        str: JSON content serialized as a string.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        ValueError: If NEBULA_LOGS_DIR is unset or the file is not valid JSON.
+    """
+    logs_dir = os.environ.get("NEBULA_LOGS_DIR")
+    if not logs_dir:
+        raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
+
+    file_path = os.path.join(
+        logs_dir,
+        scenario_name,
+        "trustworthiness",
+        f"nebula_trust_results_{participant_id}.json",
+    )
+
+    # Open directly instead of checking existence first, so the file cannot
+    # vanish between the check and the read.
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            trust_report = json.load(f)
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"The file does not exist: {file_path}") from e
+    except json.JSONDecodeError as e:
+        raise ValueError(f"The file does not contain valid JSON: {file_path}") from e
+
+    return json.dumps(trust_report)
+
+
+def load_trust_report_json(scenario_name: str, participant_id: int | str) -> dict:
+    """Return the participant's trust report, parsed back from its dumped JSON string."""
+    return json.loads(load_trust_report_json_dumped(scenario_name, participant_id))
+
+
+def create_local_trust_report_copy(scenario_name: str, participant_id: int | str, suffix: str = "global") -> tuple[dict, str]:
+    """Write a copy of a participant's trust report as nebula_trust_results_<id>_<suffix>.json; return (report, path)."""
+    report = load_trust_report_json(scenario_name, participant_id)
+    logs_root = os.environ.get("NEBULA_LOGS_DIR")
+    if not logs_root:
+        raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
+
+    target_dir = os.path.join(logs_root, scenario_name, "trustworthiness")
+    os.makedirs(target_dir, exist_ok=True)
+    copy_path = os.path.join(target_dir, f"nebula_trust_results_{participant_id}_{suffix}.json")
+    with open(copy_path, "w", encoding="utf-8") as out:
+        json.dump(report, out, indent=4)
+
+    return report, copy_path
+
+
+def save_trust_report_json(file_path: str, trust_report: dict) -> str:
+    """Dump trust_report as indented JSON to file_path, creating parent directories; return file_path."""
+    parent = os.path.dirname(file_path)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    with open(file_path, "w", encoding="utf-8") as out:
+        json.dump(trust_report, out, indent=4)
+
+    return file_path
+
+
+def accumulate_weighted_trustscores(report: dict, weight: float, score_accumulator: dict, weight_accumulator: dict):
+    """Add every score found in report to the accumulators, scaled by weight; raises ValueError unless weight is positive and finite."""
+    if not 0 < weight < float("inf"):  # chained form also rejects NaN/inf, which "weight <= 0" lets through
+        raise ValueError("The aggregation weight must be greater than 0.")
+    _accumulate_weighted_trustscores_recursive(
+        obj=report,
+        weight=float(weight),
+        path=(),
+        score_accumulator=score_accumulator,
+        weight_accumulator=weight_accumulator,
+    )
+
+
+def build_weighted_trustscores_report(template_report: dict, score_accumulator: dict, weight_accumulator: dict) -> dict:
+    """Return a deep copy of template_report with its scores replaced by the accumulated weighted averages."""
+    report_copy = copy.deepcopy(template_report)
+    return _apply_weighted_trustscores_recursive(
+        obj=report_copy,
+        path=(),
+        score_accumulator=score_accumulator,
+        weight_accumulator=weight_accumulator,
+    )
+
+
+def _accumulate_weighted_trustscores_recursive(obj, weight: float, path: tuple, score_accumulator: dict, weight_accumulator: dict):
+    """
+    Walk a report fragment and add its weighted scores to the accumulators.
+
+    Every numeric value stored under a "trust_score" or "score" key contributes
+    value * weight to score_accumulator and weight to weight_accumulator, both
+    keyed by the tuple of keys/indices leading to it. Anything that is neither
+    a dict nor a list is left alone.
+
+    Args:
+        obj: Report fragment being visited.
+        weight (float): Weight applied to every score found below obj.
+        path (tuple): Keys and list indices leading from the root to obj.
+        score_accumulator (dict): Path -> running sum of weighted scores; updated in place.
+        weight_accumulator (dict): Path -> running sum of weights; updated in place.
+    """
+    def descend(child, segment):
+        _accumulate_weighted_trustscores_recursive(child, weight, path + (segment,), score_accumulator, weight_accumulator)
+
+    if isinstance(obj, list):
+        for position, element in enumerate(obj):
+            descend(element, position)
+        return
+    if not isinstance(obj, dict):
+        return
+
+    named_entry = _get_structural_named_entry(obj)
+    if named_entry is not None:
+        # The wrapper's own key is replaced by a fixed placeholder in the path.
+        descend(named_entry[1], "__named_entry__")
+        return
+
+    for name, child in obj.items():
+        if name not in {"trust_score", "score"} or not _is_numeric_score(child):
+            descend(child, name)
+            continue
+        slot = path + (name,)
+        score_accumulator[slot] = score_accumulator.get(slot, 0.0) + (float(child) * weight)
+        weight_accumulator[slot] = weight_accumulator.get(slot, 0.0) + weight
+
+
+def _apply_weighted_trustscores_recursive(obj, path: tuple, score_accumulator: dict, weight_accumulator: dict):
+    """
+    Overwrite each numeric "trust_score"/"score" in obj with its weighted average.
+
+    The average is accumulated score / accumulated weight for the value's path,
+    rounded to 6 decimals; scores without a non-zero total weight are kept.
+    Dicts and lists are modified in place and obj itself is returned.
+
+    Args:
+        obj: Report fragment being rewritten.
+        path (tuple): Keys and list indices leading from the root to obj.
+        score_accumulator (dict): Path -> sum of weighted scores.
+        weight_accumulator (dict): Path -> sum of weights.
+    """
+    def descend(child, segment):
+        return _apply_weighted_trustscores_recursive(child, path + (segment,), score_accumulator, weight_accumulator)
+
+    if isinstance(obj, list):
+        for position, element in enumerate(obj):
+            obj[position] = descend(element, position)
+        return obj
+    if not isinstance(obj, dict):
+        return obj
+
+    named_entry = _get_structural_named_entry(obj)
+    if named_entry is not None:
+        wrapper_key, wrapped = named_entry
+        obj[wrapper_key] = descend(wrapped, "__named_entry__")
+        return obj
+
+    for name, child in obj.items():
+        if name not in {"trust_score", "score"} or not _is_numeric_score(child):
+            obj[name] = descend(child, name)
+            continue
+        total_weight = weight_accumulator.get(path + (name,))
+        if total_weight:
+            obj[name] = round(score_accumulator[path + (name,)] / total_weight, 6)
+    return obj
+
+
+def _get_structural_named_entry(obj: dict):
+    """
+    Return (key, value) when obj is a single-key wrapper around a dict that has
+    a "score", "metrics", "notions" or "pillars" key; otherwise return None.
+    """
+    if len(obj) == 1:
+        ((wrapper_key, wrapped),) = obj.items()
+        structural_keys = {"score", "metrics", "notions", "pillars"}
+        if isinstance(wrapped, dict) and not structural_keys.isdisjoint(wrapped):
+            return wrapper_key, wrapped
+
+    return None
+
+
+def _is_numeric_score(value):
+    return not isinstance(value, bool) and isinstance(value, (int, float))  # bool is an int subclass, so exclude it first
diff --git a/nebula/addons/trustworthiness/metric.py b/nebula/addons/trustworthiness/metric.py
index f9e24e72d..f1e453235 100755
--- a/nebula/addons/trustworthiness/metric.py
+++ b/nebula/addons/trustworthiness/metric.py
@@ -4,7 +4,7 @@
 
 from nebula.addons.trustworthiness.graphics import Graphics
 from nebula.addons.trustworthiness.pillar import TrustPillar
-from nebula.addons.trustworthiness.utils import write_results_json
+from nebula.addons.trustworthiness.helpers.csv_io import write_results_json
 
 dirname = os.path.dirname(__file__)
 
diff --git a/nebula/addons/trustworthiness/per_round_metrics.py b/nebula/addons/trustworthiness/per_round_metrics.py
index e8104befd..ea5a2ff3d 100644
--- a/nebula/addons/trustworthiness/per_round_metrics.py
+++ b/nebula/addons/trustworthiness/per_round_metrics.py
@@ -2,7 +2,6 @@
 from __future__ import annotations
 
 import asyncio
-import copy
 import csv
 import os
 from dataclasses import dataclass, field
diff --git a/nebula/addons/trustworthiness/pillar.py b/nebula/addons/trustworthiness/pillar.py
index a57ec1abb..ecd15cf7e 100755
--- a/nebula/addons/trustworthiness/pillar.py
+++ b/nebula/addons/trustworthiness/pillar.py
@@ -1,7 +1,13 @@
 import logging
 
-from nebula.addons.trustworthiness import calculation
-from nebula.addons.trustworthiness.utils import get_input_value
+from nebula.addons.trustworthiness.helpers.factsheet_values import get_input_value
+from nebula.addons.trustworthiness.helpers.scoring import (
+    get_map_value_score,
+    get_mapped_score,
+    get_range_score,
+    get_scaled_score,
+    get_true_score,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -96,15 +102,15 @@ def get_metric_score(self, result, name, metric):
                 logger.warning(f"{name} input value is null")
             else:
                 if score_type == "true_score":
-                    score = calculation.get_true_score(input_value, metric.get("direction"))
+                    score = get_true_score(input_value, metric.get("direction"))
                 elif score_type == "score_mapping":
-                    score = calculation.get_mapped_score(input_value, metric.get("score_map"))
+                    score = get_mapped_score(input_value, metric.get("score_map"))
                 elif score_type == "ranges":
-                    score = calculation.get_range_score(input_value, metric.get("ranges"), metric.get("direction"))
+                    score = get_range_score(input_value, metric.get("ranges"), metric.get("direction"))
                 elif score_type == "score_map_value":
-                    score = calculation.get_map_value_score(input_value, metric.get("score_map"))
+                    score = get_map_value_score(input_value, metric.get("score_map"))
                 elif score_type == "scaled_score":
-                    score = calculation.get_scaled_score(input_value, metric.get("scale"), metric.get("direction"))
+                    score = get_scaled_score(input_value, metric.get("scale"), metric.get("direction"))
                 elif score_type == "property_check":
                     score = 0 if input_value is None else input_value
 
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index b2a9ba2ad..17b9a4ef8 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -7,8 +7,31 @@
 from abc import ABC, abstractmethod
 from nebula.config.config import Config
 from nebula.core.engine import Engine
-from nebula.addons.trustworthiness.calculation import stop_emissions_tracking_and_save, get_bytes_model, get_class_imbalance_local, get_participation_variation_score
-from nebula.addons.trustworthiness.utils import save_results_csv, save_trustworthiness_reports_csv, load_emissions_participant, load_data_results_participant, save_results_csv_cfl, save_emissions_csv_cfl, save_class_count_per_participant, get_local_entropy, load_trust_report_json_dumped, create_local_trust_report_copy, accumulate_weighted_trustscores, build_weighted_trustscores_report, save_trust_report_json
+from nebula.addons.trustworthiness.helpers.csv_io import (
+    load_data_results_participant,
+    load_emissions_participant,
+    save_emissions_csv_cfl,
+    save_results_csv,
+    save_results_csv_cfl,
+    save_trustworthiness_reports_csv,
+)
+from nebula.addons.trustworthiness.helpers.data_distribution import (
+    get_class_imbalance_local,
+    get_local_entropy,
+    get_participation_variation_score,
+    save_class_count_per_participant,
+)
+from nebula.addons.trustworthiness.helpers.scenario_metrics import (
+    get_bytes_model,
+    stop_emissions_tracking_and_save,
+)
+from nebula.addons.trustworthiness.helpers.trust_reports import (
+    accumulate_weighted_trustscores,
+    build_weighted_trustscores_report,
+    create_local_trust_report_copy,
+    load_trust_report_json_dumped,
+    save_trust_report_json,
+)
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
 from datetime import datetime
diff --git a/nebula/addons/trustworthiness/utils.py b/nebula/addons/trustworthiness/utils.py
deleted file mode 100755
index 62dfe5f08..000000000
--- a/nebula/addons/trustworthiness/utils.py
+++ /dev/null
@@ -1,656 +0,0 @@
-import json
-import csv
-import logging
-import math
-import os
-import pickle
-from os.path import exists
-import copy
-
-import pandas as pd
-from hashids import Hashids
-from scipy.stats import entropy
-
-from nebula.addons.trustworthiness import calculation
-from collections import Counter
-
-hashids = Hashids()
-logger = logging.getLogger(__name__)
-dirname = os.path.dirname(__file__)
-
-
-def save_class_count_per_participant(experiment_name, class_counter: Counter, idx):
-    class_count = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(idx)}_class_count.json")
-    result = {hashids.encode(int(class_id)): count for class_id, count in class_counter.items()}
-    with open(class_count, "w") as f:
-        json.dump(result, f)
-
-def count_all_class_samples(experiment_name):
-    participant_id = 0
-    global_class_count = {}
-
-    while True:
-        data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
-
-        if not os.path.exists(data_class_count_file):
-            break
-
-        with open(data_class_count_file, "r") as f:
-            class_count = json.load(f)
-
-        for class_hash, count in class_count.items():
-            global_class_count[class_hash] = global_class_count.get(class_hash, 0) + count
-
-        participant_id += 1
-
-    # Save the total class count into class_count.json
-    output_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'),experiment_name, "trustworthiness", "count_class.json")
-
-    with open(output_file, "w") as f:
-        json.dump(global_class_count, f, indent=2)
-
-def count_class_samples(scenario_name, dataloaders_files, class_counter: Counter = None):
-    """
-    Counts the number of samples by class.
-
-    Args:
-        scenario_name (string): Name of the scenario.
-        dataloaders_files (list): Files that contain the dataloaders.
-
-    """
-
-    result = {}
-    dataloaders = []
-
-    if class_counter:
-        result = {hashids.encode(int(class_id)): count for class_id, count in class_counter.items()}
-    else:
-        for file in dataloaders_files:
-            with open(file, "rb") as f:
-                dataloader = pickle.load(f)
-                dataloaders.append(dataloader)
-
-        for dataloader in dataloaders:
-            for batch, labels in dataloader:
-                for b, label in zip(batch, labels):
-                    l = hashids.encode(label.item())
-                    if l in result:
-                        result[l] += 1
-                    else:
-                        result[l] = 1
-
-    try:
-        name_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "count_class.json")
-    except:
-        name_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "count_class.json")
-
-    with open(name_file, "w") as f:
-        json.dump(result, f)
-
-
-def get_all_data_entropy(experiment_name):
-    participant_id = 0
-    data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
-    entropy_per_participant = {}
-
-    while True:
-        data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(participant_id)}_class_count.json")
-
-        if not os.path.exists(data_class_count_file):
-            break
-
-        with open(data_class_count_file, "r") as f:
-            class_count = json.load(f)
-
-        entropy_value = calculation.get_entropy_from_class_counts(class_count)
-
-        entropy_per_participant[str(participant_id)] = round(entropy_value, 6)
-        participant_id += 1
-
-    name_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'),experiment_name, "trustworthiness", "entropy.json")
-
-    with open(name_file, "w") as f:
-        json.dump(entropy_per_participant, f, indent=2)
-
-def get_local_entropy(id, experiment_name):
-    data_class_count_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"{str(id)}_class_count.json")
-
-    with open(data_class_count_file, "r") as f:
-        class_count = json.load(f)
-
-    return calculation.get_entropy_from_class_counts(class_count)
-
-def get_entropy(client_id, scenario_name, dataloader):
-    """
-    Get the entropy of each client in the scenario.
-
-    Args:
-        client_id (int): The client id.
-        scenario_name (string): Name of the scenario.
-        dataloaders_files (list): Files that contain the dataloaders.
-
-    """
-    result = {}
-    client_entropy = {}
-
-    name_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "entropy.json")
-
-    if os.path.exists(name_file):
-        logging.info(f"entropy fiel already exists.. loading.")
-        with open(name_file, "r") as f:
-            client_entropy = json.load(f)
-
-    client_id_hash = hashids.encode(client_id)
-
-    for batch, labels in dataloader:
-        for b, label in zip(batch, labels):
-            l = hashids.encode(label.item())
-            if l in result:
-                result[l] += 1
-            else:
-                result[l] = 1
-
-    n = len(dataloader)
-    entropy_value = entropy([x / n for x in result.values()], base=2)
-    client_entropy[client_id_hash] = entropy_value
-    with open(name_file, "w") as f:
-        json.dump(client_entropy, f)
-
-
-def read_csv(filename):
-    """
-    Read a CSV file.
-
-    Args:
-        filename (string): Name of the file.
-
-    Returns:
-        object: The CSV readed.
-
-    """
-    if exists(filename):
-        return pd.read_csv(filename)
-
-
-def check_field_filled(factsheet_dict, factsheet_path, value, empty=""):
-    """
-    Check if the field in the factsheet file is filled or not.
-
-    Args:
-        factsheet_dict (dict): The factshett dict.
-        factsheet_path (list): The factsheet field to check.
-        value (float): The value to add in the field.
-        empty (string): If the value could not be appended, the empty string is returned.
-
-    Returns:
-        float: The value added in the factsheet or empty if the value could not be appened
-
-    """
-    if factsheet_dict[factsheet_path[0]][factsheet_path[1]]:
-        return factsheet_dict[factsheet_path[0]][factsheet_path[1]]
-    elif value != "" and value != "nan":
-        if type(value) != str and type(value) != list:
-            if math.isnan(value):
-                return 0
-            else:
-                return value
-        else:
-            return value
-    else:
-        return empty
-
-
-def get_input_value(input_docs, inputs, operation):
-    """
-    Gets the input value from input document and apply the metric operation on the value.
-
-    Args:
-        inputs_docs (map): The input document map.
-        inputs (list): All the inputs.
-        operation (string): The metric operation.
-
-    Returns:
-        float: The metric value
-
-    """
-
-    input_value = None
-    args = []
-    for i in inputs:
-        source = i.get("source", "")
-        field = i.get("field_path", "")
-        input_doc = input_docs.get(source, None)
-        if input_doc is None:
-            logger.warning(f"{source} is null")
-        else:
-            input = get_value_from_path(input_doc, field)
-            args.append(input)
-    try:
-        operationFn = getattr(calculation, operation)
-        input_value = operationFn(*args)
-    except TypeError:
-        logger.warning(f"{operation} is not valid")
-
-    return input_value
-
-
-def get_value_from_path(input_doc, path):
-    """
-    Gets the input value from input document by path.
-
-    Args:
-        inputs_doc (map): The input document map.
-        path (string): The field name of the input value of interest.
-
-    Returns:
-        float: The input value from the input document
-
-    """
-
-    d = input_doc
-    for nested_key in path.split("/"):
-        temp = d.get(nested_key)
-        if isinstance(temp, dict):
-            d = d.get(nested_key)
-        else:
-            return temp
-    return None
-
-
-def write_results_json(out_file, dict):
-    """
-    Writes the result to JSON.
-
-    Args:
-        out_file (string): The output file.
-        dict (dict): The object to be witten into JSON.
-
-    Returns:
-        float: The input value from the input document
-
-    """
-
-    with open(out_file, "a") as f:
-        json.dump(dict, f, indent=4)
-
-def load_data_results_participant(experiment_name: str, participant_id: int | str):
-    data_results_path = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"data_results_{participant_id}.csv")
-
-    if not os.path.exists(data_results_path):
-        raise FileNotFoundError(f"File not found: {data_results_path}")
-
-    with open(data_results_path, "r", newline="") as csv_file:
-        reader = csv.DictReader(csv_file)
-        rows = list(reader)
-
-    if len(rows) == 0:
-        raise ValueError(f"No rows found in {data_results_path}")
-
-    row = rows[0]
-
-    bytes_sent = int(float(row["bytes_sent"]))
-    bytes_recv = int(float(row["bytes_recv"]))
-    accuracy = float(row["accuracy"])
-    loss = float(row["loss"])
-    val_accuracy = float(row["val_accuracy"])
-    dp_enabled = row["dp_enabled"].lower() == "true"
-    dp_epsilon = float(row["dp_epsilon"])
-
-    return bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon
-
-
-def load_emissions_participant(experiment_name: str, participant_id: int | str):
-    emissions_path = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), experiment_name, "trustworthiness", f"emissions_{participant_id}.csv")
-
-    if not os.path.exists(emissions_path):
-        raise FileNotFoundError(f"File not found: {emissions_path}")
-
-    with open(emissions_path, "r", newline="") as csv_file:
-        reader = csv.DictReader(csv_file)
-        rows = list(reader)
-
-    if len(rows) == 0:
-        raise ValueError(f"No rows found in {emissions_path}")
-
-    row = rows[0]
-
-    role = str(row["role"])
-    energy_grid = float(row["energy_grid"])
-    emissions = float(row["emissions"])
-    workload = str(row["workload"])
-    cpu_model = str(row["CPU_model"])
-    gpu_model = str(row["GPU_model"])
-    cpu_used = str(row["CPU_used"]).strip().lower() == "true"
-    gpu_used = str(row["GPU_used"]).strip().lower() == "true"
-    energy_consumed = float(row["energy_consumed"])
-    sample_size = int(float(row["sample_size"]))
-
-    return role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size
-
-def save_trustworthiness_reports_csv(
-    reports: dict,
-    experiment_name: str,
-) -> None:
-
-    data_results_path = os.path.join("nebula", "app", "logs", experiment_name, "trustworthiness", "data_results.csv")
-    emissions_path = os.path.join("nebula", "app", "logs", experiment_name, "trustworthiness", "emissions.csv")
-
-    sorted_reports = sorted(
-        reports.values(),
-        key=lambda report: int(report["node_id"])
-    )
-
-    with open(data_results_path, "w", newline="") as csv_file:
-        writer = csv.DictWriter(
-            csv_file,
-            fieldnames=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy", "dp_enabled", "dp_epsilon"],
-        )
-        writer.writeheader()
-
-        for report in sorted_reports:
-            writer.writerow({
-                "id": report["node_id"],
-                "bytes_sent": report["bytes_sent"],
-                "bytes_recv": report["bytes_recv"],
-                "accuracy": report["accuracy"],
-                "loss": report["loss"],
-                "class_imbalance": report["class_imbalance"],
-                "model_size": report["model_size"],
-                "local_entropy": report["local_entropy"],
-                "val_accuracy": report["val_accuracy"],
-                "dp_enabled": report["dp_enabled"],
-                "dp_epsilon": report["dp_epsilon"],
-            })
-
-    with open(emissions_path, "w", newline="") as csv_file:
-        writer = csv.DictWriter(
-            csv_file,
-            fieldnames=["id", "role", "energy_grid", "emissions", "workload", "CPU_model", "GPU_model", "CPU_used", "GPU_used", "energy_consumed", "sample_size"],
-        )
-        writer.writeheader()
-
-        for report in sorted_reports:
-            writer.writerow({
-                "id": report["node_id"],
-                "role": report["role"],
-                "energy_grid": report["energy_grid"],
-                "emissions": report["emissions"],
-                "workload": report["workload"],
-                "CPU_model": report["cpu_model"],
-                "GPU_model": report["gpu_model"],
-                "CPU_used": report["cpu_used"],
-                "GPU_used": report["gpu_used"],
-                "energy_consumed": report["energy_consumed"],
-                "sample_size": report["sample_size"],
-            })
-
-    logging.info(
-        "[TW SERVER] CSV files written correctly: %s, %s",
-        data_results_path,
-        emissions_path,
-    )
-
-def save_results_csv_cfl(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, class_imbalance: float, model_size: int, local_entropy: float, val_accuracy: float, dp_enabled: bool, dp_epsilon: float):
-    try:
-        data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "data_results.csv")
-    except:
-        data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "data_results.csv")
-
-    if exists(data_results_file):
-        df = pd.read_csv(data_results_file)
-    else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "class_imbalance", "model_size", "local_entropy", "val_accuracy", "dp_enabled", "dp_epsilon"])
-
-    try:
-        # Add new entry to DataFrame
-        new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
-                                    'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss], 'class_imbalance': [class_imbalance], 'model_size': [model_size], 'local_entropy': [local_entropy], 'val_accuracy': [val_accuracy], 'dp_enabled': [dp_enabled], 'dp_epsilon': [dp_epsilon]})
-        df = pd.concat([df, new_data], ignore_index=True)
-
-        df.to_csv(data_results_file, encoding='utf-8', index=False)
-
-    except Exception as e:
-        logger.warning(e)
-
-def save_emissions_csv_cfl(scenario_name: str, id: int, role: str, energy_grid: float, emissions: float, workload: str, cpu_model: str, gpu_model: str, cpu_used: bool, gpu_used: bool, energy_consumed: float, sample_size: int):
-    try:
-        data_results_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", "emissions.csv")
-    except:
-        data_results_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", "emissions.csv")
-
-    if exists(data_results_file):
-        df = pd.read_csv(data_results_file)
-    else:
-        df = pd.DataFrame(columns=["id", "role", "energy_grid", "emissions", "workload", "CPU_model", "GPU_model", "CPU_used", "GPU_used", "energy_consumed", "sample_size"])
-
-    try:
-        # Add new entry to DataFrame
-        new_data = pd.DataFrame({'id': [id], 'role': [role], 'energy_grid': [energy_grid],
-                                    'emissions': [emissions], 'workload': [workload], 'CPU_model': [cpu_model], 'GPU_model': [gpu_model], 'CPU_used': [cpu_used], 'GPU_used': [gpu_used], 'energy_consumed': [energy_consumed],
-                                    'sample_size': [sample_size]})
-        df = pd.concat([df, new_data], ignore_index=True)
-
-        df.to_csv(data_results_file, encoding='utf-8', index=False)
-
-    except Exception as e:
-        logger.warning(e)
-
-
-def save_results_csv(scenario_name: str, id: int, bytes_sent: int, bytes_recv: int, accuracy: float, loss: float, val_accuracy: float, dp_enabled: bool, dp_epsilon: float):
-
-    try:
-        data_results_id_file = os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness", f"data_results_{id}.csv")
-    except:
-        data_results_id_file = os.path.join("nebula", "app", "logs", scenario_name, "trustworthiness", f"data_results_{id}.csv")
-
-    if exists(data_results_id_file):
-        df = pd.read_csv(data_results_id_file)
-    else:
-        df = pd.DataFrame(columns=["id", "bytes_sent", "bytes_recv", "accuracy", "loss", "val_accuracy", "dp_enabled", "dp_epsilon"])
-
-    try:
-        # Add new entry to DataFrame
-        new_data = pd.DataFrame({'id': [id], 'bytes_sent': [bytes_sent],
-                                    'bytes_recv': [bytes_recv], 'accuracy': [accuracy],
-                                    'loss': [loss], 'val_accuracy': [val_accuracy], 'dp_enabled': [dp_enabled], 'dp_epsilon': [dp_epsilon]})
-        df = pd.concat([df, new_data], ignore_index=True)
-
-        df.to_csv(data_results_id_file, encoding='utf-8', index=False)
-
-    except Exception as e:
-        logger.warning(e)
-
-def load_trust_report_json_dumped(scenario_name: str, participant_id: int) -> str:
-    """
-    Read a participant trustworthiness JSON file and return it
-    serialized as a string with json.dumps(...).
-
-    Args:
-        scenario_name (str): Scenario/experiment name.
-        participant_id (int): Participant ID.
-
-    Returns:
-        str: JSON content serialized as a string.
-
-    Raises:
-        FileNotFoundError: If the file does not exist.
-        ValueError: If the file content is not valid JSON.
-    """
-    logs_dir = os.environ.get("NEBULA_LOGS_DIR")
-    if not logs_dir:
-        raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
-
-    file_name = f"nebula_trust_results_{participant_id}.json"
-    file_path = os.path.join(
-        logs_dir,
-        scenario_name,
-        "trustworthiness",
-        file_name,
-    )
-
-    if not os.path.exists(file_path):
-        raise FileNotFoundError(f"The file does not exist: {file_path}")
-
-    try:
-        with open(file_path, "r", encoding="utf-8") as f:
-            trust_report = json.load(f)
-    except json.JSONDecodeError as e:
-        raise ValueError(f"The file does not contain valid JSON: {file_path}") from e
-
-    return json.dumps(trust_report)
-
-
-def load_trust_report_json(scenario_name: str, participant_id: int | str) -> dict:
-    trust_report_json = load_trust_report_json_dumped(scenario_name, participant_id)
-    return json.loads(trust_report_json)
-
-
-def create_local_trust_report_copy(scenario_name: str, participant_id: int | str, suffix: str = "global") -> tuple[dict, str]:
-    trust_report = load_trust_report_json(scenario_name, participant_id)
-    logs_dir = os.environ.get("NEBULA_LOGS_DIR")
-    if not logs_dir:
-        raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
-
-    trust_dir = os.path.join(logs_dir, scenario_name, "trustworthiness")
-    os.makedirs(trust_dir, exist_ok=True)
-
-    file_path = os.path.join(trust_dir, f"nebula_trust_results_{participant_id}_{suffix}.json")
-    with open(file_path, "w", encoding="utf-8") as f:
-        json.dump(trust_report, f, indent=4)
-
-    return trust_report, file_path
-
-
-def save_trust_report_json(file_path: str, trust_report: dict) -> str:
-    directory = os.path.dirname(file_path)
-    if directory:
-        os.makedirs(directory, exist_ok=True)
-
-    with open(file_path, "w", encoding="utf-8") as f:
-        json.dump(trust_report, f, indent=4)
-
-    return file_path
-
-
-def accumulate_weighted_trustscores(report: dict, weight: float, score_accumulator: dict, weight_accumulator: dict):
-    if weight <= 0:
-        raise ValueError("The aggregation weight must be greater than 0.")
-
-    _accumulate_weighted_trustscores_recursive(
-        obj=report,
-        weight=float(weight),
-        path=(),
-        score_accumulator=score_accumulator,
-        weight_accumulator=weight_accumulator,
-    )
-
-
-def build_weighted_trustscores_report(template_report: dict, score_accumulator: dict, weight_accumulator: dict) -> dict:
-    aggregated_report = copy.deepcopy(template_report)
-    _apply_weighted_trustscores_recursive(
-        obj=aggregated_report,
-        path=(),
-        score_accumulator=score_accumulator,
-        weight_accumulator=weight_accumulator,
-    )
-    return aggregated_report
-
-
-def _accumulate_weighted_trustscores_recursive(obj, weight: float, path: tuple, score_accumulator: dict, weight_accumulator: dict):
-    if isinstance(obj, dict):
-        structural_named_entry = _get_structural_named_entry(obj)
-        if structural_named_entry is not None:
-            _, nested_value = structural_named_entry
-            _accumulate_weighted_trustscores_recursive(
-                obj=nested_value,
-                weight=weight,
-                path=path + ("__named_entry__",),
-                score_accumulator=score_accumulator,
-                weight_accumulator=weight_accumulator,
-            )
-            return
-
-        for key, value in obj.items():
-            if key in {"trust_score", "score"} and _is_numeric_score(value):
-                score_path = path + (key,)
-                score_accumulator[score_path] = score_accumulator.get(score_path, 0.0) + (float(value) * weight)
-                weight_accumulator[score_path] = weight_accumulator.get(score_path, 0.0) + weight
-                continue
-
-            _accumulate_weighted_trustscores_recursive(
-                obj=value,
-                weight=weight,
-                path=path + (key,),
-                score_accumulator=score_accumulator,
-                weight_accumulator=weight_accumulator,
-            )
-        return
-
-    if isinstance(obj, list):
-        for index, item in enumerate(obj):
-            _accumulate_weighted_trustscores_recursive(
-                obj=item,
-                weight=weight,
-                path=path + (index,),
-                score_accumulator=score_accumulator,
-                weight_accumulator=weight_accumulator,
-            )
-
-
-def _apply_weighted_trustscores_recursive(obj, path: tuple, score_accumulator: dict, weight_accumulator: dict):
-    if isinstance(obj, dict):
-        structural_named_entry = _get_structural_named_entry(obj)
-        if structural_named_entry is not None:
-            entry_key, nested_value = structural_named_entry
-            obj[entry_key] = _apply_weighted_trustscores_recursive(
-                obj=nested_value,
-                path=path + ("__named_entry__",),
-                score_accumulator=score_accumulator,
-                weight_accumulator=weight_accumulator,
-            )
-            return obj
-
-        for key, value in obj.items():
-            if key in {"trust_score", "score"} and _is_numeric_score(value):
-                score_path = path + (key,)
-                total_weight = weight_accumulator.get(score_path)
-                if total_weight:
-                    obj[key] = round(score_accumulator[score_path] / total_weight, 6)
-                continue
-
-            obj[key] = _apply_weighted_trustscores_recursive(
-                obj=value,
-                path=path + (key,),
-                score_accumulator=score_accumulator,
-                weight_accumulator=weight_accumulator,
-            )
-        return obj
-
-    if isinstance(obj, list):
-        for index, item in enumerate(obj):
-            obj[index] = _apply_weighted_trustscores_recursive(
-                obj=item,
-                path=path + (index,),
-                score_accumulator=score_accumulator,
-                weight_accumulator=weight_accumulator,
-            )
-    return obj
-
-
-def _get_structural_named_entry(obj: dict):
-    if len(obj) != 1:
-        return None
-
-    entry_key, nested_value = next(iter(obj.items()))
-    if not isinstance(nested_value, dict):
-        return None
-
-    if any(key in nested_value for key in ("score", "metrics", "notions", "pillars")):
-        return entry_key, nested_value
-
-    return None
-
-
-def _is_numeric_score(value):
-    return isinstance(value, (int, float)) and not isinstance(value, bool)

From 23fec3593ca182be874aeddddb15ba3c289d5628 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 1 Jun 2026 13:37:25 +0200
Subject: [PATCH 56/66] Refactor scoring, trust_reports, scenario_metrics and
 factsheet_values; add explanatory comments to existing code; other changes

---
 nebula/addons/reputation/reputation.py        |  16 +
 .../{factsheet.py => cfl_factsheet.py}        |  19 +-
 .../addons/trustworthiness/dfl_factsheet.py   |  15 +-
 .../trustworthiness/factsheet_common.py       |  33 +-
 .../helpers/factsheet_values.py               | 109 +++---
 .../helpers/scenario_metrics.py               | 329 ++++++------------
 .../addons/trustworthiness/helpers/scoring.py | 205 ++++-------
 .../trustworthiness/helpers/trust_reports.py  | 137 ++++----
 .../addons/trustworthiness/trustworthiness.py |   3 +-
 .../updatehandlers/sdflupdatehandler.py       |  10 +
 nebula/core/engine.py                         |   4 +
 nebula/core/models/nebulamodel.py             |   1 +
 nebula/core/network/forwarder.py              |   2 +
 nebula/core/network/messages.py               |   4 +
 nebula/core/node.py                           |   1 +
 nebula/core/noderole.py                       |  12 +
 nebula/core/training/dp.py                    |  10 +
 nebula/core/training/lightning.py             |   1 +
 nebula/core/training/lightning_dp.py          |  13 +
 19 files changed, 410 insertions(+), 514 deletions(-)
 rename nebula/addons/trustworthiness/{factsheet.py => cfl_factsheet.py} (89%)

diff --git a/nebula/addons/reputation/reputation.py b/nebula/addons/reputation/reputation.py
index 19b4e9159..25ce5a770 100644
--- a/nebula/addons/reputation/reputation.py
+++ b/nebula/addons/reputation/reputation.py
@@ -1861,6 +1861,7 @@ async def calculate_reputation(self, ae: AggregationEvent):
 
     async def calculate_sdfl_reputation(self, _ree: RoundEndEvent):
         """Calculate SDFL reputation at round end for trainers and aggregators."""
+        # SDFL shares reputation tables instead of direct feedback messages at round end.
         await self.calculate_and_send_sdfl_reputation_table()
 
     async def calculate_and_send_sdfl_reputation_table(self):
@@ -1875,6 +1876,7 @@ async def calculate_and_send_sdfl_reputation_table(self):
 
         await self._log_reputation_calculation_start()
 
+        # Each node computes direct-neighbor reputation from locally observed metrics.
         neighbors = set(await self._engine._cm.get_addrs_current_connections(only_direct=True))
         await self._process_neighbor_metrics(neighbors)
         await self._calculate_reputation_by_factor(neighbors)
@@ -1965,6 +1967,7 @@ async def _finalize_reputation_calculation(self, updates, neighbors):
             await self.update_process_aggregation(updates)
             federation = self._engine.config.participant["scenario_args"].get("federation")
             if federation == "SDFL":
+                # SDFL forwards compact reputation tables so the aggregator can infer non-neighbor trust.
                 await self.send_reputation_table_to_neighbors(neighbors)
             elif federation != "CFL":
                 await self.send_reputation_to_neighbors(neighbors)
@@ -1975,6 +1978,7 @@ async def get_local_reputation_table(self, round_num: int = None):
             round_num = await self._engine.get_round()
 
         direct_neighbors = set(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False))
+        # Only export scores observed locally for this round; indirect scores are not re-shared.
         return {
             node_id: float(data["reputation"])
             for node_id, data in self.reputation.items()
@@ -1985,6 +1989,7 @@ async def get_local_reputation_table(self, round_num: int = None):
 
     async def register_reputation_table(self, node_id: str, round_num: int, reputation_table: dict, received_from: str = None):
         """Store a reputation table received for a round."""
+        # Normalize table payloads at the boundary so aggregation uses numeric scores only.
         normalized_table = {}
         for neighbor, score in reputation_table.items():
             try:
@@ -2006,6 +2011,7 @@ async def register_reputation_table(self, node_id: str, round_num: int, reputati
         expected = self._reputation_tables_expected.get(round_num)
         event = self._reputation_tables_events.get(round_num)
         if expected and event and expected.issubset(self.reputation_tables[round_num].keys()):
+            # Wake any aggregator task blocked waiting for all expected reputation tables.
             event.set()
 
     async def wait_reputation_tables(self, expected_nodes, round_num: int, timeout: float):
@@ -2014,6 +2020,7 @@ async def wait_reputation_tables(self, expected_nodes, round_num: int, timeout:
         self._reputation_tables_expected[round_num] = expected_nodes
         event = self._reputation_tables_events.setdefault(round_num, asyncio.Event())
 
+        # The table may have arrived before the wait was registered.
         if expected_nodes.issubset(self.reputation_tables.get(round_num, {}).keys()):
             event.set()
 
@@ -2035,6 +2042,7 @@ def start_reputation_tables_collection(self, expected_nodes, round_num: int, tim
         if round_num in self._reputation_tables_wait_tasks:
             return
 
+        # Keep collecting in the background so late tables are visible before aggregation.
         async def _wait_and_log():
             tables, missing = await self.wait_reputation_tables(expected_nodes, round_num, timeout)
             logging.info(
@@ -2056,6 +2064,7 @@ async def calculate_indirect_reputation_for_non_neighbors(
     ):
         """Calculate indirect SDFL reputation for non-neighbor nodes from received tables."""
         direct_neighbors = set(await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False))
+        # The aggregator already has direct scores for neighbors; tables fill the non-neighbor gap.
         target_nodes = set(target_nodes) - direct_neighbors - {self._addr}
         expected_table_nodes = set(expected_table_nodes)
 
@@ -2076,6 +2085,7 @@ async def calculate_indirect_reputation_for_non_neighbors(
 
         indirect_reputations = {}
         for node_id in target_nodes:
+            # Average all tables that contain the target node to estimate indirect reputation.
             scores = [
                 float(table[node_id])
                 for table in tables.values()
@@ -2097,6 +2107,7 @@ async def calculate_indirect_reputation_for_non_neighbors(
             indirect_reputations[node_id] = reputation
 
             if reputation < self.REPUTATION_THRESHOLD and round_num > 0:
+                # Rejections based on indirect reputation affect aggregation weights for this round.
                 self.rejected_nodes.add(node_id)
                 logging.info(f"SDFL reputation | Indirect reputation rejected node {node_id} at round {round_num}")
 
@@ -2110,9 +2121,11 @@ async def send_reputation_table_to_neighbors(self, neighbors):
         """Send the local SDFL reputation table through the forwarding channel."""
         round_num = await self._engine.get_round()
         reputation_table = await self.get_local_reputation_table(round_num)
+        # Register our own table locally so local aggregation paths see the same state as receivers.
         await self.register_reputation_table(self._addr, round_num, reputation_table, received_from=self._addr)
 
         if self._engine.rb.get_role_name(True) == "aggregator":
+            # Aggregators start waiting early because trainer tables may arrive before aggregation.
             expected_nodes = self._engine.get_sdfl_expected_trainers()
             timeout = float(
                 self._config.participant["defense_args"]
@@ -2130,6 +2143,7 @@ async def send_reputation_table_to_neighbors(self, neighbors):
         )
 
         for neighbor in neighbors:
+            # Reputation tables are forwarded by the network layer in SDFL.
             await self._engine.cm.send_message(neighbor, message)
 
         logging.info(
@@ -2477,6 +2491,7 @@ async def recollect_number_message(self, source, message):
     async def recollect_duplicated_number_message(self, dme: DuplicatedMessageEvent):
         """Record a duplicated message event."""
         if self._engine.config.participant["scenario_args"].get("federation") == "SDFL":
+            # SDFL forwards model/table messages, so duplicates are not a reliable reputation signal.
             return
 
         event_data = await dme.get_event_data()
@@ -2490,6 +2505,7 @@ async def _record_message_data(self, source: str):
         """Record message data for the given source if it's not the current address."""
         if source != self._addr:
             if self._engine.config.participant["scenario_args"].get("federation") == "SDFL":
+                # In SDFL, message-count reputation is only meaningful for direct neighbors.
                 direct_neighbors = await self._engine.cm.get_addrs_current_connections(only_direct=True, myself=False)
                 if source not in direct_neighbors:
                     return
diff --git a/nebula/addons/trustworthiness/factsheet.py b/nebula/addons/trustworthiness/cfl_factsheet.py
similarity index 89%
rename from nebula/addons/trustworthiness/factsheet.py
rename to nebula/addons/trustworthiness/cfl_factsheet.py
index 0417efdc2..88eedab28 100755
--- a/nebula/addons/trustworthiness/factsheet.py
+++ b/nebula/addons/trustworthiness/cfl_factsheet.py
@@ -43,9 +43,7 @@
 
 class CflFactsheet:
     def __init__(self):
-        """
-        Manager class to populate the FactSheet
-        """
+        # Manage the single CFL factsheet populated from server-side aggregation.
         self.factsheet_file_nm = "factsheet.json"
         self.factsheet_template_file_nm = "factsheet_template_cfl.json"
 
@@ -64,6 +62,7 @@ def populate_factsheet_cfl(
         reliability_summary=None,
     ):
 
+        # Resolve the output factsheet and template for federation/data type.
         factsheet_file = get_factsheet_path(scenario_name, self.factsheet_file_nm)
         factsheet_template_file_nm = get_factsheet_template_name(
             data["federation"],
@@ -82,10 +81,12 @@ def populate_factsheet_cfl(
 
             populate_common_pre_train_sections(factsheet, data, model)
 
+            # CFL reads aggregate CSV artifacts from the scenario trust directory.
             files_dir = get_trustworthiness_dir(scenario_name)
 
             emissions_file = os.path.join(files_dir, "emissions.csv")
 
+            # Aggregate class imbalance, entropy and model size across participants.
             avg_class_imbalance, avg_model_size = get_avg_class_imbalance_model_size(scenario_name)
             entropy_distribution = get_entropy_list (scenario_name)
 
@@ -97,7 +98,7 @@ def populate_factsheet_cfl(
 
             factsheet["data"]["avg_entropy"] = avg_entropy
 
-            # Set performance data
+            # Set global performance and fairness metrics from aggregate results.
             result_avg_loss_accuracy = get_avg_loss_accuracy(scenario_name)
             factsheet["performance"]["test_loss_avg"] = result_avg_loss_accuracy[0]
             factsheet["performance"]["test_acc_avg"] = result_avg_loss_accuracy[1]
@@ -105,6 +106,7 @@ def populate_factsheet_cfl(
             factsheet["fairness"]["test_acc_cv"] = 1 if test_acc_cv > 1 else test_acc_cv
             _, participant_test_acc = get_participant_loss_accuracy(scenario_name, participant_idx)
 
+            # Compute CFL privacy risk from aggregate DP settings and client count.
             dp_enabled, dp_epsilon = get_dp_global(scenario_name)
             set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
             factsheet["privacy"]["privacy_risk"] = get_global_privacy_risk(
@@ -113,6 +115,7 @@ def populate_factsheet_cfl(
                 factsheet["participants"]["client_num"],
             )
 
+            # Populate system timing, model-size and communication totals.
             factsheet["system"]["avg_time_minutes"] = get_elapsed_time(start_time, end_time)
             factsheet["system"]["avg_model_size"] = avg_model_size
 
@@ -124,6 +127,7 @@ def populate_factsheet_cfl(
             populate_reliability(factsheet, reliability_summary)
             populate_participation(factsheet, participation_summary)
 
+            # Convert class imbalance and runtime summaries into factsheet fields.
             class_imbalance_score = get_class_imbalance_score(avg_class_imbalance)
             factsheet["fairness"]["class_imbalance"] = cap_score(class_imbalance_score)
             populate_reputation(factsheet, reputation_summary)
@@ -131,6 +135,7 @@ def populate_factsheet_cfl(
             underfitting_score = get_underfitting_score(scenario_name, participant_idx)
 
             factsheet["fairness"]["underfitting"] = underfitting_score
+            # Add model/profile-specific metrics after base factsheet fields exist.
             populate_profile_metrics(
                 factsheet,
                 data["federation"],
@@ -140,7 +145,7 @@ def populate_factsheet_cfl(
                 participant_test_acc,
             )
 
-            # Set emissions metrics
+            # Enrich CodeCarbon emissions with CPU/GPU benchmark metadata.
             emissions = None if emissions_file is None else read_csv(emissions_file)
             if emissions is not None:
                 logging.info("FactSheet: Populating emissions")
@@ -156,6 +161,7 @@ def populate_factsheet_cfl(
                 emissions.drop("gpuName", axis=1, inplace=True)
                 emissions["powerPerf"] = emissions["powerPerf"].astype(float)
                 emissions["powerPerformance"] = emissions["powerPerformance"].astype(float)
+                # Trainer rows represent client-side training cost.
                 client_emissions = emissions.loc[emissions["role"] == "trainer"]
                 client_avg_carbon_intensity = round(client_emissions["energy_grid"].mean(), 2)
                 factsheet["sustainability"]["avg_carbon_intensity_clients"] = check_field_filled(factsheet, ["sustainability", "avg_carbon_intensity_clients"], client_avg_carbon_intensity, "")
@@ -166,6 +172,7 @@ def populate_factsheet_cfl(
                 clients_power_performance = round(pd.concat([GPU_powerperf, CPU_powerperf]).mean(), 2)
                 factsheet["sustainability"]["avg_power_performance_clients"] = check_field_filled(factsheet, ["sustainability", "avg_power_performance_clients"], clients_power_performance, "")
 
+                # Server rows represent aggregation cost.
                 server_emissions = emissions.loc[emissions["role"] == "server"]
                 server_avg_carbon_intensity = round(server_emissions["energy_grid"].mean(), 2)
                 factsheet["sustainability"]["avg_carbon_intensity_server"] = check_field_filled(factsheet, ["sustainability", "avg_carbon_intensity_server"], server_avg_carbon_intensity, "")
@@ -175,11 +182,13 @@ def populate_factsheet_cfl(
                 server_power_performance = round(pd.concat([GPU_powerperf, CPU_powerperf]).mean(), 2)
                 factsheet["sustainability"]["avg_power_performance_server"] = check_field_filled(factsheet, ["sustainability", "avg_power_performance_server"], server_power_performance, "")
 
+                # Estimate communication emissions from byte counts and carbon intensity.
                 factsheet["sustainability"]["emissions_communication_uplink"] = check_field_filled(factsheet, ["sustainability", "emissions_communication_uplink"], factsheet["system"]["total_upload_bytes"] * 2.24e-10 * factsheet["sustainability"]["avg_carbon_intensity_clients"], "")
                 factsheet["sustainability"]["emissions_communication_downlink"] = check_field_filled(factsheet, ["sustainability", "emissions_communication_downlink"], factsheet["system"]["total_download_bytes"] * 2.24e-10 * factsheet["sustainability"]["avg_carbon_intensity_server"], "")
 
             write_factsheet(factsheet_file, factsheet)
 
         except JSONDecodeError as e:
+            # Keep corrupted factsheet failures explicit in logs.
             logging.info(f"{factsheet_file} is invalid")
             logging.error(e)
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 5be3ee012..2fb2fd115 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -38,9 +38,7 @@
 
 class DflFactsheet:
     def __init__(self):
-        """
-        Manager class to populate the FactSheet
-        """
+        # Manage participant-specific DFL/SDFL factsheets.
         self.factsheet_template_file_nm = "factsheet_template_dfl.json"
 
     def populate_factsheet_dfl(
@@ -58,6 +56,7 @@ def populate_factsheet_dfl(
         reliability_summary=None,
     ):
 
+        # Resolve participant-specific output and data-type-aware template.
         self.factsheet_file_nm = f"factsheet_participant_{participant_idx}.json"
         factsheet_template_file_nm = get_factsheet_template_name(
             data["federation"],
@@ -77,15 +76,18 @@ def populate_factsheet_dfl(
 
         populate_common_pre_train_sections(factsheet, data, model)
 
+        # DP configuration is stored per participant in decentralized runs.
         dp_enabled, dp_epsilon = get_dp_local(scenario_name, participant_idx)
         set_dp_configuration(factsheet, dp_enabled, dp_epsilon)
 
         files_dir = get_trustworthiness_dir(scenario_name)
 
+        # Refresh entropy.json so participant-local entropy can be read consistently.
         get_all_data_entropy(scenario_name)
 
         factsheet["data"]["entropy_local"] = get_local_normalized_entropy(scenario_name, participant_idx)
 
+        # Use the final valid round metrics as participant test performance.
         df = load_round_metrics(scenario_name, participant_idx)
         acc = df["accuracy"].astype(float).to_numpy()
         loss = df["loss"].astype(float).to_numpy()
@@ -96,6 +98,7 @@ def populate_factsheet_dfl(
         factsheet["performance"]["test_loss"] = float(final_loss)
         factsheet["performance"]["test_acc"] = float(final_acc)
 
+        # Load local communication and privacy values reported by the participant.
         bytes_sent, bytes_recv, *_ = load_data_results_participant(scenario_name, participant_idx)
 
         factsheet["system"]["model_size"] = get_bytes_model(model)
@@ -107,6 +110,7 @@ def populate_factsheet_dfl(
 
         factsheet["system"]["time_minutes"] = get_elapsed_time(start_time, end_time)
 
+        # Class imbalance can only be populated after local class-counts exist.
         count_class_file = os.path.join(files_dir, f"{participant_idx}_class_count.json")
         factsheet["fairness"]["class_imbalance"] = (
             get_local_class_imbalance_score(scenario_name, participant_idx)
@@ -116,6 +120,7 @@ def populate_factsheet_dfl(
 
         populate_participation(factsheet, participation_summary)
 
+        # Local CodeCarbon output feeds participant sustainability fields.
         (
             role,
             carbon_intensity_local,
@@ -138,17 +143,20 @@ def populate_factsheet_dfl(
         factsheet["participants"]["local_dataset_size"] = sample_size
 
         populate_reputation(factsheet, reputation_summary, include_neighbor_num=True)
+        # DFL privacy risk depends on local DP settings and neighbor count.
         factsheet["privacy"]["privacy_risk"] = get_global_privacy_risk_dfl(
             dp_enabled,
             dp_epsilon,
             factsheet["participants"]["neighbor_num"],
         )
 
+        # Communication emissions are estimated from local bytes and carbon intensity.
         factsheet["sustainability"]["emissions_communication_local"] = (
             (bytes_sent * 2.24e-10 * carbon_intensity_local)
             + (bytes_recv * 2.24e-10 * carbon_intensity_local)
         )
 
+        # Populate model/profile metrics after final participant accuracy is known.
         factsheet["fairness"]["underfitting"] = get_underfitting_score_local(scenario_name, participant_idx)
         populate_profile_metrics(
             factsheet,
@@ -163,6 +171,7 @@ def populate_factsheet_dfl(
 
 
 def load_round_metrics(scenario_name, participant_idx):
+    # Load participant per-round metrics and keep only rows with loss/accuracy.
     files_dir = get_trustworthiness_dir(scenario_name)
     path = os.path.join(files_dir, f"round_metrics_participant_{participant_idx}.csv")
     df = pd.read_csv(path)
diff --git a/nebula/addons/trustworthiness/factsheet_common.py b/nebula/addons/trustworthiness/factsheet_common.py
index 3aa972fa9..7cfbe11d9 100644
--- a/nebula/addons/trustworthiness/factsheet_common.py
+++ b/nebula/addons/trustworthiness/factsheet_common.py
@@ -1,5 +1,3 @@
-"""Shared helpers for trustworthiness factsheet generation."""
-
 import json
 import os
 import shutil
@@ -7,12 +5,13 @@
 
 dirname = os.path.dirname(__file__)
 
+# Shared helpers for trustworthiness factsheet generation.
 DATA_TYPE_IMAGES = "images"
 DATA_TYPE_TABULAR = "tabular"
 
 
 def get_model_data_type(model):
-    """Returns the data type declared by the model, when available."""
+    # Return the data type declared by the model, when available.
     if not hasattr(model, "get_data_type"):
         return ""
 
@@ -27,10 +26,12 @@ def get_model_data_type(model):
 
 
 def get_normalized_model_data_type(model):
+    # Normalize the model data type before matching templates or profiles.
     return get_model_data_type(model).lower()
 
 
 def get_factsheet_template_name(federation, model, default_template_name):
+    # Select a data-type-specific template when one exists for the federation.
     federation_prefix = "dfl" if str(federation).upper() in {"DFL", "SDFL"} else "cfl"
     data_type = get_normalized_model_data_type(model)
 
@@ -44,22 +45,22 @@ def get_factsheet_template_name(federation, model, default_template_name):
 
 
 def get_trustworthiness_dir(scenario_name):
-    """Returns the trustworthiness output directory for a scenario."""
+    # Return the trustworthiness output directory for a scenario.
     return os.path.join(os.environ.get("NEBULA_LOGS_DIR"), scenario_name, "trustworthiness")
 
 
 def get_factsheet_path(scenario_name, factsheet_name):
-    """Returns the path to a factsheet inside the scenario trustworthiness directory."""
+    # Return the path to a factsheet inside the scenario trustworthiness directory.
     return os.path.join(get_trustworthiness_dir(scenario_name), factsheet_name)
 
 
 def get_factsheet_template_path(template_name):
-    """Returns the path to a factsheet template bundled with the addon."""
+    # Return the path to a factsheet template bundled with the addon.
     return os.path.join(dirname, "configs", template_name)
 
 
 def load_or_create_factsheet(scenario_name, factsheet_name, template_name):
-    """Loads a factsheet, creating it from its template if it does not exist."""
+    # Load a factsheet, creating it from the selected template if needed.
     trustworthiness_dir = get_trustworthiness_dir(scenario_name)
     os.makedirs(trustworthiness_dir, exist_ok=True)
 
@@ -74,23 +75,23 @@ def load_or_create_factsheet(scenario_name, factsheet_name, template_name):
 
 
 def write_factsheet(factsheet_path, factsheet):
-    """Writes a factsheet using the standard JSON formatting."""
+    # Write a factsheet using readable standard JSON formatting.
     with open(factsheet_path, "w", encoding="utf-8") as factsheet_file:
         json.dump(factsheet, factsheet_file, indent=4)
 
 
 def cap_score(value, maximum=1):
-    """Caps a score to the maximum value expected by the factsheet."""
+    # Cap a score to the maximum value expected by the factsheet.
     return maximum if value > maximum else value
 
 
 def inverse_score(value):
-    """Converts an error or risk value into a bounded inverse score."""
+    # Convert an error or risk value into a bounded inverse score.
     return 1 / (1 + value)
 
 
 def build_project_background(data):
-    """Builds the natural-language scenario description used in factsheets."""
+    # Build the natural-language scenario description used in factsheets.
     federation = data["federation"]
     n_nodes = int(data["n_nodes"])
     dataset = data["dataset"]
@@ -121,7 +122,7 @@ def build_project_background(data):
 
 
 def populate_common_pre_train_sections(factsheet, data, model):
-    """Populates project, data, participant and training configuration fields."""
+    # Populate project, data, participant and training configuration fields.
     with_reputation = data["reputation"]["enabled"]
 
     factsheet["project"]["overview"] = data["scenario_title"]
@@ -153,13 +154,13 @@ def populate_common_pre_train_sections(factsheet, data, model):
 
 
 def set_dp_configuration(factsheet, dp_enabled, dp_epsilon):
-    """Writes differential privacy configuration using the factsheet schema."""
+    # Write differential privacy configuration using the factsheet schema.
     factsheet["configuration"]["differential_privacy"] = bool(dp_enabled)
     factsheet["configuration"]["dp_epsilon"] = dp_epsilon if dp_enabled else ""
 
 
 def populate_reliability(factsheet, reliability_summary):
-    """Writes dropout and timeout rates, defaulting to a fully reliable run."""
+    # Write dropout and timeout rates, defaulting to a fully reliable run.
     factsheet["system"]["dropout_rate"] = (
         reliability_summary.get("dropout_rate", 0.0)
         if reliability_summary is not None
@@ -173,7 +174,7 @@ def populate_reliability(factsheet, reliability_summary):
 
 
 def populate_participation(factsheet, participation_summary):
-    """Writes participant selection dispersion, defaulting to full participation."""
+    # Write participant selection dispersion, defaulting to full participation.
     factsheet["fairness"]["selection_cv"] = (
         participation_summary.get("selection_cv", 1)
         if participation_summary is not None
@@ -182,7 +183,7 @@ def populate_participation(factsheet, participation_summary):
 
 
 def populate_reputation(factsheet, reputation_summary, include_neighbor_num=False):
-    """Writes reputation information for centralized or decentralized factsheets."""
+    # Write reputation information for centralized or decentralized factsheets.
     if reputation_summary is not None:
         factsheet["participants"]["avg_neighbor_reputation"] = reputation_summary.get(
             "avg_neighbor_reputation",
diff --git a/nebula/addons/trustworthiness/helpers/factsheet_values.py b/nebula/addons/trustworthiness/helpers/factsheet_values.py
index ee42940af..8faa2cf81 100644
--- a/nebula/addons/trustworthiness/helpers/factsheet_values.py
+++ b/nebula/addons/trustworthiness/helpers/factsheet_values.py
@@ -13,6 +13,7 @@
 
 logger = logging.getLogger(__name__)
 
+# Operations available from the eval_metrics JSON files.
 OPERATIONS = {
     "check_properties": check_properties,
     "comm_efficiency": comm_efficiency,
@@ -21,88 +22,62 @@
     "get_value": get_value,
 }
 
+
 def check_field_filled(factsheet_dict, factsheet_path, value, empty=""):
-    """
-    Check if the field in the factsheet file is filled or not.
-
-    Args:
-        factsheet_dict (dict): The factshett dict.
-        factsheet_path (list): The factsheet field to check.
-        value (float): The value to add in the field.
-        empty (string): If the value could not be appended, the empty string is returned.
-
-    Returns:
-        float: The value added in the factsheet or empty if the value could not be appened
-
-    """
-    if factsheet_dict[factsheet_path[0]][factsheet_path[1]]:
-        return factsheet_dict[factsheet_path[0]][factsheet_path[1]]
-    elif value != "" and value != "nan":
-        if type(value) != str and type(value) != list:
-            if math.isnan(value):
-                return 0
-            else:
-                return value
-        else:
-            return value
-    else:
+    # Keep an existing factsheet value; otherwise return a clean fallback for empty or NaN values.
+    current_value = factsheet_dict[factsheet_path[0]][factsheet_path[1]]
+    if current_value:
+        return current_value
+
+    if _is_empty_value(value):
         return empty
 
+    if _is_nan_number(value):
+        return 0
+
+    return value
 
-def get_input_value(input_docs, inputs, operation):
-    """
-    Gets the input value from input document and apply the metric operation on the value.
 
-    Args:
-        inputs_docs (map): The input document map.
-        inputs (list): All the inputs.
-        operation (string): The metric operation.
+def _is_empty_value(value):
+    # Empty strings and the literal "nan" should not overwrite missing factsheet fields.
+    return value == "" or value == "nan"
 
-    Returns:
-        float: The metric value
 
-    """
+def _is_nan_number(value):
+    # Only numeric values can be checked with math.isnan safely.
+    return isinstance(value, (int, float)) and not isinstance(value, bool) and math.isnan(value)
 
-    input_value = None
+
+def get_input_value(input_docs, inputs, operation):
+    # Collect metric inputs from their configured paths and apply the configured operation.
     args = []
-    for i in inputs:
-        source = i.get("source", "")
-        field = i.get("field_path", "")
-        input_doc = input_docs.get(source, None)
+    for input_config in inputs:
+        source = input_config.get("source", "")
+        field = input_config.get("field_path", "")
+        input_doc = input_docs.get(source)
         if input_doc is None:
             logger.warning(f"{source} is null")
-        else:
-            input = get_value_from_path(input_doc, field)
-            args.append(input)
+            continue
+
+        args.append(get_value_from_path(input_doc, field))
+
     try:
-        operationFn = OPERATIONS[operation]
-        input_value = operationFn(*args)
-    except KeyError:
-        logger.warning(f"{operation} is not valid")
-    except TypeError:
+        operation_fn = OPERATIONS[operation]
+        return operation_fn(*args)
+    except (KeyError, TypeError):
         logger.warning(f"{operation} is not valid")
-
-    return input_value
+        return None
 
 
 def get_value_from_path(input_doc, path):
-    """
-    Gets the input value from input document by path.
-
-    Args:
-        inputs_doc (map): The input document map.
-        path (string): The field name of the input value of interest.
-
-    Returns:
-        float: The input value from the input document
+    # Walk a slash-separated path through a nested dict and return the leaf value.
+    current_value = input_doc
+    for nested_key in path.split("/"):
+        if not isinstance(current_value, dict):
+            return None
 
-    """
+        current_value = current_value.get(nested_key)
+        if current_value is None:
+            return None
 
-    d = input_doc
-    for nested_key in path.split("/"):
-        temp = d.get(nested_key)
-        if isinstance(temp, dict):
-            d = d.get(nested_key)
-        else:
-            return temp
-    return None
+    return current_value
diff --git a/nebula/addons/trustworthiness/helpers/scenario_metrics.py b/nebula/addons/trustworthiness/helpers/scenario_metrics.py
index d714e8523..1d1f35615 100644
--- a/nebula/addons/trustworthiness/helpers/scenario_metrics.py
+++ b/nebula/addons/trustworthiness/helpers/scenario_metrics.py
@@ -3,7 +3,6 @@
 import os
 import statistics
 from datetime import datetime
-from os.path import exists
 
 import pandas as pd
 import torch
@@ -13,53 +12,46 @@
 
 logger = logging.getLogger(__name__)
 
-def get_elapsed_time(start_time, end_time):
-    """
-    Calculates the elapsed time during the execution of the scenario.
-
-    Args:
-        start_time (datetime): Start datetime.
-        end_time (datetime): End datetime.
-
-    Returns:
-        float: The elapsed time.
-    """
-    start_date = datetime.strptime(start_time, "%d/%m/%Y %H:%M:%S")
-    end_date = datetime.strptime(end_time, "%d/%m/%Y %H:%M:%S")
+DATETIME_FORMAT = "%d/%m/%Y %H:%M:%S"
 
-    elapsed_time = (end_date - start_date).total_seconds() / 60
 
-    return elapsed_time
+def get_elapsed_time(start_time, end_time):
+    # Return scenario duration in minutes from the timestamps stored by the workload.
+    start_date = datetime.strptime(start_time, DATETIME_FORMAT)
+    end_date = datetime.strptime(end_time, DATETIME_FORMAT)
+    return (end_date - start_date).total_seconds() / 60
 
 
 def _trustworthiness_dir(scenario_name):
-    return os.path.join(os.environ.get('NEBULA_LOGS_DIR'), scenario_name, "trustworthiness")
+    # All scenario metrics are stored under the scenario trustworthiness directory.
+    return os.path.join(os.environ.get("NEBULA_LOGS_DIR"), scenario_name, "trustworthiness")
 
 
 def _global_data_results_path(scenario_name):
+    # CFL/global metrics are written in the shared data_results.csv file.
     return os.path.join(_trustworthiness_dir(scenario_name), "data_results.csv")
 
 
 def _participant_data_results_path(scenario_name, participant_id):
+    # DFL/SDFL participant metrics are written in participant-specific CSV files.
     return os.path.join(_trustworthiness_dir(scenario_name), f"data_results_{participant_id}.csv")
 
 
 def _read_global_results(scenario_name):
+    # Load the aggregate scenario metrics once and let callers pick the columns they need.
     return read_csv(_global_data_results_path(scenario_name))
 
 
 def _read_participant_results(scenario_name, participant_id):
+    # Load local metrics for one participant.
     return read_csv(_participant_data_results_path(scenario_name, participant_id))
 
 
 def _find_participant_row(data, participant_id, source_name):
+    # Match both string and integer IDs because CSV typing can vary between runs.
     row = data[data["id"] == participant_id]
-
     if row.empty:
-        try:
-            row = data[data["id"] == int(participant_id)]
-        except (TypeError, ValueError):
-            row = data.iloc[0:0]
+        row = _find_participant_row_by_int_id(data, participant_id)
 
     if row.empty:
         raise ValueError(f"Participant {participant_id} not found in {source_name}")
@@ -67,36 +59,34 @@ def _find_participant_row(data, participant_id, source_name):
     return row.iloc[0]
 
 
-def get_bytes_model(model):
-    """
-    Calculates the serialized size in bytes of a PyTorch model state_dict.
+def _find_participant_row_by_int_id(data, participant_id):
+    # Retry numeric participant IDs when pandas read the id column as integers.
+    try:
+        return data[data["id"] == int(participant_id)]
+    except (TypeError, ValueError):
+        return data.iloc[0:0]
 
-    Args:
-        model (nn.Module): PyTorch model.
 
-    Returns:
-        int: Model size in bytes.
-    """
-    buffer: io.BytesIO = io.BytesIO()
-    torch.save(model.state_dict(), buffer)
-    model_size: int = buffer.tell()
+def _client_count(data):
+    # Global CSVs include the server row, so client averages exclude one row.
+    return max(1, len(data) - 1)
 
-    return model_size
 
+def _mean_client_column(data, column_name):
+    # Average a global metric across clients while keeping the historical server-row exclusion.
+    return data[column_name].sum() / _client_count(data)
 
-def get_bytes_sent_recv(scenario_name):
-    """
-    Calculates the mean bytes sent and received of the nodes.
 
-    Args:
-        bytes_sent_files (list): Files that contain the bytes sent of the nodes.
-        bytes_recv_files (list): Files that contain the bytes received of the nodes.
+def get_bytes_model(model):
+    # Serialize the model state_dict to measure the bytes that would be transmitted.
+    buffer = io.BytesIO()
+    torch.save(model.state_dict(), buffer)
+    return buffer.tell()
 
-    Returns:
-        4-tupla: The total bytes sent, the total bytes received, the mean bytes sent and the mean bytes received of the nodes.
-    """
-    data = _read_global_results(scenario_name)
 
+def get_bytes_sent_recv(scenario_name):
+    # Return total and average upload/download bytes from aggregate scenario results.
+    data = _read_global_results(scenario_name)
     number_files = len(data)
 
     total_upload_bytes = int(data["bytes_sent"].sum())
@@ -109,154 +99,67 @@ def get_bytes_sent_recv(scenario_name):
 
 
 def get_avg_loss_accuracy(scenario_name):
-    """
-    Calculates the mean accuracy and loss models of the nodes.
-
-    Args:
-        loss_files (list): Files that contain the loss of the models of the nodes.
-        accuracy_files (list): Files that contain the acurracies of the models of the nodes.
-
-    Returns:
-        3-tupla: The mean loss of the models, the mean accuracies of the models, the standard deviation of the accuracies of the models.
-    """
+    # Return client-average test loss, test accuracy and accuracy standard deviation.
     data = _read_global_results(scenario_name)
 
-    number_files = len(data)
-
-    total_loss = data["loss"].sum()
-    total_accuracy = data["accuracy"].sum()
-
-    denominator = max(1, number_files - 1)
-    avg_loss = total_loss / denominator
-    avg_accuracy = total_accuracy / denominator
-    std_accuracy = statistics.stdev(data["accuracy"]) if number_files > 1 else 0.0
+    avg_loss = _mean_client_column(data, "loss")
+    avg_accuracy = _mean_client_column(data, "accuracy")
+    std_accuracy = statistics.stdev(data["accuracy"]) if len(data) > 1 else 0.0
 
     return avg_loss, avg_accuracy, std_accuracy
 
 
-def get_underfitting_score(scenario_name, id):
-    """
-    Calculates the mean val accuracy of the nodes.
-    """
+def get_underfitting_score(scenario_name, participant_id):
+    # CFL underfitting uses the average validation accuracy across client rows.
     data = _read_global_results(scenario_name)
-
-    number_files = len(data)
-
-    total_val_accuracy = data["val_accuracy"].sum()
-
-    avg_val_accuracy = total_val_accuracy / max(1, number_files - 1)
-
-    return avg_val_accuracy
+    return _mean_client_column(data, "val_accuracy")
 
 
 def get_participant_loss_accuracy(scenario_name, participant_id):
-    """
-    Gets loss and accuracy for a specific participant from CFL aggregated results.
-
-    Args:
-        scenario_name (str): Scenario name.
-        participant_id (int | str): Participant identifier.
-
-    Returns:
-        tuple[float, float]: (loss, accuracy)
-    """
+    # Read one participant's final CFL loss and accuracy from the aggregate CSV.
     data_file = _global_data_results_path(scenario_name)
     row = _find_participant_row(read_csv(data_file), participant_id, data_file)
+    return float(row["loss"]), float(row["accuracy"])
 
-    loss = float(row["loss"])
-    accuracy = float(row["accuracy"])
-    return loss, accuracy
-
-def get_underfitting_score_local(scenario_name, id):
-    """
-    Gets the local validation accuracy for a specific DFL/SDFL participant.
 
-    Args:
-        scenario_name (str): Scenario name.
-        participant_id (int | str): Participant identifier.
-
-    Returns:
-        float: Validation accuracy.
-    """
-    data = _read_participant_results(scenario_name, id)
+def get_underfitting_score_local(scenario_name, participant_id):
+    # DFL/SDFL underfitting uses the participant-local validation accuracy.
+    data = _read_participant_results(scenario_name, participant_id)
     return float(data["val_accuracy"].iloc[0])
 
 
-def get_dp_local(scenario_name, id):
-    """
-    Gets the dp metrics for a specific DFL/SDFL participant.
-
-    Args:
-        scenario_name (str): Scenario name.
-        participant_id (int | str): Participant identifier.
-
-    Returns:
-        float: DP Enabled, Epsilon.
-    """
-    data = _read_participant_results(scenario_name, id)
+def get_dp_local(scenario_name, participant_id):
+    # Return DP settings stored by a single DFL/SDFL participant.
+    data = _read_participant_results(scenario_name, participant_id)
     return data["dp_enabled"].iloc[0], float(data["dp_epsilon"].iloc[0])
 
 
 def get_dp_global(scenario_name):
-    """
-    Gets the aggregated DP metrics for a CFL scenario, excluding the server node.
-
-    Args:
-        scenario_name (str): Scenario name.
-
-    Returns:
-        tuple[bool, float | str]: Whether DP is enabled, and the
-        average epsilon across client nodes.
-    """
+    # Return CFL DP settings, averaging epsilon across client rows when DP is enabled.
     data = _read_global_results(scenario_name)
 
     if data["dp_enabled"].iloc[0] == False:
         return False, 0.0
 
-    number_files = len(data)
-
-    avg_epsilon = data["dp_epsilon"].sum() / max(1, number_files - 1)
+    return True, _mean_client_column(data, "dp_epsilon")
 
-    return True, avg_epsilon
 
 def get_avg_class_imbalance_model_size(scenario_name):
-    """
-    Calculates the mean class imbalance and model size of the nodes.
-
-    Args:
-        data_results_files (list): Files that contain the class imbalance and model size of the nodes
-
-    Returns:
-        2-tupla: The mean class imbalance mean and model size mean of the nodes.
-    """
+    # Return average class imbalance and model size across all global result rows.
     data = _read_global_results(scenario_name)
-
     number_files = len(data)
 
-    total_class_imbalance = data["class_imbalance"].sum()
-    total_model_size = data["model_size"].sum()
-
-    avg_class_imbalance = total_class_imbalance / number_files
-    avg_model_size = total_model_size / number_files
+    avg_class_imbalance = data["class_imbalance"].sum() / number_files
+    avg_model_size = data["model_size"].sum() / number_files
 
     return avg_class_imbalance, avg_model_size
 
 
 def get_entropy_list(scenario_name):
-    """
-    Obtiene una lista con los valores de entropy de todos los nodos.
-
-    Args:
-        scenario_name (str): Nombre del escenario.
-
-    Returns:
-        list: Lista con los valores de entropy
-    """
+    # Return local entropy values so callers can normalize the distribution.
     data = _read_global_results(scenario_name)
+    return data["local_entropy"].tolist()
 
-    entropy_list = data["local_entropy"].tolist()
-
-    return entropy_list
 
 def stop_emissions_tracking_and_save(
     tracker: EmissionsTracker,
@@ -267,84 +170,60 @@ def stop_emissions_tracking_and_save(
     sample_size: int = 0,
     participant_idx=None,
 ):
-    """
-    Stops emissions tracking object from CodeCarbon and saves relevant information to emissions.csv file.
-
-    Args:
-        tracker (object): The emissions tracker object holding information.
-        outdir (str): The path of the output directory of the experiment.
-        emissions_file (str): The path to the emissions file.
-        role (str): Either client or server depending on the role.
-        workload (str): Either aggregation or training depending on the workload.
-        sample_size (int): The number of samples used for training, if aggregation 0.
-    """
-
+    # Stop CodeCarbon tracking and append the final emissions row to emissions.csv.
     tracker.stop()
 
-    emissions_file = os.path.join(outdir, emissions_file)
-
-    if exists(emissions_file):
-        df = pd.read_csv(emissions_file)
-    else:
-        df = pd.DataFrame(
-            columns=[
-                "id",
-                "role",
-                "energy_grid",
-                "emissions",
-                "workload",
-                "CPU_model",
-                "GPU_model",
-            ]
-        )
+    emissions_path = os.path.join(outdir, emissions_file)
+    df = _read_or_create_emissions_dataframe(emissions_path)
+
     try:
-        energy_grid = (tracker.final_emissions_data.emissions / tracker.final_emissions_data.energy_consumed) * 1000
-        df = pd.concat(
-            [
-                df,
-                pd.DataFrame({
-                    "id": participant_idx,
-                    "role": role,
-                    "energy_grid": [energy_grid],
-                    "emissions": [tracker.final_emissions_data.emissions],
-                    "workload": workload,
-                    "CPU_model": tracker.final_emissions_data.cpu_model
-                    if tracker.final_emissions_data.cpu_model
-                    else "None",
-                    "GPU_model": tracker.final_emissions_data.gpu_model
-                    if tracker.final_emissions_data.gpu_model
-                    else "None",
-                    "CPU_used": True if tracker.final_emissions_data.cpu_energy else False,
-                    "GPU_used": True if tracker.final_emissions_data.gpu_energy else False,
-                    "energy_consumed": tracker.final_emissions_data.energy_consumed,
-                    "sample_size": sample_size,
-                }),
-            ],
-            ignore_index=True,
-        )
-        df.to_csv(emissions_file, encoding="utf-8", index=False)
+        row = _build_emissions_row(tracker, role, workload, sample_size, participant_idx)
+        df = pd.concat([df, pd.DataFrame(row)], ignore_index=True)
+        df.to_csv(emissions_path, encoding="utf-8", index=False)
     except Exception as e:
         logger.warning(e)
 
 
-def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: float = 1e-12) -> float:
-    """
-    Communication efficiency = total_bytes / final_accuracy.
-    Lower is better.
-
-    Args:
-        bytes_up: total uploaded bytes
-        bytes_down: total downloaded bytes
-        final_accuracy: final test accuracy in [0,1]
-        eps: small constant to avoid division by zero
-
-    Returns:
-        float
-    """
-    total_bytes = float(bytes_up) + float(bytes_down)
-    acc = float(test_acc_avg)
+def _read_or_create_emissions_dataframe(emissions_path):
+    # Reuse the existing file when present, otherwise create the expected columns.
+    if os.path.exists(emissions_path):
+        return pd.read_csv(emissions_path)
+
+    return pd.DataFrame(
+        columns=[
+            "id",
+            "role",
+            "energy_grid",
+            "emissions",
+            "workload",
+            "CPU_model",
+            "GPU_model",
+        ]
+    )
+
+
+def _build_emissions_row(tracker, role, workload, sample_size, participant_idx):
+    # Convert CodeCarbon's final data object into the CSV row persisted by trustworthiness.
+    emissions_data = tracker.final_emissions_data
+    energy_grid = (emissions_data.emissions / emissions_data.energy_consumed) * 1000
+
+    return {
+        "id": participant_idx,
+        "role": role,
+        "energy_grid": [energy_grid],
+        "emissions": [emissions_data.emissions],
+        "workload": workload,
+        "CPU_model": emissions_data.cpu_model if emissions_data.cpu_model else "None",
+        "GPU_model": emissions_data.gpu_model if emissions_data.gpu_model else "None",
+        "CPU_used": bool(emissions_data.cpu_energy),
+        "GPU_used": bool(emissions_data.gpu_energy),
+        "energy_consumed": emissions_data.energy_consumed,
+        "sample_size": sample_size,
+    }
 
-    if acc < eps:
-        acc = eps
 
-    return total_bytes / acc
+def comm_efficiency(bytes_up: int, bytes_down: int, test_acc_avg: float, eps: float = 1e-12) -> float:
+    # Communication efficiency is total transferred bytes divided by final accuracy.
+    total_bytes = float(bytes_up) + float(bytes_down)
+    accuracy = max(float(test_acc_avg), eps)
+    return total_bytes / accuracy
diff --git a/nebula/addons/trustworthiness/helpers/scoring.py b/nebula/addons/trustworthiness/helpers/scoring.py
index 5103626c8..955bf5421 100644
--- a/nebula/addons/trustworthiness/helpers/scoring.py
+++ b/nebula/addons/trustworthiness/helpers/scoring.py
@@ -4,40 +4,31 @@
 
 logger = logging.getLogger(__name__)
 
-def get_mapped_score(score_key, score_map):
-    """
-    Finds the score by the score_key in the score_map.
 
-    Args:
-        score_key (string): The key to look up in the score_map.
-        score_map (dict): The score map defined in the eval_metrics.json file.
+def _is_number(value):
+    # Score calculations expect real numeric values; booleans are handled explicitly.
+    return isinstance(value, (int, float, np.number)) and not isinstance(value, bool)
+
+
+def _warn_not_number(value):
+    # Keep the warning format consistent across all numeric scoring functions.
+    logger.warning("Input value is not a number")
+    logger.warning(f"{value}")
 
-    Returns:
-        float: The normalized score of [0, 1].
-    """
-    score = 0
+
+def get_mapped_score(score_key, score_map):
+    # Normalize the configured score map and return the normalized value for the input key.
     if score_map is None:
         logger.warning("Score map is missing")
-    else:
-        keys = [key for key, value in score_map.items()]
-        scores = [value for key, value in score_map.items()]
-        normalized_scores = get_normalized_scores(scores)
-        normalized_score_map = dict(zip(keys, normalized_scores, strict=False))
-        score = normalized_score_map.get(score_key, np.nan)
+        return 0
 
-    return score
+    normalized_scores = get_normalized_scores(list(score_map.values()))
+    normalized_score_map = dict(zip(score_map.keys(), normalized_scores, strict=False))
+    return normalized_score_map.get(score_key, np.nan)
 
 
 def get_normalized_scores(scores):
-    """
-    Calculates the normalized scores of a list.
-
-    Args:
-        scores (list): The values that will be normalized.
-
-    Returns:
-        list: The normalized list.
-    """
+    # Convert a list of raw configured scores to the [0, 1] range.
     if scores is None or len(scores) == 0:
         return []
 
@@ -46,145 +37,89 @@ def get_normalized_scores(scores):
     if max_score == min_score:
         return [1.0 for _ in scores]
 
-    normalized = [(x - min_score) / (max_score - min_score) for x in scores]
-    return normalized
+    return [(score - min_score) / (max_score - min_score) for score in scores]
 
 
 def get_range_score(value, ranges, direction="asc"):
-    """
-    Maps the value to a range and gets the score by the range and direction.
-
-    Args:
-        value (int): The input score.
-        ranges (list): The ranges defined.
-        direction (string): Asc means the higher the range the higher the score, desc means otherwise.
-
-    Returns:
-        float: The normalized score of [0, 1].
-    """
-
-    if not (type(value) == int or type(value) == float):
-        logger.warning("Input value is not a number")
-        logger.warning(f"{value}")
+    # Place the value in one of the configured bins and normalize that bin index.
+    if not _is_number(value):
+        _warn_not_number(value)
         return 0
-    else:
-        score = 0
-        if ranges is None:
-            logger.warning("Score ranges are missing")
-        else:
-            total_bins = len(ranges) + 1
-            bin = np.digitize(value, ranges, right=True)
-            score = 1 - (bin / total_bins) if direction == "desc" else bin / total_bins
-        return score
 
+    if ranges is None:
+        logger.warning("Score ranges are missing")
+        return 0
 
-def get_map_value_score(score_key, score_map):
-    """
-    Finds the score by the score_key in the score_map and returns the value.
+    total_bins = len(ranges) + 1
+    bin_index = np.digitize(value, ranges, right=True)
+    score = bin_index / total_bins
+    return 1 - score if direction == "desc" else score
 
-    Args:
-        score_key (string): The key to look up in the score_map.
-        score_map (dict): The score map defined in the eval_metrics.json file.
 
-    Returns:
-        float: The score obtained in the score_map.
-    """
-    score = 0
+def get_map_value_score(score_key, score_map):
+    # Return the exact configured score for maps that already store normalized values.
     if score_map is None:
         logger.warning("Score map is missing")
-    else:
-        score = score_map[score_key]
-    return score
-
-
-def get_true_score(value, direction):
-    """
-    Returns the negative of the value if direction is 'desc', otherwise returns value.
+        return 0
 
-    Args:
-        value (int): The input score.
-        direction (string): Asc means the higher the range the higher the score, desc means otherwise.
+    return score_map[score_key]
 
-    Returns:
-        float: The score obtained.
-    """
 
+def get_true_score(value, direction):
+    # Booleans are direct scores; numeric values can be inverted for descending metrics.
     if value is True:
         return 1
-    elif value is False:
+    if value is False:
         return 0
-    else:
-        if not (type(value) == int or type(value) == float):
-            logger.warning("Input value is not a number")
-            logger.warning(f"{value}.")
-            return 0
-        else:
-            if direction == "desc":
-                return 1 - value
-            else:
-                return value
-
 
-def get_scaled_score(value, scale: list, direction: str):
-    """
-    Maps a score of a specific scale into the scale between zero and one.
+    if not _is_number(value):
+        _warn_not_number(value)
+        return 0
 
-    Args:
-        value (int or float): The raw value of the metric.
-        scale (list): List containing the minimum and maximum value the value can fall in between.
+    return 1 - value if direction == "desc" else value
 
-    Returns:
-        float: The normalized score of [0, 1].
-    """
 
-    score = 0
-    try:
-        value_min, value_max = scale[0], scale[1]
-    except Exception:
-        logger.warning("Score minimum or score maximum is missing. The minimum has been set to 0 and the maximum to 1")
-        value_min, value_max = 0, 1
+def get_scaled_score(value, scale: list, direction: str):
+    # Clamp a metric from its configured scale into the [0, 1] score range.
     if value is None or value == "":
         logger.warning("Score value is missing. Set value to zero")
-    else:
-        low, high = 0, 1
-        if value >= value_max:
-            score = 1
-        elif value <= value_min:
-            score = 0
-        else:
-            diff = value_max - value_min
-            diffScale = high - low
-            score = (float(value) - value_min) * (float(diffScale) / diff) + low
-        if direction == "desc":
-            score = high - score
+        return 0
 
-    return score
+    if not _is_number(value):
+        _warn_not_number(value)
+        return 0
 
+    value_min, value_max = _get_scale_bounds(scale)
+    if value_max == value_min:
+        score = 1
+    elif value >= value_max:
+        score = 1
+    elif value <= value_min:
+        score = 0
+    else:
+        score = (float(value) - value_min) / (value_max - value_min)
 
-def get_value(value):
-    """
-    Get the value of a metric.
+    return 1 - score if direction == "desc" else score
 
-    Args:
-        value (float): The value of the metric.
 
-    Returns:
-        float: The value of the metric.
-    """
+def _get_scale_bounds(scale):
+    # Fall back to the default [0, 1] scale when the config is incomplete.
+    try:
+        return scale[0], scale[1]
+    except (TypeError, IndexError):
+        logger.warning("Score minimum or score maximum is missing. The minimum has been set to 0 and the maximum to 1")
+        return 0, 1
+
 
+def get_value(value):
+    # Factsheet operations use this when a metric only needs the raw input value.
     return value
 
 
 def check_properties(*args):
-    """
-    Check if all the arguments have values.
-
-    Args:
-        args (list): All the arguments.
-
-    Returns:
-        float: The mean of arguments that have values.
-    """
+    # Return the fraction of required properties that are filled.
+    if not args:
+        return 0
 
-    result = map(lambda x: x is not None and x != "", args)
-    return np.mean(list(result))
+    filled = [value is not None and value != "" for value in args]
+    return np.mean(filled)
diff --git a/nebula/addons/trustworthiness/helpers/trust_reports.py b/nebula/addons/trustworthiness/helpers/trust_reports.py
index 08d1798ec..11e09208d 100644
--- a/nebula/addons/trustworthiness/helpers/trust_reports.py
+++ b/nebula/addons/trustworthiness/helpers/trust_reports.py
@@ -2,79 +2,86 @@
 import json
 import os
 
-def load_trust_report_json_dumped(scenario_name: str, participant_id: int) -> str:
-    """
-    Read a participant trustworthiness JSON file and return it
-    serialized as a string with json.dumps(...).
-
-    Args:
-        scenario_name (str): Scenario/experiment name.
-        participant_id (int): Participant ID.
+SCORE_KEYS = {"trust_score", "score"}
+NAMED_ENTRY_KEYS = {"score", "metrics", "notions", "pillars"}
+NAMED_ENTRY_PATH_KEY = "__named_entry__"
 
-    Returns:
-        str: JSON content serialized as a string.
 
-    Raises:
-        FileNotFoundError: If the file does not exist.
-        ValueError: If the file content is not valid JSON.
-    """
+def _logs_dir() -> str:
+    # Return the configured logs directory required by trust report exchange.
     logs_dir = os.environ.get("NEBULA_LOGS_DIR")
     if not logs_dir:
         raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
+    return logs_dir
 
-    file_name = f"nebula_trust_results_{participant_id}.json"
-    file_path = os.path.join(
-        logs_dir,
-        scenario_name,
-        "trustworthiness",
-        file_name,
-    )
 
+def _trustworthiness_dir(scenario_name: str) -> str:
+    # Return the scenario trustworthiness directory used by report JSON files.
+    return os.path.join(_logs_dir(), scenario_name, "trustworthiness")
+
+
+def _trust_report_path(scenario_name: str, participant_id: int | str) -> str:
+    # Return the local trust report path for one participant.
+    return os.path.join(_trustworthiness_dir(scenario_name), f"nebula_trust_results_{participant_id}.json")
+
+
+def _read_json_file(file_path: str) -> dict:
+    # Load a JSON object and raise clear errors for missing or invalid files.
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"The file does not exist: {file_path}")
 
     try:
-        with open(file_path, "r", encoding="utf-8") as f:
-            trust_report = json.load(f)
-    except json.JSONDecodeError as e:
-        raise ValueError(f"The file does not contain valid JSON: {file_path}") from e
+        with open(file_path, "r", encoding="utf-8") as file:
+            return json.load(file)
+    except json.JSONDecodeError as error:
+        raise ValueError(f"The file does not contain valid JSON: {file_path}") from error
 
-    return json.dumps(trust_report)
 
+def _write_json_file(file_path: str, data: dict) -> str:
+    # Write a formatted JSON object, creating the parent directory if needed.
+    directory = os.path.dirname(file_path)
+    if directory:
+        os.makedirs(directory, exist_ok=True)
 
-def load_trust_report_json(scenario_name: str, participant_id: int | str) -> dict:
-    trust_report_json = load_trust_report_json_dumped(scenario_name, participant_id)
-    return json.loads(trust_report_json)
+    with open(file_path, "w", encoding="utf-8") as file:
+        json.dump(data, file, indent=4)
 
+    return file_path
 
-def create_local_trust_report_copy(scenario_name: str, participant_id: int | str, suffix: str = "global") -> tuple[dict, str]:
-    trust_report = load_trust_report_json(scenario_name, participant_id)
-    logs_dir = os.environ.get("NEBULA_LOGS_DIR")
-    if not logs_dir:
-        raise ValueError("The NEBULA_LOGS_DIR environment variable is not defined.")
 
-    trust_dir = os.path.join(logs_dir, scenario_name, "trustworthiness")
-    os.makedirs(trust_dir, exist_ok=True)
+def _is_score_entry(key, value) -> bool:
+    # Trust report scores are numeric values stored under score-like keys.
+    return key in SCORE_KEYS and _is_numeric_score(value)
 
-    file_path = os.path.join(trust_dir, f"nebula_trust_results_{participant_id}_{suffix}.json")
-    with open(file_path, "w", encoding="utf-8") as f:
-        json.dump(trust_report, f, indent=4)
 
-    return trust_report, file_path
+def load_trust_report_json_dumped(scenario_name: str, participant_id: int) -> str:
+    # Load one participant report and return it serialized for network messages.
+    return json.dumps(load_trust_report_json(scenario_name, participant_id))
 
 
-def save_trust_report_json(file_path: str, trust_report: dict) -> str:
-    directory = os.path.dirname(file_path)
-    if directory:
-        os.makedirs(directory, exist_ok=True)
+def load_trust_report_json(scenario_name: str, participant_id: int | str) -> dict:
+    # Load one participant trustworthiness report as a dictionary.
+    return _read_json_file(_trust_report_path(scenario_name, participant_id))
 
-    with open(file_path, "w", encoding="utf-8") as f:
-        json.dump(trust_report, f, indent=4)
 
-    return file_path
+def create_local_trust_report_copy(scenario_name: str, participant_id: int | str, suffix: str = "global") -> tuple[dict, str]:
+    # Copy a participant report to a local aggregation output file.
+    trust_report = load_trust_report_json(scenario_name, participant_id)
+    file_path = os.path.join(
+        _trustworthiness_dir(scenario_name),
+        f"nebula_trust_results_{participant_id}_{suffix}.json",
+    )
+
+    return trust_report, _write_json_file(file_path, trust_report)
+
+
+def save_trust_report_json(file_path: str, trust_report: dict) -> str:
+    # Save a trust report and return the written file path.
+    return _write_json_file(file_path, trust_report)
 
 
 def accumulate_weighted_trustscores(report: dict, weight: float, score_accumulator: dict, weight_accumulator: dict):
+    # Add all score values from a report into weighted accumulators.
     if weight <= 0:
         raise ValueError("The aggregation weight must be greater than 0.")
 
@@ -88,6 +95,7 @@ def accumulate_weighted_trustscores(report: dict, weight: float, score_accumulat
 
 
 def build_weighted_trustscores_report(template_report: dict, score_accumulator: dict, weight_accumulator: dict) -> dict:
+    # Return a deep-copied report with every score replaced by its weighted mean.
     aggregated_report = copy.deepcopy(template_report)
     _apply_weighted_trustscores_recursive(
         obj=aggregated_report,
@@ -99,22 +107,23 @@ def build_weighted_trustscores_report(template_report: dict, score_accumulator:
 
 
 def _accumulate_weighted_trustscores_recursive(obj, weight: float, path: tuple, score_accumulator: dict, weight_accumulator: dict):
+    # Walk a trust report and accumulate weighted sums for every score path.
     if isinstance(obj, dict):
-        structural_named_entry = _get_structural_named_entry(obj)
-        if structural_named_entry is not None:
-            _, nested_value = structural_named_entry
+        named_entry = _get_structural_named_entry(obj)
+        if named_entry is not None:
+            _, nested_value = named_entry
             _accumulate_weighted_trustscores_recursive(
                 obj=nested_value,
                 weight=weight,
-                path=path + ("__named_entry__",),
+                path=path + (NAMED_ENTRY_PATH_KEY,),
                 score_accumulator=score_accumulator,
                 weight_accumulator=weight_accumulator,
             )
             return
 
         for key, value in obj.items():
-            if key in {"trust_score", "score"} and _is_numeric_score(value):
-                score_path = path + (key,)
+            score_path = path + (key,)
+            if _is_score_entry(key, value):
                 score_accumulator[score_path] = score_accumulator.get(score_path, 0.0) + (float(value) * weight)
                 weight_accumulator[score_path] = weight_accumulator.get(score_path, 0.0) + weight
                 continue
@@ -122,7 +131,7 @@ def _accumulate_weighted_trustscores_recursive(obj, weight: float, path: tuple,
             _accumulate_weighted_trustscores_recursive(
                 obj=value,
                 weight=weight,
-                path=path + (key,),
+                path=score_path,
                 score_accumulator=score_accumulator,
                 weight_accumulator=weight_accumulator,
             )
@@ -140,21 +149,22 @@ def _accumulate_weighted_trustscores_recursive(obj, weight: float, path: tuple,
 
 
 def _apply_weighted_trustscores_recursive(obj, path: tuple, score_accumulator: dict, weight_accumulator: dict):
+    # Walk a report copy and replace score values with weighted averages.
     if isinstance(obj, dict):
-        structural_named_entry = _get_structural_named_entry(obj)
-        if structural_named_entry is not None:
-            entry_key, nested_value = structural_named_entry
+        named_entry = _get_structural_named_entry(obj)
+        if named_entry is not None:
+            entry_key, nested_value = named_entry
             obj[entry_key] = _apply_weighted_trustscores_recursive(
                 obj=nested_value,
-                path=path + ("__named_entry__",),
+                path=path + (NAMED_ENTRY_PATH_KEY,),
                 score_accumulator=score_accumulator,
                 weight_accumulator=weight_accumulator,
             )
             return obj
 
         for key, value in obj.items():
-            if key in {"trust_score", "score"} and _is_numeric_score(value):
-                score_path = path + (key,)
+            score_path = path + (key,)
+            if _is_score_entry(key, value):
                 total_weight = weight_accumulator.get(score_path)
                 if total_weight:
                     obj[key] = round(score_accumulator[score_path] / total_weight, 6)
@@ -162,7 +172,7 @@ def _apply_weighted_trustscores_recursive(obj, path: tuple, score_accumulator: d
 
             obj[key] = _apply_weighted_trustscores_recursive(
                 obj=value,
-                path=path + (key,),
+                path=score_path,
                 score_accumulator=score_accumulator,
                 weight_accumulator=weight_accumulator,
             )
@@ -176,10 +186,12 @@ def _apply_weighted_trustscores_recursive(obj, path: tuple, score_accumulator: d
                 score_accumulator=score_accumulator,
                 weight_accumulator=weight_accumulator,
             )
+
     return obj
 
 
 def _get_structural_named_entry(obj: dict):
+    # Detect wrappers like {"Privacy": {"score": ..., "metrics": ...}}.
     if len(obj) != 1:
         return None
 
@@ -187,11 +199,12 @@ def _get_structural_named_entry(obj: dict):
     if not isinstance(nested_value, dict):
         return None
 
-    if any(key in nested_value for key in ("score", "metrics", "notions", "pillars")):
+    if any(key in nested_value for key in NAMED_ENTRY_KEYS):
         return entry_key, nested_value
 
     return None
 
 
 def _is_numeric_score(value):
+    # Booleans are ints in Python, but they are not trust score values here.
     return isinstance(value, (int, float)) and not isinstance(value, bool)
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 17b9a4ef8..1996171ba 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -35,7 +35,7 @@
 from codecarbon import EmissionsTracker
 from nebula.addons.trustworthiness.per_round_metrics import PerRoundTrustMetrics
 from datetime import datetime
-from nebula.addons.trustworthiness.factsheet import CflFactsheet
+from nebula.addons.trustworthiness.cfl_factsheet import CflFactsheet
 from nebula.addons.trustworthiness.metric import TrustMetricManager
 from nebula.addons.trustworthiness.dfl_factsheet import DflFactsheet
 from nebula.addons.trustworthiness.graphics import Graphics
@@ -996,6 +996,7 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         bytes_sent = self._engine.reporter.acc_bytes_sent
         bytes_recv = self._engine.reporter.acc_bytes_recv
 
+        # Persist the trainer-reported DP budget so factsheets can score privacy.
         privacy_metrics = self._engine.trainer.get_privacy_metrics()
         dp_enabled=bool(privacy_metrics.get("dp_enabled", False))
         dp_epsilon=privacy_metrics.get("dp_epsilon")
diff --git a/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py b/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
index 4ebd15ba3..b91e82ef4 100644
--- a/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
+++ b/nebula/core/aggregation/updatehandlers/sdflupdatehandler.py
@@ -54,8 +54,10 @@ def __init__(self, aggregator, addr, buffersize=MAX_UPDATE_BUFFER_SIZE):
         self._addr = addr
         self._aggregator: Aggregator = aggregator
         self._buffersize = buffersize
+        # Store the last used update plus a short history per source to tolerate late/missing updates.
         self._updates_storage: dict[str, tuple[Update, deque[Update]]] = {}
         self._updates_storage_lock = Locker(name="updates_storage_lock", async_lock=True)
+        # SDFL aggregation waits for a dynamic set of trainer sources each round.
         self._sources_expected = set()
         self._sources_received = set()
         self._round_updates_lock = Locker(name="round_updates_lock", async_lock=True)
@@ -91,6 +93,7 @@ async def round_expected_updates(self, federation_nodes: set):
         """
         await self._update_federation_lock.acquire_async()
         await self._updates_storage_lock.acquire_async()
+        # Reset per-round reception state while preserving per-node history buffers.
         self._sources_expected = federation_nodes.copy()
         self._sources_received.clear()
 
@@ -144,6 +147,7 @@ async def storage_update(self, updt_received_event: UpdateReceivedEvent):
             updt_received_event (UpdateReceivedEvent): Event with model update data.
         """
         if updt_received_event.is_reputation_update():
+            # Reputation model updates are consumed by the reputation addon, not by aggregation.
             logging.debug("Discard reputation-only update in SDFL aggregation storage")
             return
 
@@ -168,6 +172,7 @@ async def storage_update(self, updt_received_event: UpdateReceivedEvent):
                     f"Updates received ({len(self._sources_received)}/{len(self._sources_expected)}) | Missing nodes: {updates_left}"
                 )
                 if self._round_updates_lock.locked() and not updates_left:
+                    # Release aggregation as soon as the last expected trainer update arrives.
                     all_rec = await self._all_updates_received()
                     if all_rec:
                         await self._notify()
@@ -194,6 +199,7 @@ async def get_round_updates(self):
         self._nodes_using_historic.clear()
         updates = {}
         for sr in self._sources_received:
+            # Use the newest update unless it was already consumed in a previous aggregation.
             source_historic = self.us[sr][1]
             last_updt_received = self.us[sr][0]
             updt: Update = None
@@ -217,6 +223,8 @@ async def before_aggregation(self, updates: dict[str, tuple[object, float]], fed
         if not hasattr(engine, "_reputation") or engine._reputation is None:
             return
 
+        # The aggregator may receive updates from non-neighbor trainers through forwarding.
+        # Their reputation is inferred from reputation tables shared by expected trainers.
         round_num = await engine.get_round()
         expected_table_nodes = engine.get_sdfl_expected_trainers()
         target_nodes = set(federation_nodes) | set(updates.keys())
@@ -285,6 +293,7 @@ async def notify_if_all_updates_received(self):
         Set a notification trigger and notify aggregator if all updates are already received.
         """
         logging.info("Set notification when all expected updates received")
+        # Hold this lock while the caller is waiting; _notify releases it once ready.
         await self._round_updates_lock.acquire_async()
         await self._updates_storage_lock.acquire_async()
         all_received = await self._all_updates_received()
@@ -306,6 +315,7 @@ async def _notify(self):
         """
         await self._notification_sent_lock.acquire_async()
         if self._notification:
+            # Multiple updates can race to complete the round; notify the aggregator once.
             await self._notification_sent_lock.release_async()
             return
         self._notification = True
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index 7fde4164e..cb05c57db 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -559,6 +559,7 @@ async def _reputation_share_callback(self, source, message):
 
     async def _reputationtable_table_callback(self, source, message):
         try:
+            # Reputation tables are an SDFL-only control plane for indirect reputation.
             if self.config.participant["scenario_args"].get("federation") != "SDFL":
                 return
             if self.rb.get_role_name(True) != "aggregator":
@@ -580,6 +581,7 @@ async def _reputationtable_table_callback(self, source, message):
                 reputation_table,
                 received_from=source,
             )
+            # Start or refresh the async collection window for this SDFL round.
             expected_nodes = self.get_sdfl_expected_trainers()
             timeout = float(
                 self.config.participant["defense_args"]
@@ -685,6 +687,7 @@ async def _sdflmodel_trainer_update_callback(self, source, message):
                 )
                 return
 
+            # Valid trainer updates are converted into the normal aggregation event stream.
             decoded_model = self.trainer.deserialize_model(message.parameters)
 
             event = UpdateReceivedEvent(
@@ -729,6 +732,7 @@ async def _sdflmodel_global_model_callback(self, source, message):
                 )
                 return
 
+        # Trainers apply the aggregator's global model and unblock their SDFL round wait.
         decoded_model = self.trainer.deserialize_model(message.parameters)
         self.trainer.set_model_parameters(decoded_model)
 
diff --git a/nebula/core/models/nebulamodel.py b/nebula/core/models/nebulamodel.py
index b2d6065d8..3a270ae88 100755
--- a/nebula/core/models/nebulamodel.py
+++ b/nebula/core/models/nebulamodel.py
@@ -215,6 +215,7 @@ def __init__(
         self._latest_validation_metrics = {}
         self._train_extra_metrics = {}
 
+        # DP trainers update these fields after querying the Opacus accountant.
         self.dp_enabled = False
         self.dp_epsilon = None
         self.dp_delta = None
diff --git a/nebula/core/network/forwarder.py b/nebula/core/network/forwarder.py
index e6831eec7..9eccc15fe 100755
--- a/nebula/core/network/forwarder.py
+++ b/nebula/core/network/forwarder.py
@@ -143,8 +143,10 @@ def _allow_forward_after_learning_finished(self, msg: bytes) -> bool:
             if message_type == "trustscores_message":
                 return True
             if message_type == "sdflmodel_message":
+                # Trainers may finish their local cycle before the forwarded global model arrives.
                 return message_wrapper.sdflmodel_message.action == nebula_pb2.SdflmodelMessage.Action.GLOBAL_MODEL
             if message_type == "reputationtable_message":
+                # SDFL reputation tables can be forwarded while the aggregator is waiting.
                 return True
             return False
         except Exception as e:
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index 0d8a036ed..9963ff6ea 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -87,6 +87,7 @@ def _define_message_templates(self):
                 },
             },
             "sdflmodel": {
+                # SDFL uses a dedicated model channel for forwarded trainer/global updates.
                 "parameters": ["action", "target", "parameters", "weight", "round", "node_id"],
                 "defaults": {
                     "weight": 1,
@@ -100,6 +101,7 @@ def _define_message_templates(self):
                 },
             },
             "reputationtable": {
+                # Reputation tables carry one-hop trust scores for SDFL indirect reputation.
                 "parameters": ["action", "node_id", "round", "reputation_table_json"],
                 "defaults": {
                     "node_id": self.addr,
@@ -262,11 +264,13 @@ def _should_forward_message(self, message_type, message_wrapper):
             return True
 
         if  self.cm.config.participant["scenario_args"]["federation"] == "SDFL" and message_type == "sdflmodel_message":
+            # SDFL model messages must still flow after the generic learning-finished gate.
             return True
         if (
             self.cm.config.participant["scenario_args"]["federation"] == "SDFL"
             and message_type == "reputationtable_message"
         ):
+            # Reputation tables can arrive late while aggregation is waiting for trust evidence.
             return True
 
     def create_message(self, message_type: str, action: str = "", *args, **kwargs):
diff --git a/nebula/core/node.py b/nebula/core/node.py
index a5fa22f1d..772c9d832 100755
--- a/nebula/core/node.py
+++ b/nebula/core/node.py
@@ -210,6 +210,7 @@ async def main(config: Config):
     trainer_str = config.participant["training_args"]["trainer"]
     dp_enabled = config.participant["training_args"]["dp"]["enabled"]
     if trainer_str == "lightning":
+        # DP is implemented as a Lightning-specific trainer wrapper around Opacus.
         if dp_enabled:
             trainer = LightningDP
         else:
diff --git a/nebula/core/noderole.py b/nebula/core/noderole.py
index ce02f1cd4..3d7f68456 100644
--- a/nebula/core/noderole.py
+++ b/nebula/core/noderole.py
@@ -327,6 +327,7 @@ async def resolve_missing_updates(self):
 
 class SDFLRoleMixin:
     async def _send_reputation_model_update(self):
+        # SDFL reputation evaluates direct neighbors from the latest local model update.
         model_params = self._engine.trainer.get_model_parameters()
         serialized_model = (
             model_params
@@ -346,6 +347,7 @@ async def _send_reputation_model_update(self):
             logging.info("SDFL reputation | No direct neighbors to send model/update")
             return
 
+        # Reputation model updates use the regular model channel and stay one-hop local.
         logging.info(f"SDFL reputation | Broadcasting model/update to direct neighbors: {neighbors}")
         await asyncio.gather(
             *[
@@ -357,9 +359,11 @@ async def _send_reputation_model_update(self):
 
 class SDFLAggregatorRoleBehavior(SDFLRoleMixin, AggregatorRoleBehavior):
     async def before_round_start(self):
+        # Leadership transfer must be acknowledged before the new aggregator starts a round.
         await self._engine.wait_pending_leadership_ack()
 
     async def extended_learning_cycle(self):
+        # SDFL aggregators collect trainer updates, publish the global model, then rotate leadership.
         await self._engine.trainer.test()
         await self._send_reputation_model_update()
         await self._engine._waiting_model_updates()
@@ -370,12 +374,14 @@ async def _before_leadership_transfer(self, successor):
         await self._engine.mark_leadership_transfer_pending(successor)
 
     async def select_nodes_to_wait(self):
+        # The aggregator waits for all expected trainers, not just currently direct neighbors.
         nodes = self._engine.get_sdfl_expected_trainers()
         if nodes:
             return nodes
         return await super().select_nodes_to_wait()
 
     async def _send_global_model(self) -> None:
+        # Send the aggregated model through the SDFL forwarding channel.
         model_params = self._engine.trainer.get_model_parameters()
         serialized_model = (
             model_params
@@ -500,6 +506,7 @@ async def extended_learning_cycle(self):
 
         await self._engine.trainer.test()
         self._prepare_waiting_global_model()
+        # Trainers train locally, exchange reputation evidence, send their update, then wait for aggregation.
         await self._engine.trainning_in_progress_lock.acquire_async()
         try:
             await self._engine.trainer.train()
@@ -507,6 +514,7 @@ async def extended_learning_cycle(self):
             await self._engine.trainning_in_progress_lock.release_async()
 
         if self._engine._reputation is not None:
+            # Process reputation model updates that arrived before the local table is computed.
             await self._engine._reputation.process_pending_sdfl_reputation_updates(self._engine.round)
 
         await self._send_reputation_model_update()
@@ -515,10 +523,12 @@ async def extended_learning_cycle(self):
         await self._waiting_global_model()
 
     def _prepare_waiting_global_model(self):
+        # Reset the per-round event used by trainers to block until a GLOBAL_MODEL arrives.
         self._engine._global_model_source = None
         self._engine._global_model_received.clear()
 
     async def _calculate_and_send_reputation_table(self):
+        # Trainers publish direct-neighbor reputation tables for the aggregator to combine.
         if self._engine._reputation is None:
             return
 
@@ -544,6 +554,7 @@ async def _calculate_and_send_reputation_table(self):
         await self._engine._reputation.calculate_and_send_sdfl_reputation_table()
 
     async def _send_trainer_update(self):
+        # Broadcast the local trainer update; forwarding delivers it to the current aggregator.
         model_params = self._engine.trainer.get_model_parameters()
         serialized_model = (
             model_params
@@ -585,6 +596,7 @@ async def _send_trainer_update(self):
             logging.warning("SDFL trainer | No neighbors available to send TRAINER_UPDATE")
 
     async def _waiting_global_model(self):
+        # A trainer continues only after the aggregator's GLOBAL_MODEL is received or times out.
         timeout = self._config.participant["aggregator_args"]["aggregation_timeout"]
         logging.info(f"💤  Waiting global SDFL model in round {self._engine.round}.")
         try:
diff --git a/nebula/core/training/dp.py b/nebula/core/training/dp.py
index 56a2508f8..15b094c88 100644
--- a/nebula/core/training/dp.py
+++ b/nebula/core/training/dp.py
@@ -1,4 +1,5 @@
 class SimpleDPState:
+    # Minimal mutable state used to pass Opacus-wrapped objects between hooks.
     def __init__(self):
         self.extras = {}
 
@@ -17,6 +18,8 @@ def __init__(
         poisson_sampling=True,
         clipping="flat",
     ):
+        # Fixed DP-SGD controls. Epsilon is not configured here; it is computed
+        # from the accountant as the consumed privacy budget after training.
         self.noise_multiplier = float(noise_multiplier)
         self.max_grad_norm = float(max_grad_norm)
         self.target_delta = target_delta
@@ -27,11 +30,14 @@ def __init__(
         self._privacy_engine = None
 
     def on_train_start(self, model, optimizer, state):
+        # Import Opacus lazily so non-DP trainers do not need to load it.
         from opacus import PrivacyEngine
 
         dataloader = state.extras["dataloader"]
         model.train()
 
+        # Keep one PrivacyEngine per plugin instance so the accountant composes
+        # privacy loss across Nebula rounds instead of resetting every round.
         if self._privacy_engine is None:
             self._privacy_engine = PrivacyEngine(
                 accountant=self.accountant,
@@ -49,12 +55,14 @@ def on_train_start(self, model, optimizer, state):
             clipping=self.clipping,
         )
 
+        # Replace the training components with DP-aware versions used by LightningDP.
         state.extras["privacy_engine"] = privacy_engine
         state.extras["model"] = private_model
         state.extras["optimizer"] = private_optimizer
         state.extras["dataloader"] = private_dataloader
 
     def on_train_end(self, state):
+        # Query the accumulated epsilon for the configured delta after this round.
         privacy_engine = state.extras.get("privacy_engine")
         private_model = state.extras.get("model")
 
@@ -67,6 +75,8 @@ def on_train_end(self, state):
                 pass
 
         if private_model is not None:
+            # Clean Opacus hook state so the same model can continue through later
+            # Nebula phases without stale per-sample gradient hooks.
             try:
                 private_model.zero_grad(set_to_none=True)
             except Exception:
diff --git a/nebula/core/training/lightning.py b/nebula/core/training/lightning.py
index 0c869d1de..f5988ef00 100755
--- a/nebula/core/training/lightning.py
+++ b/nebula/core/training/lightning.py
@@ -390,6 +390,7 @@ def show_current_learning_rate(self):
         self.model.show_current_learning_rate()
 
     def get_privacy_metrics(self):
+        # Non-DP trainers expose the same metrics contract with neutral values.
         return {
             "dp_enabled": False,
             "dp_epsilon": 0,
diff --git a/nebula/core/training/lightning_dp.py b/nebula/core/training/lightning_dp.py
index c15a164b9..bed08a973 100644
--- a/nebula/core/training/lightning_dp.py
+++ b/nebula/core/training/lightning_dp.py
@@ -19,11 +19,13 @@ class LightningDP(Lightning):
 
     def __init__(self, model, datamodule, config=None):
         super().__init__(model, datamodule, config)
+        # The DP plugin owns the Opacus PrivacyEngine and its cumulative accountant.
         self._dp_plugin = self.create_dp_plugin()
         self.dp_epsilon = None
         self.dp_delta = None
 
     def create_dp_plugin(self):
+        # Translate Nebula participant config into the fixed DP-SGD controls used by Opacus.
         dp_config = self.config.participant["training_args"].get("dp")
 
         if dp_config is None or not dp_config.get("enabled", False):
@@ -40,6 +42,7 @@ def create_dp_plugin(self):
         )
 
     def _train_sync(self):
+        # Keep the public Lightning trainer contract: train once and return loss/accuracy.
         try:
             self._fit_with_dp()
 
@@ -63,6 +66,7 @@ def _train_sync(self):
             raise
 
     def _get_training_device(self):
+        # Resolve the effective device for any manual DP path that needs it.
         if (
             self.config.participant["device_args"]["accelerator"] == "gpu"
             and torch.cuda.is_available()
@@ -73,6 +77,7 @@ def _get_training_device(self):
         return torch.device("cpu")
 
     def _log_manual_metrics(self, phase, metrics):
+        # Log manually computed metrics using the same naming scheme as Lightning.
         output = metrics.compute()
         output = {
             f"{phase}/{key.replace('Multiclass', '').split('/')[-1]}": value.detach()
@@ -88,9 +93,11 @@ def _log_manual_metrics(self, phase, metrics):
         self._logger.log_data(output, step=self.model.global_number[phase])
 
     def _fit_with_dp(self):
+        # Bridge Nebula's Lightning trainer with Opacus' private optimizer/dataloader.
         state = SimpleDPState()
 
         if hasattr(self.model, "clear_optimizer_override"):
+            # Start from a clean optimizer so a previous round cannot leak into this fit.
             self.model.clear_optimizer_override()
 
         try:
@@ -102,6 +109,7 @@ def _fit_with_dp(self):
             optimizer = self.model.configure_optimizers()
             state.extras["dataloader"] = train_dataloader
 
+            # Opacus wraps the model, optimizer and dataloader, and updates the accountant.
             self._dp_plugin.on_train_start(self.model, optimizer, state)
 
             private_optimizer = state.extras["optimizer"]
@@ -114,6 +122,8 @@ def _fit_with_dp(self):
             # the original LightningModule and a DPOptimizer through configure_optimizers.
             self.model.dp_enabled = True
             self.model.set_optimizer_override(private_optimizer)
+            # Lightning still drives the training loop; the injected optimizer/dataloader
+            # make the loop perform DP-SGD instead of standard SGD.
             self._trainer.fit(
                 self.model,
                 train_dataloaders=private_dataloader,
@@ -123,6 +133,7 @@ def _fit_with_dp(self):
             self.model.train()
 
         finally:
+            # Always restore the model/trainer state, even if Lightning raises.
             self.model.dp_enabled = False
             if hasattr(self.model, "clear_optimizer_override"):
                 self.model.clear_optimizer_override()
@@ -132,6 +143,7 @@ def _fit_with_dp(self):
         dp_epsilon = state.extras.get("dp_epsilon")
 
         if dp_epsilon is not None:
+            # Store the accumulated privacy budget for logging and trustworthiness reports.
             dp_delta = state.extras["dp_delta"]
 
             self.dp_epsilon = float(dp_epsilon)
@@ -153,6 +165,7 @@ def _fit_with_dp(self):
             )
 
     def get_privacy_metrics(self):
+        # Trustworthiness consumes these values at experiment finish.
         return {
             "dp_enabled": True,
             "dp_epsilon": self.dp_epsilon,

From 309e5f0718704503416a47744cbef7af92411b4b Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 3 Jun 2026 10:54:01 +0200
Subject: [PATCH 57/66] Refactoring and metrics fixed: Privacy and
 explainability

---
 .../trustworthiness/helpers/explainability.py | 438 ++++++------------
 .../addons/trustworthiness/helpers/privacy.py |  94 +---
 2 files changed, 155 insertions(+), 377 deletions(-)

diff --git a/nebula/addons/trustworthiness/helpers/explainability.py b/nebula/addons/trustworthiness/helpers/explainability.py
index ce9809c3e..96b066a1f 100644
--- a/nebula/addons/trustworthiness/helpers/explainability.py
+++ b/nebula/addons/trustworthiness/helpers/explainability.py
@@ -1,5 +1,3 @@
-import copy
-import gc
 import logging
 import math
 
@@ -11,298 +9,160 @@
 
 logger = logging.getLogger(__name__)
 
-def get_feature_importance_cv(model, test_sample):
-    """
-    Calculates the coefficient of variation of the feature importance.
 
-    Args:
-        model (object): The model.
-        test_sample (object): One test sample to calculate the feature importance.
+def _feature_importance_cv_from_values(vals):
+    # Higher CV means attributions differ more across features, i.e. a more selective explanation.
+    vals = np.asarray(vals, dtype=float).reshape(-1)
+    vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
+    vals = vals[vals > 0]
 
-    Returns:
-        float: The coefficient of variation of the feature importance.
-    """
-
-    try:
-        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
-        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
-        vals = vals[vals > 0]
-
-        if len(vals) <= 1:
-            return 0.0
+    if len(vals) <= 1:
+        return 0.0
 
-        cv = float(variation(vals))
-        if math.isnan(cv) or math.isinf(cv):
-            return 1.0
-        return max(0.0, cv)
-    except Exception as exc:
-        logger.warning("Could not compute feature importance CV with shap")
-        logger.warning(exc)
+    cv = float(variation(vals))
+    if math.isnan(cv) or math.isinf(cv):
         return 1.0
+    return max(0.0, cv)
 
 
 def _get_feature_importances(model, test_sample):
-    """
-    Computes global feature importances from SHAP values.
-
-    Args:
-        model (object): The model.
-        test_sample (object): One test sample batch.
-
-    Returns:
-        np.ndarray: Global importances per feature.
-    """
+    # Computes global feature importances with a simple modality-aware policy:
+    # SHAP for tabular tensors and Integrated Gradients for image-like tensors.
     if not isinstance(model, torch.nn.Module):
         logger.warning("Model is not a torch.nn.Module")
         return np.array([])
 
-    def _clone_model(model_ref, device):
-        optimizer_attrs = ("_optimizer", "_optimizer_override")
-        optimizer_state = {}
-        try:
-            for attr in optimizer_attrs:
-                if hasattr(model_ref, attr):
-                    optimizer_state[attr] = getattr(model_ref, attr)
-                    setattr(model_ref, attr, None)
-
-            model_clone = copy.deepcopy(model_ref)
-            for attr in optimizer_attrs:
-                if hasattr(model_clone, attr):
-                    setattr(model_clone, attr, None)
-
-            model_clone.to(device)
-            model_clone.eval()
-            return model_clone
-        except Exception as exc:
-            logger.warning("Could not clone model for SHAP, using original model")
-            logger.warning(exc)
-            model_ref.eval()
-            return model_ref
-        finally:
-            for attr, value in optimizer_state.items():
-                setattr(model_ref, attr, value)
-
-    def _prepare_shap_inputs(sample):
-        if not (isinstance(sample, (tuple, list)) and len(sample) >= 1):
-            return None, None, None
-
-        batched_data = sample[0]
-        if not torch.is_tensor(batched_data) or batched_data.ndim == 0 or batched_data.size(0) == 0:
-            return None, None, None
-
-        if not torch.is_floating_point(batched_data):
-            batched_data = batched_data.float()
-
-        batch_size = int(batched_data.size(0))
-        input_shape = tuple(int(dim) for dim in batched_data.shape[1:])
-
-        if batch_size == 1:
-            return batched_data[:1], batched_data[:1], input_shape
-
-        background_size = min(max(8, batch_size // 4), 32, batch_size - 1)
-        explainable = batch_size - background_size
-        explain_size = min(max(4, explainable), 32, explainable)
-
-        background = batched_data[:background_size]
-        test_data = batched_data[background_size:background_size + explain_size]
-
-        if test_data.size(0) == 0:
-            test_data = batched_data[: min(batch_size, 32)]
-
-        return background, test_data, input_shape
-
-    def _compute_shap_values(model_ref, background, test_data):
-        explainer_errors = []
-
-        for explainer_name in ("DeepExplainer", "GradientExplainer"):
-            explainer = None
-            try:
-                if explainer_name == "DeepExplainer":
-                    explainer = shap.DeepExplainer(model_ref, background)
-                    return explainer.shap_values(test_data, check_additivity=False)
-
-                explainer = shap.GradientExplainer(model_ref, background)
-                return explainer.shap_values(test_data)
-            except Exception as exc:
-                explainer_errors.append(f"{explainer_name}: {exc}")
-            finally:
-                # SHAP explainers may register autograd hooks. If we explain on the
-                # original model, those hooks can leak into later ART metrics.
-                del explainer
-                gc.collect()
-
-        raise RuntimeError("; ".join(explainer_errors))
-
-    def _compute_gradient_importances(model_ref, test_data):
-        was_training = bool(getattr(model_ref, "training", False))
-        model_ref.eval()
-
-        try:
-            inputs = test_data.detach().clone().requires_grad_(True)
-            model_ref.zero_grad(set_to_none=True)
-
-            outputs = model_ref(inputs)
-            if isinstance(outputs, (tuple, list)):
-                outputs = outputs[0]
-
-            if outputs.ndim == 1:
-                score = outputs.sum()
-            else:
-                score = outputs.reshape(outputs.shape[0], -1).max(dim=1).values.sum()
-
-            score.backward()
-            if inputs.grad is None:
-                return np.array([])
-
-            importances = torch.abs(inputs.grad * inputs).mean(dim=0)
-            importances = importances.detach().cpu().numpy().reshape(-1)
-            importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
-            return np.maximum(importances, 0.0)
-        finally:
-            if was_training:
-                model_ref.train()
-
-    def _feature_axes_from_shape(arr_shape, input_shape, n_samples):
-        input_shape = tuple(input_shape)
-        input_rank = len(input_shape)
-
-        if input_rank == 0 or len(arr_shape) < input_rank:
-            return None
-
-        if len(arr_shape) >= input_rank + 1 and tuple(arr_shape[1:1 + input_rank]) == input_shape:
-            return tuple(range(1, 1 + input_rank))
-
-        if len(arr_shape) >= input_rank + 2 and arr_shape[1] == n_samples and tuple(arr_shape[2:2 + input_rank]) == input_shape:
-            return tuple(range(2, 2 + input_rank))
-
-        candidates = []
-        for start in range(len(arr_shape) - input_rank + 1):
-            if tuple(arr_shape[start:start + input_rank]) == input_shape:
-                candidates.append(start)
+    if not isinstance(test_sample, (tuple, list)) or len(test_sample) < 1:
+        return np.array([])
 
-        if not candidates:
-            return None
+    inputs = test_sample[0]
+    if not torch.is_tensor(inputs) or inputs.ndim < 2 or inputs.size(0) == 0:
+        return np.array([])
 
-        # Prefer matches that do not consume the leading sample/output axes.
-        non_leading = [start for start in candidates if start > 0]
-        if non_leading:
-            candidates = non_leading
+    try:
+        device = next(model.parameters()).device
+    except Exception:
+        device = torch.device("cpu")
 
-        if len(arr_shape) > 1 and arr_shape[1] == n_samples:
-            non_output_sample = [start for start in candidates if start > 1]
-            if non_output_sample:
-                candidates = non_output_sample
+    inputs = inputs.to(device)
+    if not torch.is_floating_point(inputs):
+        inputs = inputs.float()
 
-        start = candidates[0]
-        return tuple(range(start, start + input_rank))
+    was_training = bool(getattr(model, "training", False))
+    model.eval()
 
     try:
-        try:
-            device = next(model.parameters()).device
-        except Exception:
-            device = torch.device("cpu")
+        if inputs.ndim == 2:
+            logger.info("Computing tabular feature importances with SHAP, input_shape=%s", tuple(inputs.shape))
+            importances = _get_shap_importances(model, inputs)
+        else:
+            logger.info("Computing image-like feature importances with Integrated Gradients, input_shape=%s", tuple(inputs.shape))
+            importances = _get_integrated_gradients_importances(model, inputs)
 
-        background, test_data, input_shape = _prepare_shap_inputs(test_sample)
-        if background is None or test_data is None or input_shape is None:
-            return np.array([])
+        logger.info("Computed feature importances, n_features=%s, total_importance=%s", len(importances), float(np.sum(importances)))
+        return importances
+    except Exception as exc:
+        logger.warning("Could not compute feature importances")
+        logger.warning(exc)
+        return np.array([])
+    finally:
+        if was_training:
+            model.train()
 
-        background = background.to(device)
-        test_data = test_data.to(device)
-
-        shap_model = _clone_model(model, device)
-        try:
-            shap_values = _compute_shap_values(shap_model, background, test_data)
-        except Exception as exc:
-            logger.debug("Could not compute feature importances with SHAP, using gradient fallback: %s", exc)
-            shap_model = None
-            gc.collect()
-
-            gradient_model = _clone_model(model, device)
-            try:
-                return _compute_gradient_importances(gradient_model, test_data)
-            except Exception as fallback_exc:
-                logger.debug("Could not compute feature importances with gradient fallback: %s", fallback_exc)
-                return np.array([])
-            finally:
-                del gradient_model
-                gc.collect()
-        finally:
-            if shap_model is not None:
-                del shap_model
-            gc.collect()
-
-        if shap_values is None:
-            return np.array([])
 
-        if isinstance(shap_values, (list, tuple)):
-            arrays = [np.asarray(val, dtype=float) for val in shap_values if val is not None]
-            if not arrays:
-                return np.array([])
-            shap_arr = np.stack(arrays, axis=0)
-        else:
-            shap_arr = np.asarray(shap_values, dtype=float)
+def _get_shap_importances(model, inputs):
+    # SHAP is a natural fit for tabular data: one attribution per input column.
+    if inputs.size(0) < 2:
+        return np.array([])
+
+    background_size = min(16, inputs.size(0) - 1)
+    background = inputs[:background_size]
+    explained = inputs[background_size:]
 
-        if shap_arr.size == 0:
+    logger.info("SHAP background_size=%s, explained_size=%s", int(background.size(0)), int(explained.size(0)))
+    explainer = shap.GradientExplainer(model, background)
+    shap_values = explainer.shap_values(explained)
+
+    if isinstance(shap_values, (list, tuple)):
+        arrays = [np.asarray(values, dtype=float) for values in shap_values if values is not None]
+        if not arrays:
             return np.array([])
+        shap_arr = np.stack(arrays, axis=0)
+        importances = np.mean(np.abs(shap_arr), axis=(0, 1))
+    else:
+        shap_arr = np.asarray(shap_values, dtype=float)
+        if shap_arr.ndim == 3:
+            importances = np.mean(np.abs(shap_arr), axis=(0, 2))
+        else:
+            importances = np.mean(np.abs(shap_arr), axis=0)
+
+    return _clean_importances(importances)
+
+
+def _get_integrated_gradients_importances(model, inputs, steps=16):
+    # Zero baseline is simple and works well for normalized image tensors.
+    logger.info("Integrated Gradients steps=%s", int(steps))
+    baseline = torch.zeros_like(inputs)
+    total_gradients = torch.zeros_like(inputs)
+
+    for alpha in torch.linspace(0.0, 1.0, steps, device=inputs.device):
+        scaled_inputs = (baseline + alpha * (inputs - baseline)).detach().requires_grad_(True)
+        model.zero_grad(set_to_none=True)
 
-        shap_arr = np.nan_to_num(shap_arr, nan=0.0, posinf=0.0, neginf=0.0)
-        feature_axes = _feature_axes_from_shape(tuple(shap_arr.shape), input_shape, int(test_data.size(0)))
-
-        if feature_axes is None:
-            # Conservative fallback: treat the first axis as samples when possible and
-            # flatten the remaining dimensions into features.
-            if shap_arr.ndim == 1:
-                importances = np.abs(shap_arr)
-            else:
-                aggregate_axes = (0,)
-                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
+        outputs = model(scaled_inputs)
+        if isinstance(outputs, (tuple, list)):
+            outputs = outputs[0]
+
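+        # Scalar objective: sum of 1-D outputs, else each sample's highest output at this interpolation step (not necessarily the class predicted for the unscaled input).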
+        if outputs.ndim == 1:
+            score = outputs.sum()
         else:
-            aggregate_axes = tuple(idx for idx in range(shap_arr.ndim) if idx not in feature_axes)
-            if aggregate_axes:
-                importances = np.mean(np.abs(shap_arr), axis=aggregate_axes)
-            else:
-                importances = np.abs(shap_arr)
-
-        importances = np.asarray(importances, dtype=float).reshape(-1)
-        importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
-        return np.maximum(importances, 0.0)
-    except Exception as exc:
-        logger.debug("Could not compute feature importances")
-        logger.debug(exc)
-        return np.array([])
+            score = outputs.reshape(outputs.shape[0], -1).max(dim=1).values.sum()
 
+        gradients = torch.autograd.grad(score, scaled_inputs)[0]
+        total_gradients += gradients.detach()
 
-def get_alpha_score(model, test_sample, alpha=0.8):
-    """
-    Computes alpha score from global feature importances.
-    """
-    try:
-        vals = np.asarray(_get_feature_importances(model, test_sample), dtype=float).reshape(-1)
-        vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
-        vals = np.maximum(vals, 0.0)
-        total_features = len(vals)
-        if total_features == 0 or np.sum(vals) <= 1e-12:
-            return 1.0
-
-        try:
-            alpha = float(alpha)
-        except Exception:
-            alpha = 0.8
-        alpha = min(max(alpha, 0.0), 1.0)
-
-        vals_sorted = np.sort(vals)[::-1]
-        cum_sum = np.cumsum(vals_sorted)
-        threshold = float(alpha) * np.sum(vals_sorted)
-        idx = np.searchsorted(cum_sum, threshold)
-        return float(min(total_features, idx + 1) / total_features)
-    except Exception as exc:
-        logger.warning("Could not compute alpha score")
-        logger.warning(exc)
+    attributions = (inputs - baseline) * total_gradients / float(steps)
+    importances = torch.abs(attributions).mean(dim=0)
+
+    if importances.ndim == 3:
+        # Average over the leading (presumably channel) dim, RGB or not, to keep one importance value per spatial position.
+        importances = importances.mean(dim=0)
+
+    return _clean_importances(importances.detach().cpu().numpy())
+
+
+def _clean_importances(importances):
+    importances = np.asarray(importances, dtype=float).reshape(-1)
+    importances = np.nan_to_num(importances, nan=0.0, posinf=0.0, neginf=0.0)
+    return np.maximum(importances, 0.0)
+
+
+def _alpha_score_from_values(vals, alpha=0.8):
+    # Fraction of features needed to explain alpha of the attribution mass; lower is better.
+    vals = np.asarray(vals, dtype=float).reshape(-1)
+    vals = np.nan_to_num(vals, nan=0.0, posinf=0.0, neginf=0.0)
+    vals = np.maximum(vals, 0.0)
+    total_features = len(vals)
+    if total_features == 0 or np.sum(vals) <= 1e-12:
         return 1.0
 
+    try:
+        alpha = float(alpha)
+    except Exception:
+        alpha = 0.8
+    alpha = min(max(alpha, 0.0), 1.0)
+
+    vals_sorted = np.sort(vals)[::-1]
+    cum_sum = np.cumsum(vals_sorted)
+    threshold = float(alpha) * np.sum(vals_sorted)
+    idx = np.searchsorted(cum_sum, threshold)
+    return float(min(total_features, idx + 1) / total_features)
+
 
-def _get_spread_base(model, test_sample, divergence=True):
-    vals = _get_feature_importances(model, test_sample)
+def _spread_base_from_values(vals, divergence=True):
+    # Entropy ratio measures spread; JS divergence measures distance from uniform attribution.
+    vals = np.asarray(vals, dtype=float).reshape(-1)
     tol = 1e-8
 
     if len(vals) == 0 or np.sum(vals) < tol:
@@ -324,44 +184,8 @@ def _get_spread_base(model, test_sample, divergence=True):
     return float(np.clip(metric, 0.0, 1.0))
 
 
-def get_spread_ratio(model, test_sample):
-    """
-    Computes spread ratio from global feature importances.
-    """
-    try:
-        return _get_spread_base(model, test_sample, divergence=False)
-    except Exception as exc:
-        logger.warning("Could not compute spread ratio")
-        logger.warning(exc)
-        return 1.0
-
-
-def get_spread_divergence(model, test_sample):
-    """
-    Computes spread divergence from global feature importances.
-    """
-    try:
-        return _get_spread_base(model, test_sample, divergence=True)
-    except Exception as exc:
-        logger.warning("Could not compute spread divergence")
-        logger.warning(exc)
-        return 0.0
-
-
 def get_explainability_metrics_summary(model, test_dataloader, max_batches=4):
-    """
-    Computes explainability metrics over multiple test batches and returns
-    their mean values.
-
-    Args:
-        model (object): The model.
-        test_dataloader: Test dataloader providing batches.
-        max_batches (int): Maximum number of batches to use.
-
-    Returns:
-        dict: Mean values for feature_importance_cv, alpha_score,
-        spread_ratio and spread_divergence.
-    """
+    # Computes explainability metrics over multiple test batches and returns their mean values.
     summary = {
         "feature_importance_cv": 1.0,
         "alpha_score": 1.0,
@@ -387,10 +211,12 @@ def get_explainability_metrics_summary(model, test_dataloader, max_batches=4):
             if batch_idx >= max_batches:
                 break
 
-            fi_values.append(float(get_feature_importance_cv(model, test_sample)))
-            alpha_values.append(float(get_alpha_score(model, test_sample)))
-            spread_ratio_values.append(float(get_spread_ratio(model, test_sample)))
-            spread_divergence_values.append(float(get_spread_divergence(model, test_sample)))
+            # Compute attributions once per batch and derive all explainability metrics from them.
+            importances = _get_feature_importances(model, test_sample)
+            fi_values.append(float(_feature_importance_cv_from_values(importances)))
+            alpha_values.append(float(_alpha_score_from_values(importances)))
+            spread_ratio_values.append(float(_spread_base_from_values(importances, divergence=False)))
+            spread_divergence_values.append(float(_spread_base_from_values(importances, divergence=True)))
     except Exception as exc:
         logger.warning("Could not compute explainability metrics summary")
         logger.warning(exc)
diff --git a/nebula/addons/trustworthiness/helpers/privacy.py b/nebula/addons/trustworthiness/helpers/privacy.py
index f6ed327c1..b33c062f2 100644
--- a/nebula/addons/trustworthiness/helpers/privacy.py
+++ b/nebula/addons/trustworthiness/helpers/privacy.py
@@ -11,17 +11,7 @@
 logger = logging.getLogger(__name__)
 
 def get_global_privacy_risk(dp, epsilon, n):
-    """
-    Calculates the global privacy risk by epsilon and the number of clients.
-
-    Args:
-        dp (bool): Indicates if differential privacy is used or not.
-        epsilon (int): The epsilon value.
-        n (int): The number of clients in the scenario.
-
-    Returns:
-        float: The global privacy risk.
-    """
+    # Calculates the global privacy risk by epsilon and the number of clients.
 
     try:
         epsilon = float(epsilon)
@@ -36,17 +26,7 @@ def get_global_privacy_risk(dp, epsilon, n):
 
 
 def get_global_privacy_risk_dfl(dp, epsilon, n):
-    """
-    Calculates the global privacy risk by epsilon and the number of clients.
-
-    Args:
-        dp (bool): Indicates if differential privacy is used or not.
-        epsilon (int): The epsilon value.
-        n (int): The number of neighbours.
-
-    Returns:
-        float: The global privacy risk.
-    """
+    # Calculates the global privacy risk by epsilon and the number of neighbours (DFL variant).
 
     try:
         epsilon = float(epsilon)
@@ -61,17 +41,7 @@ def get_global_privacy_risk_dfl(dp, epsilon, n):
 
 
 def _collect_per_sample_losses(model, dataloader, max_samples=5000):
-    """
-    Compute per-sample cross-entropy losses for a dataloader.
-
-    Args:
-        model (torch.nn.Module): The model to evaluate.
-        dataloader: DataLoader providing (samples, labels).
-        max_samples (int): Maximum number of samples to process.
-
-    Returns:
-        np.ndarray: Losses per sample.
-    """
+    # Compute per-sample cross-entropy losses for a dataloader.
     if not isinstance(model, torch.nn.Module) or dataloader is None:
         return np.array([])
 
@@ -110,7 +80,12 @@ def _collect_per_sample_losses(model, dataloader, max_samples=5000):
             logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
             batch_losses = criterion(logits, labels)
 
-            losses.append(batch_losses.detach().cpu().numpy())
+            batch_losses_np = batch_losses.detach().cpu().numpy()
+            batch_losses_np = batch_losses_np[np.isfinite(batch_losses_np)]
+            if batch_losses_np.size == 0:
+                continue
+
+            losses.append(batch_losses_np)
             collected += int(batch_losses.shape[0])
 
     if not losses:
@@ -119,22 +94,8 @@ def _collect_per_sample_losses(model, dataloader, max_samples=5000):
     return np.concatenate(losses, axis=0)
 
 
-def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000):
-    """
-    Compute empirical epsilon* from train/test loss distributions.
-
-    This follows the same core structure as privacy_metrics_core.epsilon_star,
-    adapted to PyTorch models and DataLoaders used in Nebula.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        train_dataloader: Training DataLoader.
-        test_dataloader: Test DataLoader.
-        max_samples (int): Maximum samples to evaluate per split.
-
-    Returns:
-        float: Empirical epsilon* value. Returns 0.0 on failure.
-    """
+def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000, percentile=95):
+    # Compute empirical epsilon* from train/test loss distributions: log of the `percentile`-th percentile of the error-rate ratios (floored at 1).
     try:
         loss_train = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
         loss_test = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)
@@ -147,9 +108,11 @@ def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000)
 
         fpr, tpr, _ = roc_curve(y_true, scores)
 
-        fpr = np.clip(fpr, 1e-10, 1 - 1e-10)
-        tpr = np.clip(tpr, 1e-10, 1 - 1e-10)
-        fnr = 1 - tpr
+        fpr_floor = 1.0 / len(loss_test)
+        fnr_floor = 1.0 / len(loss_train)
+
+        fpr = np.clip(fpr, fpr_floor, 1 - fpr_floor)
+        fnr = np.clip(1 - tpr, fnr_floor, 1 - fnr_floor)
 
         delta = 1.0 / len(loss_train) if len(loss_train) > 0 else 1e-5
 
@@ -158,9 +121,12 @@ def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000)
         m3 = (fnr - delta) / (1 - fpr)
         m4 = (fpr - delta) / (1 - fnr)
 
-        epsilon_star_val = np.log(
-            np.nanmax(np.maximum.reduce([m1, m2, m3, m4, np.ones_like(m1)]))
-        )
+        ratios = np.maximum.reduce([m1, m2, m3, m4, np.ones_like(m1)])
+        ratios = ratios[np.isfinite(ratios)]
+        if ratios.size == 0:
+            return 0.0
+
+        epsilon_star_val = np.log(np.nanpercentile(ratios, percentile))
 
         if np.isnan(epsilon_star_val) or np.isinf(epsilon_star_val):
             return 0.0
@@ -173,21 +139,7 @@ def get_epsilon_star(model, train_dataloader, test_dataloader, max_samples=5000)
 
 
 def get_mia_auc(model, train_dataloader, test_dataloader, max_samples=5000):
-    """
-    Compute membership inference attack AUC using per-sample loss as the attack score.
-
-    Lower loss suggests a sample is more likely to be a training member, so the
-    attack score is defined as negative loss.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        train_dataloader: Training DataLoader.
-        test_dataloader: Test DataLoader.
-        max_samples (int): Maximum samples to evaluate per split.
-
-    Returns:
-        float: ROC-AUC of the loss-threshold membership attack. Returns 0.5 on failure.
-    """
+    # Compute membership inference attack AUC using per-sample loss as the attack score.
     try:
         loss_train = _collect_per_sample_losses(model, train_dataloader, max_samples=max_samples)
         loss_test = _collect_per_sample_losses(model, test_dataloader, max_samples=max_samples)

From e91ffb0d637f317dc1788a4922b54a17bec97a7a Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 3 Jun 2026 13:33:46 +0200
Subject: [PATCH 58/66] Quality_model and fairness refactored, train accuracy
 and macro f1 score are obtained from training

---
 .../addons/trustworthiness/cfl_factsheet.py   |   6 +-
 .../addons/trustworthiness/dfl_factsheet.py   |   4 +-
 .../trustworthiness/factsheet_populators.py   |   5 +-
 .../addons/trustworthiness/helpers/csv_io.py  |  18 ++
 .../trustworthiness/helpers/model_quality.py  | 232 +++---------------
 .../helpers/scenario_metrics.py               |  35 ++-
 .../addons/trustworthiness/trustworthiness.py |  52 ++--
 nebula/core/engine.py                         |   4 +-
 nebula/core/models/cifar10/resnet.py          |   2 +-
 nebula/core/models/nebulamodel.py             |  30 ++-
 nebula/core/nebulaevents.py                   |  10 +-
 nebula/core/network/messages.py               |   4 +-
 nebula/core/pb/nebula.proto                   |   2 +
 nebula/core/pb/nebula_pb2.py                  |  12 +-
 nebula/core/training/lightning.py             |  34 ++-
 15 files changed, 193 insertions(+), 257 deletions(-)

diff --git a/nebula/addons/trustworthiness/cfl_factsheet.py b/nebula/addons/trustworthiness/cfl_factsheet.py
index 88eedab28..b8cbe104b 100755
--- a/nebula/addons/trustworthiness/cfl_factsheet.py
+++ b/nebula/addons/trustworthiness/cfl_factsheet.py
@@ -20,7 +20,6 @@
     get_dp_global,
     get_elapsed_time,
     get_entropy_list,
-    get_participant_loss_accuracy,
     get_underfitting_score,
 )
 from nebula.addons.trustworthiness.factsheet_common import (
@@ -104,7 +103,8 @@ def populate_factsheet_cfl(
             factsheet["performance"]["test_acc_avg"] = result_avg_loss_accuracy[1]
             test_acc_cv = get_cv(std=result_avg_loss_accuracy[2], mean=result_avg_loss_accuracy[1])
             factsheet["fairness"]["test_acc_cv"] = 1 if test_acc_cv > 1 else test_acc_cv
-            _, participant_test_acc = get_participant_loss_accuracy(scenario_name, participant_idx)
+            factsheet["performance"]["test_macro_f1"] = result_avg_loss_accuracy[3]
+            factsheet["performance"]["train_accuracy"] = result_avg_loss_accuracy[4]
 
             # Compute CFL privacy risk from aggregate DP settings and client count.
             dp_enabled, dp_epsilon = get_dp_global(scenario_name)
@@ -142,7 +142,7 @@ def populate_factsheet_cfl(
                 model,
                 train_loader,
                 test_loader,
-                participant_test_acc,
+                factsheet["performance"]["test_acc_avg"],
             )
 
             # Enrich CodeCarbon emissions with CPU/GPU benchmark metadata.
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index 2fb2fd115..d8f4a6afd 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -99,7 +99,9 @@ def populate_factsheet_dfl(
         factsheet["performance"]["test_acc"] = float(final_acc)
 
         # Load local communication and privacy values reported by the participant.
-        bytes_sent, bytes_recv, *_ = load_data_results_participant(scenario_name, participant_idx)
+        bytes_sent, bytes_recv, _, _, _, macro_f1, train_accuracy, *_ = load_data_results_participant(scenario_name, participant_idx)
+        factsheet["performance"]["test_macro_f1"] = macro_f1
+        factsheet["performance"]["train_accuracy"] = train_accuracy
 
         factsheet["system"]["model_size"] = get_bytes_model(model)
 
diff --git a/nebula/addons/trustworthiness/factsheet_populators.py b/nebula/addons/trustworthiness/factsheet_populators.py
index 5ace6b034..25f5180bd 100644
--- a/nebula/addons/trustworthiness/factsheet_populators.py
+++ b/nebula/addons/trustworthiness/factsheet_populators.py
@@ -8,8 +8,6 @@
 from nebula.addons.trustworthiness.helpers.model_quality import (
     get_coefficient_of_variation,
     get_generalized_entropy_index,
-    get_macro_f1_score,
-    get_overfitting_score,
     get_theil_index,
     get_well_calibration_error,
 )
@@ -113,7 +111,6 @@ def populate_common_model_quality_metrics(
     test_sample,
 ):
     # Populate model quality, privacy, and fairness metrics shared by all profiles.
-    factsheet["performance"]["test_macro_f1"] = get_macro_f1_score(model, test_loader)
 
     # Privacy metrics derived from train/test behavior.
     factsheet["privacy"]["epsilon_star"] = get_epsilon_star(model, train_loader, test_loader)
@@ -122,7 +119,7 @@ def populate_common_model_quality_metrics(
     factsheet["privacy"]["mia_auc_score"] = 1 - 2 * abs(factsheet["privacy"]["mia_auc"] - 0.5)
 
     # Fairness and calibration metrics expressed as inverse scores.
-    overfitting_value = get_overfitting_score(model, train_loader, test_accuracy)
+    overfitting_value = max(0.0, float(factsheet["performance"]["train_accuracy"]) - float(test_accuracy))
     factsheet["fairness"]["inverse_overfitting"] = inverse_score(overfitting_value)
 
     well_calibration_error_value = get_well_calibration_error(model, test_loader)
diff --git a/nebula/addons/trustworthiness/helpers/csv_io.py b/nebula/addons/trustworthiness/helpers/csv_io.py
index 40bd7fda0..c924fc887 100644
--- a/nebula/addons/trustworthiness/helpers/csv_io.py
+++ b/nebula/addons/trustworthiness/helpers/csv_io.py
@@ -16,6 +16,8 @@
     "accuracy",
     "loss",
     "val_accuracy",
+    "macro_f1",
+    "train_accuracy",
     "dp_enabled",
     "dp_epsilon",
 ]
@@ -30,6 +32,8 @@
     "model_size",
     "local_entropy",
     "val_accuracy",
+    "macro_f1",
+    "train_accuracy",
     "dp_enabled",
     "dp_epsilon",
 ]
@@ -135,6 +139,8 @@ def load_data_results_participant(experiment_name: str, participant_id: int | st
     row = _read_first_csv_row(
         _trustworthiness_path(experiment_name, f"data_results_{participant_id}.csv")
     )
+    macro_f1 = row["macro_f1"] or 0.0
+    train_accuracy = row["train_accuracy"] or 0.0
 
     return (
         int(float(row["bytes_sent"])),
@@ -142,6 +148,8 @@ def load_data_results_participant(experiment_name: str, participant_id: int | st
         float(row["accuracy"]),
         float(row["loss"]),
         float(row["val_accuracy"]),
+        float(macro_f1),
+        float(train_accuracy),
         _to_bool(row["dp_enabled"]),
         float(row["dp_epsilon"]),
     )
@@ -185,6 +193,8 @@ def save_trustworthiness_reports_csv(
             "model_size": report["model_size"],
             "local_entropy": report["local_entropy"],
             "val_accuracy": report["val_accuracy"],
+            "macro_f1": report["macro_f1"],
+            "train_accuracy": report["train_accuracy"],
             "dp_enabled": report["dp_enabled"],
             "dp_epsilon": report["dp_epsilon"],
         }
@@ -231,6 +241,8 @@ def save_results_csv_cfl(
     model_size: int,
     local_entropy: float,
     val_accuracy: float,
+    macro_f1: float,
+    train_accuracy: float,
     dp_enabled: bool,
     dp_epsilon: float,
 ):
@@ -248,6 +260,8 @@ def save_results_csv_cfl(
             "model_size": model_size,
             "local_entropy": local_entropy,
             "val_accuracy": val_accuracy,
+            "macro_f1": macro_f1,
+            "train_accuracy": train_accuracy,
             "dp_enabled": dp_enabled,
             "dp_epsilon": dp_epsilon,
         },
@@ -296,6 +310,8 @@ def save_results_csv(
     accuracy: float,
     loss: float,
     val_accuracy: float,
+    macro_f1: float,
+    train_accuracy: float,
     dp_enabled: bool,
     dp_epsilon: float,
 ):
@@ -310,6 +326,8 @@ def save_results_csv(
             "accuracy": accuracy,
             "loss": loss,
             "val_accuracy": val_accuracy,
+            "macro_f1": macro_f1,
+            "train_accuracy": train_accuracy,
             "dp_enabled": dp_enabled,
             "dp_epsilon": dp_epsilon,
         },
diff --git a/nebula/addons/trustworthiness/helpers/model_quality.py b/nebula/addons/trustworthiness/helpers/model_quality.py
index 0b87937fe..4887f5719 100644
--- a/nebula/addons/trustworthiness/helpers/model_quality.py
+++ b/nebula/addons/trustworthiness/helpers/model_quality.py
@@ -3,105 +3,16 @@
 
 import numpy as np
 import torch
-from sklearn.metrics import f1_score
 
 logger = logging.getLogger(__name__)
 
-def _get_model_accuracy(model, dataloader):
-    """
-    Calculates model accuracy over a dataloader.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        dataloader (DataLoader): Dataloader with (x, y) batches.
-
-    Returns:
-        float: Accuracy in [0, 1].
-    """
-    if not isinstance(model, torch.nn.Module):
-        logger.warning("Model is not a torch.nn.Module")
-        return 0.0
-
-    try:
-        device = next(model.parameters()).device
-    except Exception:
-        device = torch.device("cpu")
-
-    model.eval()
-    correct = 0
-    total = 0
-
-    with torch.no_grad():
-        for x, y in dataloader:
-            x = x.to(device)
-            y = y.to(device)
-
-            out = model(x)
-            logits = out[0] if isinstance(out, (tuple, list)) else out
-            preds = logits.argmax(dim=1)
-
-            correct += (preds == y).sum().item()
-            total += y.size(0)
-
-    return correct / total if total > 0 else 0.0
-
-
-def get_macro_f1_score(model, dataloader):
-    """
-    Calculates macro F1 score over a dataloader.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        dataloader (DataLoader): Dataloader with (x, y) batches.
-
-    Returns:
-        float: Macro F1 score in [0, 1].
-    """
-    if not isinstance(model, torch.nn.Module):
-        logger.warning("Model is not a torch.nn.Module")
-        return 0.0
-
-    try:
-        device = next(model.parameters()).device
-    except Exception:
-        device = torch.device("cpu")
-
-    model.eval()
-    y_true = []
-    y_pred = []
-
-    with torch.no_grad():
-        for x, y in dataloader:
-            x = x.to(device)
-            y = y.to(device)
-
-            out = model(x)
-            logits = out[0] if isinstance(out, (tuple, list)) else out
-            preds = logits.argmax(dim=1)
-
-            y_true.extend(y.detach().cpu().numpy().tolist())
-            y_pred.extend(preds.detach().cpu().numpy().tolist())
-
-    if not y_true:
-        return 0.0
-
-    return float(f1_score(y_true, y_pred, average="macro", zero_division=0))
-
-
 def _extract_model_logits(model_output):
-    """
-    Normalize the output returned by a model forward pass into a logits tensor.
-
-    Some models may return tuples/lists; for trust metrics we always consume the
-    first element as the classification output.
-    """
+    # Return the forward-pass output as-is, or its first element when the model returns a tuple/list.
     return model_output[0] if isinstance(model_output, (tuple, list)) else model_output
 
 
 def _prepare_class_targets(y):
-    """
-    Convert different target representations into a flat class-index tensor.
-    """
+    # Convert different target representations into a flat class-index tensor.
     if not torch.is_tensor(y):
         y = torch.as_tensor(y)
 
@@ -115,14 +26,7 @@ def _prepare_class_targets(y):
 
 
 def _logits_to_probabilities(logits):
-    """
-    Convert model outputs into a probability matrix of shape (N, C).
-
-    Supports:
-    - multiclass logits/log-probabilities with shape (N, C)
-    - binary logits with shape (N,) or (N, 1)
-    - already-normalized probability matrices
-    """
+    # Convert model outputs into a probability matrix of shape (N, C).
     if not torch.is_tensor(logits):
         logits = torch.as_tensor(logits)
 
@@ -151,29 +55,20 @@ def _logits_to_probabilities(logits):
 
 
 def _collect_classification_statistics(model, dataloader):
-    """
-    Collect prediction statistics required by calibration and inequality metrics.
-
-    Returns:
-        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-        predicted labels, true labels, prediction confidences, correctness flags,
-        and probability assigned to the true class.
-    """
+    # Collect prediction statistics required by calibration and inequality metrics.
     if not isinstance(model, torch.nn.Module):
         logger.warning("Model is not a torch.nn.Module")
         empty = np.array([], dtype=float)
-        return empty, empty, empty, empty, empty
+        return empty, empty, empty
 
     try:
         device = next(model.parameters()).device
     except Exception:
         device = torch.device("cpu")
 
-    preds_all = []
-    targets_all = []
-    confidences_all = []
-    correct_all = []
-    true_probs_all = []
+    confidences = []
+    correct = []
+    true_probs = []
 
     model.eval()
     with torch.no_grad():
@@ -188,19 +83,18 @@ def _collect_classification_statistics(model, dataloader):
             x = x.to(device)
             y = _prepare_class_targets(y).to(device)
 
-            out = model(x)
-            logits = _extract_model_logits(out)
-            probs = _logits_to_probabilities(logits)
+            # Metrics consume probabilities even when the model returns raw logits
+            # or wraps the classification output in a tuple/list.
+            probs = _logits_to_probabilities(_extract_model_logits(model(x)))
 
             if probs.ndim != 2 or probs.size(0) == 0:
                 continue
 
-            if y.numel() != probs.size(0):
-                n = min(int(y.numel()), int(probs.size(0)))
-                if n == 0:
-                    continue
-                y = y[:n]
-                probs = probs[:n]
+            n = min(int(y.numel()), int(probs.size(0)))
+            if n == 0:
+                continue
+            y = y[:n]
+            probs = probs[:n]
 
             valid_mask = (y >= 0) & (y < probs.size(1))
             if not torch.any(valid_mask):
@@ -209,74 +103,39 @@ def _collect_classification_statistics(model, dataloader):
             y = y[valid_mask]
             probs = probs[valid_mask]
 
+            # Confidence is the predicted-class probability. true_probs is the
+            # probability assigned to the actual class, used as a continuous benefit.
             conf, preds = probs.max(dim=1)
-            true_probs = probs.gather(1, y.view(-1, 1)).squeeze(1)
-            correct = preds.eq(y).float()
+            confidences.append(conf.cpu())
+            correct.append(preds.eq(y).float().cpu())
+            true_probs.append(probs.gather(1, y.view(-1, 1)).squeeze(1).cpu())
 
-            preds_all.extend(preds.detach().cpu().numpy().tolist())
-            targets_all.extend(y.detach().cpu().numpy().tolist())
-            confidences_all.extend(conf.detach().cpu().numpy().tolist())
-            correct_all.extend(correct.detach().cpu().numpy().tolist())
-            true_probs_all.extend(true_probs.detach().cpu().numpy().tolist())
+    if not confidences:
+        empty = np.array([], dtype=float)
+        return empty, empty, empty
 
     return (
-        np.asarray(preds_all, dtype=int),
-        np.asarray(targets_all, dtype=int),
-        np.asarray(confidences_all, dtype=float),
-        np.asarray(correct_all, dtype=float),
-        np.asarray(true_probs_all, dtype=float),
+        torch.cat(confidences).numpy(),
+        torch.cat(correct).numpy(),
+        torch.cat(true_probs).numpy(),
     )
 
 
-def get_overfitting_score(model, train_dataloader, test_accuracy):
-    """
-    Calculates overfitting as the positive train-test accuracy gap.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate on training data.
-        train_dataloader (DataLoader): Training dataloader.
-        test_accuracy (float): Test accuracy in [0, 1].
-
-    Returns:
-        float: Positive train-test accuracy gap.
-    """
-    try:
-        train_accuracy = _get_model_accuracy(model, train_dataloader)
-        return max(0.0, float(train_accuracy) - float(test_accuracy))
-    except Exception as exc:
-        logger.warning("Could not compute overfitting score")
-        logger.warning(exc)
-        return 0.0
-
-
 def get_well_calibration_error(model, test_dataloader, n_bins=10):
-    """
-    Calculates a well-calibration error style metric using prediction confidence.
-
-    For multiclass models, confidence is taken as the max softmax probability and
-    the observed outcome is whether the prediction is correct.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        test_dataloader (DataLoader): Test dataloader.
-        n_bins (int): Number of quantile bins.
-
-    Returns:
-        float: Calibration error in [0, 1] when computation succeeds.
-    """
+    # Calculates a well-calibration error style metric using prediction confidence.
     if not isinstance(model, torch.nn.Module):
         logger.warning("Model is not a torch.nn.Module")
-        return 0.0
+        return 1.0
 
     try:
         n_bins = max(2, int(n_bins))
     except Exception:
         n_bins = 10
 
-    _, _, confidences, correct, _ = _collect_classification_statistics(model, test_dataloader)
+    confidences, correct, _ = _collect_classification_statistics(model, test_dataloader)
 
     if len(confidences) == 0 or len(correct) == 0:
-        return 0.0
+        return 1.0
 
     confidences = np.clip(np.asarray(confidences, dtype=float), 0.0, 1.0)
     correct = np.clip(np.asarray(correct, dtype=float), 0.0, 1.0)
@@ -285,6 +144,7 @@ def get_well_calibration_error(model, test_dataloader, n_bins=10):
     ece = 0.0
     total = float(len(confidences))
 
+    # ECE compares empirical accuracy and average confidence within each bin.
     for idx in range(n_bins):
         left = bin_edges[idx]
         right = bin_edges[idx + 1]
@@ -305,32 +165,20 @@ def get_well_calibration_error(model, test_dataloader, n_bins=10):
 
 
 def get_generalized_entropy_index(model, test_dataloader, alpha=2):
-    """
-    Calculates generalized entropy index from model predictions.
-
-    Args:
-        model (torch.nn.Module): Model to evaluate.
-        test_dataloader (DataLoader): Test dataloader.
-        alpha (float): GEI alpha parameter.
-
-    Returns:
-        float: Generalized entropy index value.
-    """
+    # Calculates generalized entropy index from model predictions.
     try:
-        _, _, _, _, true_class_probs = _collect_classification_statistics(model, test_dataloader)
+        _, _, true_class_probs = _collect_classification_statistics(model, test_dataloader)
         if len(true_class_probs) == 0:
             return 0.0
 
-        # Use the probability assigned to the true class as a continuous, positive
-        # benefit. This works consistently for multiclass neural models on both
-        # images and tabular data, and avoids collapsing the metric to a coarse
-        # correct/incorrect indicator.
         eps = 1e-12
         b = np.clip(np.asarray(true_class_probs, dtype=float), eps, 1.0)
         mu = float(np.mean(b))
         if mu <= 0:
             return 0.0
 
+        # GEI measures dispersion around the mean benefit. Lower values mean the
+        # model gives more even true-class confidence across samples.
         ratio = np.clip(b / mu, eps, None)
 
         if alpha == 0:
@@ -352,16 +200,12 @@ def get_generalized_entropy_index(model, test_dataloader, alpha=2):
 
 
 def get_theil_index(model, test_dataloader):
-    """
-    Convenience wrapper for generalized entropy index with alpha=1.
-    """
+    # Convenience wrapper for generalized entropy index with alpha=1.
     return get_generalized_entropy_index(model, test_dataloader, alpha=1)
 
 
 def get_coefficient_of_variation(model, test_dataloader):
-    """
-    Calculates coefficient of variation from GEI(alpha=2).
-    """
+    # Calculates coefficient of variation from GEI(alpha=2).
     try:
         gei = get_generalized_entropy_index(model, test_dataloader, alpha=2)
         return float(np.sqrt(2 * gei))
diff --git a/nebula/addons/trustworthiness/helpers/scenario_metrics.py b/nebula/addons/trustworthiness/helpers/scenario_metrics.py
index 1d1f35615..081c3db39 100644
--- a/nebula/addons/trustworthiness/helpers/scenario_metrics.py
+++ b/nebula/addons/trustworthiness/helpers/scenario_metrics.py
@@ -69,12 +69,18 @@ def _find_participant_row_by_int_id(data, participant_id):
 
 def _client_count(data):
     # Global CSVs include the server row, so client averages exclude one row.
-    return max(1, len(data) - 1)
+    return max(1, len(_client_rows(data)))  # keep the previous floor of 1 for empty input
+
+
+def _client_rows(data):
+    # CFL appends the server row after the client reports; drop it unless it is the only row.
+    return data if len(data) <= 1 else data.iloc[:-1]
 
 
 def _mean_client_column(data, column_name):
     # Average a global metric across clients while keeping the historical server-row exclusion.
-    return data[column_name].sum() / _client_count(data)
+    clients = _client_rows(data)
+    return clients[column_name].sum() / max(1, len(clients))
 
 
 def get_bytes_model(model):
@@ -99,14 +105,17 @@ def get_bytes_sent_recv(scenario_name):
 
 
 def get_avg_loss_accuracy(scenario_name):
-    # Return client-average test loss, test accuracy and accuracy standard deviation.
+    # Return client-average test loss, accuracy, accuracy std, macro F1 and train accuracy.
     data = _read_global_results(scenario_name)
+    clients = _client_rows(data)
 
     avg_loss = _mean_client_column(data, "loss")
     avg_accuracy = _mean_client_column(data, "accuracy")
-    std_accuracy = statistics.stdev(data["accuracy"]) if len(data) > 1 else 0.0
+    std_accuracy = statistics.stdev(clients["accuracy"]) if len(clients) > 1 else 0.0
+    avg_macro_f1 = _mean_client_column(data, "macro_f1")
+    avg_train_accuracy = _mean_client_column(data, "train_accuracy")
 
-    return avg_loss, avg_accuracy, std_accuracy
+    return avg_loss, avg_accuracy, std_accuracy, avg_macro_f1, avg_train_accuracy
 
 
 def get_underfitting_score(scenario_name, participant_id):
@@ -137,28 +146,30 @@ def get_dp_local(scenario_name, participant_id):
 def get_dp_global(scenario_name):
     # Return CFL DP settings, averaging epsilon across client rows when DP is enabled.
     data = _read_global_results(scenario_name)
+    clients = _client_rows(data)
 
-    if data["dp_enabled"].iloc[0] == False:
+    if clients["dp_enabled"].iloc[0] == False:
         return False, 0.0
 
     return True, _mean_client_column(data, "dp_epsilon")
 
 
 def get_avg_class_imbalance_model_size(scenario_name):
-    # Return average class imbalance and model size across all global result rows.
+    # Return average class imbalance and model size across client rows.
     data = _read_global_results(scenario_name)
-    number_files = len(data)
+    clients = _client_rows(data)
+    number_files = max(1, len(clients))
 
-    avg_class_imbalance = data["class_imbalance"].sum() / number_files
-    avg_model_size = data["model_size"].sum() / number_files
+    avg_class_imbalance = clients["class_imbalance"].sum() / number_files
+    avg_model_size = clients["model_size"].sum() / number_files
 
     return avg_class_imbalance, avg_model_size
 
 
 def get_entropy_list(scenario_name):
-    # Return local entropy values so callers can normalize the distribution.
+    # Return client entropy values so callers can normalize the distribution.
     data = _read_global_results(scenario_name)
-    return data["local_entropy"].tolist()
+    return _client_rows(data)["local_entropy"].tolist()
 
 
 def stop_emissions_tracking_and_save(
diff --git a/nebula/addons/trustworthiness/trustworthiness.py b/nebula/addons/trustworthiness/trustworthiness.py
index 1996171ba..39778c22c 100644
--- a/nebula/addons/trustworthiness/trustworthiness.py
+++ b/nebula/addons/trustworthiness/trustworthiness.py
@@ -69,8 +69,8 @@ def get_sample_size(self) -> float:
         raise NotImplementedError
 
     @abstractmethod
-    def get_metrics(self) -> tuple[float, float]:
-        # Return the latest test loss and accuracy.
+    def get_metrics(self) -> tuple[float, float, float]:
+        # Return the latest test loss, accuracy and macro F1.
         raise NotImplementedError
 
     @abstractmethod
@@ -93,8 +93,10 @@ def __init__(self, engine: Engine, idx, trust_files_route, workload: str, role_l
         self._sample_size = sample_size
         self._current_loss = None
         self._current_accuracy = None
+        self._current_macro_f1 = None
         self._current_val_loss = None
         self._current_val_accuracy = None
+        self._current_train_accuracy = None
         self._experiment_name = ""
         self._per_round = None
         self._role_label = role_label
@@ -134,11 +136,11 @@ def get_sample_size(self):
 
     def get_metrics(self):
         # Return the latest test metrics observed through events.
-        return (self._current_loss, self._current_accuracy)
+        return (self._current_loss, self._current_accuracy, self._current_macro_f1)
 
     def get_validation_metrics(self):
-        # Return the latest validation metrics observed through events.
-        return (self._current_val_loss, self._current_val_accuracy)
+        # Return the latest validation metrics and train accuracy observed through events.
+        return (self._current_val_loss, self._current_val_accuracy, self._current_train_accuracy)
 
     def _is_reputation_enabled(self) -> bool:
         # Read the reputation toggle from the participant defense config.
@@ -230,18 +232,20 @@ async def _process_aggregation_event(self, age: AggregationEvent):
 
     async def _process_test_metrics_event(self, tme: TestMetricsEvent):
         # Cache final test metrics and forward them to per-round trust metrics.
-        cur_loss, cur_acc = await tme.get_event_data()
+        cur_loss, cur_acc, cur_macro_f1 = await tme.get_event_data()
         if cur_loss is not None and cur_acc is not None:
             self._current_loss, self._current_accuracy = cur_loss, cur_acc
+            self._current_macro_f1 = cur_macro_f1
 
             if self._per_round is not None:
                 await self._per_round.on_test_metrics(self._engine, float(cur_loss), float(cur_acc))
 
     async def _process_validation_metrics_event(self, vme: ValidationMetricsEvent):
         # Cache final validation metrics for final trustworthiness outputs.
-        cur_loss, cur_acc = await vme.get_event_data()
+        cur_loss, cur_acc, train_acc = await vme.get_event_data()
         if cur_loss is not None and cur_acc is not None:
             self._current_val_loss, self._current_val_accuracy = cur_loss, cur_acc
+            self._current_train_accuracy = train_acc
 
 
 class TrustWorkloadTrainer(BaseTrustWorkload):
@@ -312,7 +316,7 @@ async def _send_cfl_trustworthiness_report(self, experiment_name: str):
 
     def _build_cfl_trustworthiness_report(self, experiment_name: str) -> dict:
         # Load local metrics and shape them as a trustworthiness message payload.
-        bytes_sent, bytes_recv, accuracy, loss, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(
+        bytes_sent, bytes_recv, accuracy, loss, val_accuracy, macro_f1, train_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(
             experiment_name,
             self._idx,
         )
@@ -340,6 +344,8 @@ def _build_cfl_trustworthiness_report(self, experiment_name: str) -> dict:
             "model_size": get_bytes_model(self._engine.trainer.model),
             "local_entropy": get_local_entropy(self._idx, experiment_name),
             "val_accuracy": val_accuracy,
+            "macro_f1": macro_f1,
+            "train_accuracy": train_accuracy,
             "dp_enabled": dp_enabled,
             "dp_epsilon": dp_epsilon,
         }
@@ -349,7 +355,7 @@ def _log_cfl_trustworthiness_report(self, server_addr: str, report: dict):
         logging.info(
             "[TW SEND] dest=%s node_id=%s bytes_sent=%s bytes_recv=%s "
             "accuracy=%s loss=%s role=%s energy_grid=%s emissions=%s workload=%s "
-            "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s dp_enabled=%s dp_epsilon=%s",
+            "cpu_model=%s gpu_model=%s cpu_used=%s gpu_used=%s energy_consumed=%s sample_size=%s class_imbalance=%s model_size=%s local_entropy=%s val_accuracy=%s dp_enabled=%s dp_epsilon=%s macro_f1=%s train_accuracy=%s",
             server_addr,
             str(self._idx),
             report["bytes_sent"],
@@ -372,6 +378,8 @@ def _log_cfl_trustworthiness_report(self, server_addr: str, report: dict):
             report["val_accuracy"],
             report["dp_enabled"],
             report["dp_epsilon"],
+            report["macro_f1"],
+            report["train_accuracy"],
         )
 
     async def _finish_trustscores_exchange(self, federation, trust_config, experiment_name):
@@ -840,7 +848,7 @@ def _save_trustworthiness_reports_once(self):
 
     async def _save_local_server_report_and_generate_factsheet(self, trust_config, experiment_name):
         # Add the server's own local report and generate final trust artifacts.
-        bytes_sent, bytes_recv, _, _, val_accuracy, dp_enabled, dp_epsilon = load_data_results_participant(
+        bytes_sent, bytes_recv, _, _, val_accuracy, _, _, dp_enabled, dp_epsilon = load_data_results_participant(
             self._experiment_name,
             self._idx,
         )
@@ -859,7 +867,7 @@ async def _save_local_server_report_and_generate_factsheet(self, trust_config, e
         model_size = get_bytes_model(self._engine.trainer.model)
         local_entropy = get_local_entropy(self._idx, experiment_name)
 
-        save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, dp_enabled, dp_epsilon)
+        save_results_csv_cfl(self._experiment_name, self._idx, bytes_sent, bytes_recv, 0, 0, class_imbalance, model_size, local_entropy, val_accuracy, 0, 0, dp_enabled, dp_epsilon)
         save_emissions_csv_cfl(self._experiment_name, self._idx, role, energy_grid, emissions, workload, cpu_model, gpu_model, cpu_used, gpu_used, energy_consumed, sample_size)
         await self._generate_factsheet(trust_config, experiment_name)
 
@@ -887,7 +895,9 @@ async def register_trustworthiness_report(self, source, message):
             "local_entropy": message.local_entropy,
             "val_accuracy": message.val_accuracy,
             "dp_enabled": message.dp_enabled,
-            "dp_epsilon": message.dp_epsilon
+            "dp_epsilon": message.dp_epsilon,
+            "macro_f1": message.macro_f1,
+            "train_accuracy": message.train_accuracy,
         }
 
         logging.info(
@@ -987,8 +997,8 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
 
         await self.tw.finish_experiment_role_pre_actions()
 
-        last_loss, last_accuracy = self.tw.get_metrics()
-        _, last_val_accuracy = self.tw.get_validation_metrics()
+        last_loss, last_accuracy, last_macro_f1 = self.tw.get_metrics()
+        _, last_val_accuracy, last_train_accuracy = self.tw.get_validation_metrics()
         if last_val_accuracy is None:
             last_val_accuracy = 0.0
 
@@ -1008,7 +1018,19 @@ async def _process_experiment_finish_event(self, efe: ExperimentFinishEvent):
         sample_size = self.tw.get_sample_size()
 
         # Final operations
-        save_results_csv(self._experiment_name, self._idx, bytes_sent, bytes_recv, last_accuracy, last_loss, last_val_accuracy, dp_enabled, dp_epsilon)
+        save_results_csv(
+            self._experiment_name,
+            self._idx,
+            bytes_sent,
+            bytes_recv,
+            last_accuracy,
+            last_loss,
+            last_val_accuracy,
+            last_macro_f1,
+            last_train_accuracy,
+            dp_enabled,
+            dp_epsilon,
+        )
         stop_emissions_tracking_and_save(self._tracker, self._trust_dir_files, f'emissions_{self._idx}.csv', self._role.value, workload, sample_size, self._idx)
         await self.tw.finish_experiment_role_post_actions(self._trust_config, self._experiment_name)
 
diff --git a/nebula/core/engine.py b/nebula/core/engine.py
index cb05c57db..831abf549 100644
--- a/nebula/core/engine.py
+++ b/nebula/core/engine.py
@@ -618,7 +618,9 @@ async def _trustworthiness_report_callback(self, source, message):
                 "local_entropy": message.local_entropy,
                 "val_accuracy": message.val_accuracy,
                 "dp_enabled": message.dp_enabled,
-                "dp_epsilon": message.dp_epsilon
+                "dp_epsilon": message.dp_epsilon,
+                "macro_f1": message.macro_f1,
+                "train_accuracy": message.train_accuracy,
             }
 
             logging.info(f"handle_trustworthiness_message | Trigger | {report}")
diff --git a/nebula/core/models/cifar10/resnet.py b/nebula/core/models/cifar10/resnet.py
index a0b6d1f15..d2da13f3b 100755
--- a/nebula/core/models/cifar10/resnet.py
+++ b/nebula/core/models/cifar10/resnet.py
@@ -47,7 +47,7 @@ def __init__(
                 MulticlassAccuracy(num_classes=num_classes),
                 MulticlassPrecision(num_classes=num_classes),
                 MulticlassRecall(num_classes=num_classes),
-                MulticlassF1Score(num_classes=num_classes),
+                MulticlassF1Score(num_classes=num_classes, average="macro"),
             ])
         self.train_metrics = metrics.clone(prefix="Train/")
         self.val_metrics = metrics.clone(prefix="Validation/")
diff --git a/nebula/core/models/nebulamodel.py b/nebula/core/models/nebulamodel.py
index 3a270ae88..a6557fed6 100755
--- a/nebula/core/models/nebulamodel.py
+++ b/nebula/core/models/nebulamodel.py
@@ -83,10 +83,18 @@ def log_metrics_end(self, phase):
             f"{phase}/{key.replace('Multiclass', '').split('/')[-1]}": value.detach() for key, value in output.items()
         }
 
+        output_values = {
+            key: float(value.detach().cpu().item()) for key, value in output.items()
+        }
+
+        if phase == "Train":
+            self._latest_train_metrics = output_values
+
         if phase == "Validation":
-            self._latest_validation_metrics = {
-                key: float(value.detach().cpu().item()) for key, value in output.items()
-            }
+            self._latest_validation_metrics = output_values
+
+        if phase in {"Test", "Test (Local)"}:
+            self._latest_test_metrics = output_values
 
         if phase == "Train" and self._train_extra_metrics:
             output.update({
@@ -180,7 +188,7 @@ def __init__(
                 MulticlassAccuracy(num_classes=num_classes),
                 MulticlassPrecision(num_classes=num_classes),
                 MulticlassRecall(num_classes=num_classes),
-                MulticlassF1Score(num_classes=num_classes),
+                MulticlassF1Score(num_classes=num_classes, average="macro"),
             ])
         self.train_metrics = metrics.clone(prefix="Train/")
         self.val_metrics = metrics.clone(prefix="Validation/")
@@ -212,7 +220,9 @@ def __init__(
         self._current_loss = -1
         self._optimizer = None
         self._optimizer_override = None
+        self._latest_train_metrics = {}
         self._latest_validation_metrics = {}
+        self._latest_test_metrics = {}
         self._train_extra_metrics = {}
 
         # DP trainers update these fields after querying the Opacus accountant.
@@ -293,6 +303,18 @@ def get_loss(self):
     def get_latest_validation_metrics(self):
         return self._latest_validation_metrics
 
+    def get_latest_train_metrics(self):
+        return self._latest_train_metrics
+
+    def get_latest_test_metrics(self):
+        return self._latest_test_metrics
+
+    def get_latest_train_accuracy(self):
+        return self._latest_train_metrics.get("Train/Accuracy")
+
+    def get_latest_test_macro_f1(self):
+        return self._latest_test_metrics.get("Test (Local)/F1Score", self._latest_test_metrics.get("Test/F1Score"))  # "Test" phase is cached too
+
     def modify_learning_rate(self, new_lr):
         logging.info(f"Modifiying | learning rate, new value: {new_lr}")
         self.learning_rate = new_lr
diff --git a/nebula/core/nebulaevents.py b/nebula/core/nebulaevents.py
index ecdd482da..583f8facd 100644
--- a/nebula/core/nebulaevents.py
+++ b/nebula/core/nebulaevents.py
@@ -464,24 +464,26 @@ async def get_event_data(self):
         return (self.latitude, self.longitude)
 
 class TestMetricsEvent(AddonEvent):
-    def __init__(self, loss, accuracy):
+    def __init__(self, loss, accuracy, macro_f1=None):
         self._loss = loss
         self._accuracy = accuracy
+        self._macro_f1 = macro_f1
 
     def __str__(self):
         return "TestMetricsEvent"
 
     async def get_event_data(self):
-        return (self._loss, self._accuracy)
+        return (self._loss, self._accuracy, self._macro_f1)
 
 
 class ValidationMetricsEvent(AddonEvent):
-    def __init__(self, loss, accuracy):
+    def __init__(self, loss, accuracy, train_accuracy=None):
         self._loss = loss
         self._accuracy = accuracy
+        self._train_accuracy = train_accuracy
 
     def __str__(self):
         return "ValidationMetricsEvent"
 
     async def get_event_data(self):
-        return (self._loss, self._accuracy)
+        return (self._loss, self._accuracy, self._train_accuracy)
diff --git a/nebula/core/network/messages.py b/nebula/core/network/messages.py
index 9963ff6ea..29e7088bc 100644
--- a/nebula/core/network/messages.py
+++ b/nebula/core/network/messages.py
@@ -134,7 +134,9 @@ def _define_message_templates(self):
                     "local_entropy",
                     "val_accuracy",
                     "dp_enabled",
-                    "dp_epsilon"
+                    "dp_epsilon",
+                    "macro_f1",
+                    "train_accuracy"
                 ],
                 "defaults": {},
             },
diff --git a/nebula/core/pb/nebula.proto b/nebula/core/pb/nebula.proto
index c05d6131f..13b0fa74d 100755
--- a/nebula/core/pb/nebula.proto
+++ b/nebula/core/pb/nebula.proto
@@ -185,6 +185,8 @@ message TrustworthinessMessage {
   float val_accuracy = 20;
   bool dp_enabled = 21;
   float dp_epsilon = 22;
+  double macro_f1 = 23;
+  double train_accuracy = 24;
 }
 
 message TrustscoresMessage {
diff --git a/nebula/core/pb/nebula_pb2.py b/nebula/core/pb/nebula_pb2.py
index bb470f06b..bc06160a2 100644
--- a/nebula/core/pb/nebula_pb2.py
+++ b/nebula/core/pb/nebula_pb2.py
@@ -13,7 +13,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xa6\x06\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x12\x35\n\x11sdflmodel_message\x18\x0e \x01(\x0b\x32\x18.nebula.SdflmodelMessageH\x00\x12\x41\n\x17reputationtable_message\x18\x0f \x01(\x0b\x32\x1e.nebula.ReputationtableMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 
\x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\xc7\x01\n\x10SdflmodelMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.SdflmodelMessage.Action\x12\x0e\n\x06target\x18\x02 \x01(\t\x12\x12\n\nparameters\x18\x03 \x01(\x0c\x12\x0e\n\x06weight\x18\x04 \x01(\x03\x12\r\n\x05round\x18\x05 \x01(\x05\x12\x0f\n\x07node_id\x18\x06 \x01(\t\".\n\x06\x41\x63tion\x12\x12\n\x0eTRAINER_UPDATE\x10\x00\x12\x10\n\x0cGLOBAL_MODEL\x10\x01\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 
\x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"\xa3\x01\n\x16ReputationtableMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05round\x18\x02 \x01(\x05\x12\x1d\n\x15reputation_table_json\x18\x03 \x01(\t\x12\x35\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32%.nebula.ReputationtableMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05TABLE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\x80\x04\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\x12\x12\n\ndp_enabled\x18\x15 
\x01(\x08\x12\x12\n\ndp_epsilon\x18\x16 \x01(\x02\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0cnebula.proto\x12\x06nebula\"\xa6\x06\n\x07Wrapper\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x35\n\x11\x64iscovery_message\x18\x02 \x01(\x0b\x32\x18.nebula.DiscoveryMessageH\x00\x12\x31\n\x0f\x63ontrol_message\x18\x03 \x01(\x0b\x32\x16.nebula.ControlMessageH\x00\x12\x37\n\x12\x66\x65\x64\x65ration_message\x18\x04 \x01(\x0b\x32\x19.nebula.FederationMessageH\x00\x12-\n\rmodel_message\x18\x05 \x01(\x0b\x32\x14.nebula.ModelMessageH\x00\x12\x37\n\x12\x63onnection_message\x18\x06 \x01(\x0b\x32\x19.nebula.ConnectionMessageH\x00\x12\x33\n\x10response_message\x18\x07 \x01(\x0b\x32\x17.nebula.ResponseMessageH\x00\x12\x37\n\x12reputation_message\x18\x08 \x01(\x0b\x32\x19.nebula.ReputationMessageH\x00\x12\x33\n\x10\x64iscover_message\x18\t \x01(\x0b\x32\x17.nebula.DiscoverMessageH\x00\x12-\n\roffer_message\x18\n \x01(\x0b\x32\x14.nebula.OfferMessageH\x00\x12+\n\x0clink_message\x18\x0b \x01(\x0b\x32\x13.nebula.LinkMessageH\x00\x12\x41\n\x17trustworthiness_message\x18\x0c \x01(\x0b\x32\x1e.nebula.TrustworthinessMessageH\x00\x12\x39\n\x13trustscores_message\x18\r \x01(\x0b\x32\x1a.nebula.TrustscoresMessageH\x00\x12\x35\n\x11sdflmodel_message\x18\x0e \x01(\x0b\x32\x18.nebula.SdflmodelMessageH\x00\x12\x41\n\x17reputationtable_message\x18\x0f \x01(\x0b\x32\x1e.nebula.ReputationtableMessageH\x00\x42\t\n\x07message\"\x9e\x01\n\x10\x44iscoveryMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.DiscoveryMessage.Action\x12\x10\n\x08latitude\x18\x02 \x01(\x02\x12\x11\n\tlongitude\x18\x03 \x01(\x02\"4\n\x06\x41\x63tion\x12\x0c\n\x08\x44ISCOVER\x10\x00\x12\x0c\n\x08REGISTER\x10\x01\x12\x0e\n\nDEREGISTER\x10\x02\"\xd1\x01\n\x0e\x43ontrolMessage\x12-\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1d.nebula.ControlMessage.Action\x12\x0b\n\x03log\x18\x02 
\x01(\t\"\x82\x01\n\x06\x41\x63tion\x12\t\n\x05\x41LIVE\x10\x00\x12\x0c\n\x08OVERHEAD\x10\x01\x12\x0c\n\x08MOBILITY\x10\x02\x12\x0c\n\x08RECOVERY\x10\x03\x12\r\n\tWEAK_LINK\x10\x04\x12\x17\n\x13LEADERSHIP_TRANSFER\x10\x05\x12\x1b\n\x17LEADERSHIP_TRANSFER_ACK\x10\x06\"\xcd\x01\n\x11\x46\x65\x64\x65rationMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.FederationMessage.Action\x12\x11\n\targuments\x18\x02 \x03(\t\x12\r\n\x05round\x18\x03 \x01(\x05\"d\n\x06\x41\x63tion\x12\x14\n\x10\x46\x45\x44\x45RATION_START\x10\x00\x12\x0e\n\nREPUTATION\x10\x01\x12\x1e\n\x1a\x46\x45\x44\x45RATION_MODELS_INCLUDED\x10\x02\x12\x14\n\x10\x46\x45\x44\x45RATION_READY\x10\x03\"A\n\x0cModelMessage\x12\x12\n\nparameters\x18\x01 \x01(\x0c\x12\x0e\n\x06weight\x18\x02 \x01(\x03\x12\r\n\x05round\x18\x03 \x01(\x05\"\xc7\x01\n\x10SdflmodelMessage\x12/\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1f.nebula.SdflmodelMessage.Action\x12\x0e\n\x06target\x18\x02 \x01(\t\x12\x12\n\nparameters\x18\x03 \x01(\x0c\x12\x0e\n\x06weight\x18\x04 \x01(\x03\x12\r\n\x05round\x18\x05 \x01(\x05\x12\x0f\n\x07node_id\x18\x06 \x01(\t\".\n\x06\x41\x63tion\x12\x12\n\x0eTRAINER_UPDATE\x10\x00\x12\x10\n\x0cGLOBAL_MODEL\x10\x01\"\x8f\x01\n\x11\x43onnectionMessage\x12\x30\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32 .nebula.ConnectionMessage.Action\"H\n\x06\x41\x63tion\x12\x0b\n\x07\x43ONNECT\x10\x00\x12\x0e\n\nDISCONNECT\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\x95\x01\n\x0f\x44iscoverMessage\x12.\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1e.nebula.DiscoverMessage.Action\"R\n\x06\x41\x63tion\x12\x11\n\rDISCOVER_JOIN\x10\x00\x12\x12\n\x0e\x44ISCOVER_NODES\x10\x01\x12\x10\n\x0cLATE_CONNECT\x10\x02\x12\x0f\n\x0bRESTRUCTURE\x10\x03\"\xce\x01\n\x0cOfferMessage\x12+\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1b.nebula.OfferMessage.Action\x12\x13\n\x0bn_neighbors\x18\x02 \x01(\x02\x12\x0c\n\x04loss\x18\x03 \x01(\x02\x12\x12\n\nparameters\x18\x04 \x01(\x0c\x12\x0e\n\x06rounds\x18\x05 
\x01(\x05\x12\r\n\x05round\x18\x06 \x01(\x05\x12\x0e\n\x06\x65pochs\x18\x07 \x01(\x05\"+\n\x06\x41\x63tion\x12\x0f\n\x0bOFFER_MODEL\x10\x00\x12\x10\n\x0cOFFER_METRIC\x10\x01\"w\n\x0bLinkMessage\x12*\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32\x1a.nebula.LinkMessage.Action\x12\r\n\x05\x61\x64\x64rs\x18\x02 \x01(\t\"-\n\x06\x41\x63tion\x12\x0e\n\nCONNECT_TO\x10\x00\x12\x13\n\x0f\x44ISCONNECT_FROM\x10\x01\"\x89\x01\n\x11ReputationMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\r\n\x05round\x18\x03 \x01(\x05\x12\x30\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32 .nebula.ReputationMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\"\xa3\x01\n\x16ReputationtableMessage\x12\x0f\n\x07node_id\x18\x01 \x01(\t\x12\r\n\x05round\x18\x02 \x01(\x05\x12\x1d\n\x15reputation_table_json\x18\x03 \x01(\t\x12\x35\n\x06\x61\x63tion\x18\x04 \x01(\x0e\x32%.nebula.ReputationtableMessage.Action\"\x13\n\x06\x41\x63tion\x12\t\n\x05TABLE\x10\x00\"#\n\x0fResponseMessage\x12\x10\n\x08response\x18\x01 \x01(\t\"\xaa\x04\n\x16TrustworthinessMessage\x12\x35\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32%.nebula.TrustworthinessMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x12\n\nbytes_sent\x18\x03 \x01(\x03\x12\x12\n\nbytes_recv\x18\x04 \x01(\x03\x12\x10\n\x08\x61\x63\x63uracy\x18\x05 \x01(\x01\x12\x0c\n\x04loss\x18\x06 \x01(\x01\x12\x0c\n\x04role\x18\x07 \x01(\t\x12\x13\n\x0b\x65nergy_grid\x18\x08 \x01(\x01\x12\x11\n\temissions\x18\t \x01(\x01\x12\x10\n\x08workload\x18\n \x01(\t\x12\x11\n\tcpu_model\x18\x0b \x01(\t\x12\x11\n\tgpu_model\x18\x0c \x01(\t\x12\x10\n\x08\x63pu_used\x18\r \x01(\x08\x12\x10\n\x08gpu_used\x18\x0e \x01(\x08\x12\x17\n\x0f\x65nergy_consumed\x18\x0f \x01(\x01\x12\x13\n\x0bsample_size\x18\x10 \x01(\x05\x12\x17\n\x0f\x63lass_imbalance\x18\x11 \x01(\x02\x12\x12\n\nmodel_size\x18\x12 \x01(\x03\x12\x15\n\rlocal_entropy\x18\x13 \x01(\x02\x12\x14\n\x0cval_accuracy\x18\x14 \x01(\x02\x12\x12\n\ndp_enabled\x18\x15 
\x01(\x08\x12\x12\n\ndp_epsilon\x18\x16 \x01(\x02\x12\x10\n\x08macro_f1\x18\x17 \x01(\x01\x12\x16\n\x0etrain_accuracy\x18\x18 \x01(\x01\"\x14\n\x06\x41\x63tion\x12\n\n\x06REPORT\x10\x00\"\x88\x01\n\x12TrustscoresMessage\x12\x31\n\x06\x61\x63tion\x18\x01 \x01(\x0e\x32!.nebula.TrustscoresMessage.Action\x12\x0f\n\x07node_id\x18\x02 \x01(\t\x12\x19\n\x11trust_report_json\x18\x03 \x01(\t\"\x13\n\x06\x41\x63tion\x12\t\n\x05SHARE\x10\x00\x62\x06proto3')
 
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'nebula_pb2', globals())
@@ -67,11 +67,11 @@
   _RESPONSEMESSAGE._serialized_start=2617
   _RESPONSEMESSAGE._serialized_end=2652
   _TRUSTWORTHINESSMESSAGE._serialized_start=2655
-  _TRUSTWORTHINESSMESSAGE._serialized_end=3167
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=3147
-  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=3167
-  _TRUSTSCORESMESSAGE._serialized_start=3170
-  _TRUSTSCORESMESSAGE._serialized_end=3306
+  _TRUSTWORTHINESSMESSAGE._serialized_end=3209
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_start=3189
+  _TRUSTWORTHINESSMESSAGE_ACTION._serialized_end=3209
+  _TRUSTSCORESMESSAGE._serialized_start=3212
+  _TRUSTSCORESMESSAGE._serialized_end=3348
   _TRUSTSCORESMESSAGE_ACTION._serialized_start=2430
   _TRUSTSCORESMESSAGE_ACTION._serialized_end=2449
 # @@protoc_insertion_point(module_scope)
diff --git a/nebula/core/training/lightning.py b/nebula/core/training/lightning.py
index f5988ef00..b7551ea5d 100755
--- a/nebula/core/training/lightning.py
+++ b/nebula/core/training/lightning.py
@@ -295,9 +295,9 @@ async def train(self):
         try:
             self.create_trainer()
             logging.info(f"{'=' * 10} [Training] Started (check training logs for progress) {'=' * 10}")
-            val_loss, val_accuracy = await asyncio.to_thread(self._train_sync)
+            val_loss, val_accuracy, train_accuracy = await asyncio.to_thread(self._train_sync)
             logging.info(f"{'=' * 10} [Training] Finished (check training logs for progress) {'=' * 10}")
-            vme = ValidationMetricsEvent(val_loss, val_accuracy)
+            vme = ValidationMetricsEvent(val_loss, val_accuracy, train_accuracy)
             await EventManager.get_instance().publish_addonevent(vme)
         except Exception as e:
             logging_training.error(f"Error training model: {e}")
@@ -317,39 +317,51 @@ def _train_sync(self):
                 loss = raw_loss.item() if hasattr(raw_loss, "item") else raw_loss
 
             accuracy = validation_metrics.get("Validation/Accuracy")
-            return loss, accuracy
+            train_accuracy = None
+            get_train_accuracy = getattr(self.model, "get_latest_train_accuracy", None)
+            if callable(get_train_accuracy):
+                train_accuracy = get_train_accuracy()
+
+            return loss, accuracy, train_accuracy
         except Exception as e:
             logging_training.error(f"Error in _train_sync: {e}")
             tb = traceback.format_exc()
             logging_training.error(f"Traceback: {tb}")
             # If "raise", the exception will be managed by the main thread
-            return None, None
+            return None, None, None
 
     async def test(self):
         try:
             self.create_trainer()
             logging.info(f"{'=' * 10} [Testing] Started (check training logs for progress) {'=' * 10}")
-            loss, accuracy = await asyncio.to_thread(self._test_sync)
+            loss, accuracy, macro_f1 = await asyncio.to_thread(self._test_sync)
             logging.info(f"{'=' * 10} [Testing] Finished (check training logs for progress) {'=' * 10}")
-            tme = TestMetricsEvent(loss, accuracy)
+            tme = TestMetricsEvent(loss, accuracy, macro_f1)
             await EventManager.get_instance().publish_addonevent(tme)
         except Exception as e:
             logging_training.error(f"Error testing model: {e}")
             logging_training.error(traceback.format_exc())
 
+    def _metric_value(self, value):
+        return value.item() if hasattr(value, "item") else value
+
     def _test_sync(self):
         try:
             self._trainer.test(self.model, self.datamodule, verbose=True)
             metrics = self._trainer.callback_metrics
-            loss = metrics.get('val_loss/dataloader_idx_0', None).item()
-            accuracy = metrics.get('val_accuracy/dataloader_idx_0', None).item()
-            return loss, accuracy
+            loss = self._metric_value(metrics.get('val_loss/dataloader_idx_0'))
+            accuracy = self._metric_value(metrics.get('val_accuracy/dataloader_idx_0'))
+            macro_f1 = None
+            get_macro_f1 = getattr(self.model, "get_latest_test_macro_f1", None)
+            if callable(get_macro_f1):
+                macro_f1 = get_macro_f1()
+
+            return loss, accuracy, macro_f1
         except Exception as e:
             logging_training.error(f"Error in _test_sync: {e}")
             tb = traceback.format_exc()
             logging_training.error(f"Traceback: {tb}")
-            # If "raise", the exception will be managed by the main thread
-            return None, None
+            return None, None, None
 
     def cleanup(self):
         if self._trainer is not None:

From bc7ef3a099a59d8fbd51f065c9ba6b812a052510 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 4 Jun 2026 12:02:16 +0200
Subject: [PATCH 59/66] Adversarial Training CAPGD V1

---
 .../defenses/adversarial_training/__init__.py |  59 ++++
 .../base.py}                                  |   0
 .../config.py}                                |   9 +-
 .../defense.py}                               |  28 +-
 .../image.py}                                 |   4 +-
 .../logging.py}                               |   2 +-
 .../defenses/adversarial_training/tabular.py  | 199 +++++++++++
 .../defenses/adversarial_training_tabular.py  | 323 ------------------
 nebula/core/datasets/tabular_metadata.py      |  52 +++
 9 files changed, 332 insertions(+), 344 deletions(-)
 create mode 100644 nebula/addons/defenses/adversarial_training/__init__.py
 rename nebula/addons/defenses/{adversarial_training_base.py => adversarial_training/base.py} (100%)
 rename nebula/addons/defenses/{adversarial_training_config.py => adversarial_training/config.py} (90%)
 rename nebula/addons/defenses/{adversarial_training.py => adversarial_training/defense.py} (91%)
 rename nebula/addons/defenses/{adversarial_training_image.py => adversarial_training/image.py} (96%)
 rename nebula/addons/defenses/{adversarial_training_logging.py => adversarial_training/logging.py} (99%)
 create mode 100644 nebula/addons/defenses/adversarial_training/tabular.py
 delete mode 100644 nebula/addons/defenses/adversarial_training_tabular.py

diff --git a/nebula/addons/defenses/adversarial_training/__init__.py b/nebula/addons/defenses/adversarial_training/__init__.py
new file mode 100644
index 000000000..c417b1b93
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training/__init__.py
@@ -0,0 +1,59 @@
+from nebula.addons.defenses.adversarial_training.defense import (
+    CAA_TABULAR_DATASETS,
+    ERR_ALPHA,
+    ERR_APPLY_PROBABILITY,
+    ERR_CLIP_BOUNDS,
+    ERR_EPSILON,
+    ERR_IMAGE_ATTACK,
+    ERR_LOSS_WEIGHTS,
+    ERR_MIXED_WEIGHTS,
+    ERR_MODE,
+    ERR_STEPS,
+    ERR_TABULAR_ATTACK,
+    ERR_TABULAR_METADATA,
+    ERR_UNSUPPORTED_ATTACK,
+    IMAGE_ADVERSARIAL_ATTACKS,
+    IMAGE_DATASET_NORMALIZATION,
+    TABULAR_ADVERSARIAL_ATTACKS,
+    AdversarialExampleGenerator,
+    AdversarialTrainingConfig,
+    AdversarialTrainingDefense,
+    ImageAdversarialExampleGenerator,
+    ImageFGSMGenerator,
+    ImagePGDGenerator,
+    TabularAdversarialExampleGenerator,
+    TabularCAAGenerator,
+    TabularCAPGDGenerator,
+    TabularConstraintSet,
+    apply_adversarial_training_if_enabled,
+)
+
+__all__ = [
+    "CAA_TABULAR_DATASETS",
+    "ERR_ALPHA",
+    "ERR_APPLY_PROBABILITY",
+    "ERR_CLIP_BOUNDS",
+    "ERR_EPSILON",
+    "ERR_IMAGE_ATTACK",
+    "ERR_LOSS_WEIGHTS",
+    "ERR_MIXED_WEIGHTS",
+    "ERR_MODE",
+    "ERR_STEPS",
+    "ERR_TABULAR_ATTACK",
+    "ERR_TABULAR_METADATA",
+    "ERR_UNSUPPORTED_ATTACK",
+    "IMAGE_ADVERSARIAL_ATTACKS",
+    "IMAGE_DATASET_NORMALIZATION",
+    "TABULAR_ADVERSARIAL_ATTACKS",
+    "AdversarialExampleGenerator",
+    "AdversarialTrainingConfig",
+    "AdversarialTrainingDefense",
+    "ImageAdversarialExampleGenerator",
+    "ImageFGSMGenerator",
+    "ImagePGDGenerator",
+    "TabularAdversarialExampleGenerator",
+    "TabularCAAGenerator",
+    "TabularCAPGDGenerator",
+    "TabularConstraintSet",
+    "apply_adversarial_training_if_enabled",
+]
diff --git a/nebula/addons/defenses/adversarial_training_base.py b/nebula/addons/defenses/adversarial_training/base.py
similarity index 100%
rename from nebula/addons/defenses/adversarial_training_base.py
rename to nebula/addons/defenses/adversarial_training/base.py
diff --git a/nebula/addons/defenses/adversarial_training_config.py b/nebula/addons/defenses/adversarial_training/config.py
similarity index 90%
rename from nebula/addons/defenses/adversarial_training_config.py
rename to nebula/addons/defenses/adversarial_training/config.py
index 930144b5d..fcb6c5aa2 100644
--- a/nebula/addons/defenses/adversarial_training_config.py
+++ b/nebula/addons/defenses/adversarial_training/config.py
@@ -2,9 +2,11 @@
 from typing import Any
 
 IMAGE_ADVERSARIAL_ATTACKS = {"fgsm", "pgd"}
+TABULAR_ADVERSARIAL_ATTACKS = {"capgd"}
 CAA_TABULAR_DATASETS = {"AdultCensus"}
 
 ERR_IMAGE_ATTACK = "image adversarial_training.attack must be one of: fgsm, pgd"
+ERR_TABULAR_ATTACK = "tabular adversarial_training.attack must be one of: capgd"
 ERR_MODE = "adversarial_training.mode must be one of: clean, adversarial, mixed"
 ERR_EPSILON = "adversarial_training.epsilon must be >= 0"
 ERR_ALPHA = "adversarial_training.alpha must be >= 0"
@@ -51,8 +53,9 @@ def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTr
 
     dataset_name = participant_config.get("data_args", {}).get("dataset")
     domain = str(raw.get("domain", "image")).lower()
-    # Tabular adversarial training exposes a single attack: CAA.
-    attack = "caa" if domain == "tabular" else str(raw.get("attack", "fgsm")).lower()
+    attack = str(raw.get("attack", "capgd" if domain == "tabular" else "fgsm")).lower()
+    if domain == "tabular" and attack == "caa":
+        attack = "capgd"
 
     return AdversarialTrainingConfig(
         enabled=True,
@@ -78,6 +81,8 @@ def validate_config(config: AdversarialTrainingConfig) -> None:
         raise ValueError(ERR_MODE)
     if config.domain == "image" and config.attack not in IMAGE_ADVERSARIAL_ATTACKS:
         raise ValueError(ERR_IMAGE_ATTACK)
+    if config.domain == "tabular" and config.attack not in TABULAR_ADVERSARIAL_ATTACKS:
+        raise ValueError(ERR_TABULAR_ATTACK)
     if config.epsilon < 0:
         raise ValueError(ERR_EPSILON)
     if config.alpha is not None and config.alpha < 0:
diff --git a/nebula/addons/defenses/adversarial_training.py b/nebula/addons/defenses/adversarial_training/defense.py
similarity index 91%
rename from nebula/addons/defenses/adversarial_training.py
rename to nebula/addons/defenses/adversarial_training/defense.py
index 6dbc7aea0..d4e8da2fd 100644
--- a/nebula/addons/defenses/adversarial_training.py
+++ b/nebula/addons/defenses/adversarial_training/defense.py
@@ -3,8 +3,8 @@
 
 import torch
 
-from nebula.addons.defenses.adversarial_training_base import AdversarialExampleGenerator
-from nebula.addons.defenses.adversarial_training_config import (
+from nebula.addons.defenses.adversarial_training.base import AdversarialExampleGenerator
+from nebula.addons.defenses.adversarial_training.config import (
     CAA_TABULAR_DATASETS,
     ERR_ALPHA,
     ERR_APPLY_PROBABILITY,
@@ -15,23 +15,26 @@
     ERR_MIXED_WEIGHTS,
     ERR_MODE,
     ERR_STEPS,
+    ERR_TABULAR_ATTACK,
     ERR_TABULAR_METADATA,
     ERR_UNSUPPORTED_ATTACK,
     IMAGE_ADVERSARIAL_ATTACKS,
     IMAGE_DATASET_NORMALIZATION,
+    TABULAR_ADVERSARIAL_ATTACKS,
     AdversarialTrainingConfig,
     config_from_participant,
     validate_config,
 )
-from nebula.addons.defenses.adversarial_training_image import (
+from nebula.addons.defenses.adversarial_training.image import (
     ImageAdversarialExampleGenerator,
     ImageFGSMGenerator,
     ImagePGDGenerator,
 )
-from nebula.addons.defenses.adversarial_training_logging import AdversarialTrainingSampleLogger
-from nebula.addons.defenses.adversarial_training_tabular import (
+from nebula.addons.defenses.adversarial_training.logging import AdversarialTrainingSampleLogger
+from nebula.addons.defenses.adversarial_training.tabular import (
     TabularAdversarialExampleGenerator,
     TabularCAAGenerator,
+    TabularCAPGDGenerator,
     TabularConstraintSet,
 )
 from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
@@ -62,18 +65,8 @@ def from_participant_config(
         validate_config(config)
 
         if config.domain == "tabular":
-            # CAA needs dataset metadata. Keep the allow-list explicit while more tabular datasets are added.
-            if config.dataset_name not in CAA_TABULAR_DATASETS:
-                logging.warning(
-                    "[AdversarialTrainingDefense] Skipping CAA tabular adversarial training: "
-                    "dataset '%s' is not supported yet",
-                    config.dataset_name,
-                )
-                return None
-
             metadata = cls._get_tabular_metadata(partition)
-            # For tabular data, the only valid adversarial-training generator is CAA.
-            return cls(config=config, generator=TabularCAAGenerator(config, metadata))
+            return cls(config=config, generator=TabularCAPGDGenerator(config, metadata))
 
         if config.domain == "image":
             # Image attacks run in normalized model space, so each dataset must provide mean/std.
@@ -254,10 +247,12 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
     "ERR_MIXED_WEIGHTS",
     "ERR_MODE",
     "ERR_STEPS",
+    "ERR_TABULAR_ATTACK",
     "ERR_TABULAR_METADATA",
     "ERR_UNSUPPORTED_ATTACK",
     "IMAGE_ADVERSARIAL_ATTACKS",
     "IMAGE_DATASET_NORMALIZATION",
+    "TABULAR_ADVERSARIAL_ATTACKS",
     "AdversarialExampleGenerator",
     "AdversarialTrainingConfig",
     "AdversarialTrainingDefense",
@@ -266,6 +261,7 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
     "ImagePGDGenerator",
     "TabularAdversarialExampleGenerator",
     "TabularCAAGenerator",
+    "TabularCAPGDGenerator",
     "TabularConstraintSet",
     "apply_adversarial_training_if_enabled",
 ]
diff --git a/nebula/addons/defenses/adversarial_training_image.py b/nebula/addons/defenses/adversarial_training/image.py
similarity index 96%
rename from nebula/addons/defenses/adversarial_training_image.py
rename to nebula/addons/defenses/adversarial_training/image.py
index d9a84ae1c..cd6dbd129 100644
--- a/nebula/addons/defenses/adversarial_training_image.py
+++ b/nebula/addons/defenses/adversarial_training/image.py
@@ -1,7 +1,7 @@
 import torch
 
-from nebula.addons.defenses.adversarial_training_base import AdversarialExampleGenerator
-from nebula.addons.defenses.adversarial_training_config import AdversarialTrainingConfig
+from nebula.addons.defenses.adversarial_training.base import AdversarialExampleGenerator
+from nebula.addons.defenses.adversarial_training.config import AdversarialTrainingConfig
 
 
 class ImageAdversarialExampleGenerator(AdversarialExampleGenerator):
diff --git a/nebula/addons/defenses/adversarial_training_logging.py b/nebula/addons/defenses/adversarial_training/logging.py
similarity index 99%
rename from nebula/addons/defenses/adversarial_training_logging.py
rename to nebula/addons/defenses/adversarial_training/logging.py
index 2e0e489cb..013a398ac 100644
--- a/nebula/addons/defenses/adversarial_training_logging.py
+++ b/nebula/addons/defenses/adversarial_training/logging.py
@@ -2,7 +2,7 @@
 
 import torch
 
-from nebula.addons.defenses.adversarial_training_config import AdversarialTrainingConfig
+from nebula.addons.defenses.adversarial_training.config import AdversarialTrainingConfig
 from nebula.config.config import TRAINING_LOGGER
 
 logging_training = logging.getLogger(TRAINING_LOGGER)
diff --git a/nebula/addons/defenses/adversarial_training/tabular.py b/nebula/addons/defenses/adversarial_training/tabular.py
new file mode 100644
index 000000000..2d1d954e8
--- /dev/null
+++ b/nebula/addons/defenses/adversarial_training/tabular.py
@@ -0,0 +1,199 @@
+import torch
+import torch.nn.functional as F
+
+from nebula.addons.defenses.adversarial_training.base import AdversarialExampleGenerator
+from nebula.addons.defenses.adversarial_training.config import AdversarialTrainingConfig
+from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
+
+
+class TabularConstraintSet:
+    """Projects tabular attack candidates back to the valid feature domain."""
+
+    def __init__(self, metadata: TabularAdversarialMetadata):
+        # The metadata is dataset-level and immutable; derived tensors are cached per device/dtype.
+        self.metadata = metadata
+        self._tensor_cache: dict[tuple[torch.device, torch.dtype], dict[str, torch.Tensor]] = {}
+
+    def tensors(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
+        # Masks and bounds are reused in every CAPGD step, so build them once for each tensor placement.
+        key = (x.device, x.dtype)
+        cached = self._tensor_cache.get(key)
+        if cached is not None:
+            return cached
+
+        # Masks have shape (1, n_features), which broadcasts over the batch dimension.
+        cached = {
+            "continuous": self._feature_type_mask(x, CONTINUOUS),
+            "integer": self._feature_type_mask(x, INTEGER),
+            "categorical": self._feature_type_mask(x, CATEGORICAL),
+            "min": torch.tensor(self.metadata.feature_min_norm, dtype=x.dtype, device=x.device).view(1, -1),
+            "max": torch.tensor(self.metadata.feature_max_norm, dtype=x.dtype, device=x.device).view(1, -1),
+        }
+        cached["numeric"] = cached["continuous"] | cached["integer"]
+        cached["perturbable"] = cached["numeric"] | cached["categorical"]
+        cached["integer_step"] = self._integer_steps(cached["min"])
+        self._tensor_cache[key] = cached
+        return cached
+
+    def perturbable_mask(self, x: torch.Tensor) -> torch.Tensor:
+        # Used by the attack step to avoid moving immutable features in the first place.
+        return self.tensors(x)["perturbable"]
+
+    def project(self, x_candidate: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
+        """Clamp numeric features, round integers, restore immutable features and fix one-hot groups."""
+        tensors = self.tensors(x_clean)
+        lower, upper = self._bounds(x_clean, epsilon, tensors)
+
+        # First force every value into its valid interval, then apply type-specific fixes.
+        x_projected = torch.max(torch.min(x_candidate, upper), lower)
+        x_projected = self._project_integer_features(x_projected, x_clean, lower, upper, tensors)
+        x_projected = self.project_categorical_groups(x_projected)
+        # Immutable features are copied back from the original clean sample as the final guardrail.
+        return torch.where(tensors["perturbable"], x_projected, x_clean)
+
+    def categorical_gradient_step(self, x_candidate: torch.Tensor, grad: torch.Tensor) -> torch.Tensor:
+        if not self.metadata.categorical_groups:
+            return x_candidate
+
+        # One-hot columns are discrete: instead of adding a fractional gradient,
+        # activate the category whose gradient most increases the adversarial loss.
+        x_stepped = x_candidate.clone()
+        for group in self.metadata.categorical_groups:
+            group_tensor = torch.tensor(group, dtype=torch.long, device=x_candidate.device)
+            selected = grad.index_select(1, group_tensor).argmax(dim=1)
+            x_stepped[:, group_tensor] = F.one_hot(selected, num_classes=len(group)).to(dtype=x_candidate.dtype)
+        return x_stepped
+
+    def project_categorical_groups(self, x_candidate: torch.Tensor) -> torch.Tensor:
+        if not self.metadata.categorical_groups:
+            return x_candidate
+
+        # Projection must always leave each one-hot group with exactly one active feature.
+        x_projected = x_candidate.clone()
+        for group in self.metadata.categorical_groups:
+            group_tensor = torch.tensor(group, dtype=torch.long, device=x_candidate.device)
+            selected = x_candidate.index_select(1, group_tensor).argmax(dim=1)
+            x_projected[:, group_tensor] = F.one_hot(selected, num_classes=len(group)).to(dtype=x_candidate.dtype)
+        return x_projected
+
+    def _feature_type_mask(self, x: torch.Tensor, feature_type: str) -> torch.Tensor:
+        return torch.tensor(
+            [value == feature_type for value in self.metadata.feature_types],
+            dtype=torch.bool,
+            device=x.device,
+        ).view(1, -1)
+
+    def _bounds(
+        self,
+        x_clean: torch.Tensor,
+        epsilon: float,
+        tensors: dict[str, torch.Tensor],
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # Numeric features are restricted both by dataset bounds and by the epsilon ball around x_clean.
+        numeric_lower = torch.maximum(tensors["min"], x_clean - float(epsilon))
+        numeric_upper = torch.minimum(tensors["max"], x_clean + float(epsilon))
+        # Categorical features are handled by one-hot projection, not by an epsilon ball.
+        lower = torch.where(tensors["categorical"], tensors["min"], numeric_lower)
+        upper = torch.where(tensors["categorical"], tensors["max"], numeric_upper)
+        return lower, upper
+
+    def _integer_steps(self, minimum: torch.Tensor) -> torch.Tensor:
+        # Default step=1 is harmless for non-integer columns because the integer mask gates usage later.
+        integer_steps = torch.ones_like(minimum)
+        for idx, step in (self.metadata.integer_step_norm or {}).items():
+            integer_steps[0, int(idx)] = float(step)
+        return integer_steps
+
+    def _project_integer_features(
+        self,
+        x_projected: torch.Tensor,
+        x_clean: torch.Tensor,
+        lower: torch.Tensor,
+        upper: torch.Tensor,
+        tensors: dict[str, torch.Tensor],
+    ) -> torch.Tensor:
+        integer_mask = tensors["integer"]
+        if not integer_mask.any():
+            return x_projected
+
+        # Integer features may be normalized, so the valid values form a shifted grid:
+        # min, min + step, min + 2*step, ...
+        step = torch.clamp(tensors["integer_step"], min=torch.finfo(x_projected.dtype).eps)
+        grid_lower = torch.ceil((lower - tensors["min"]) / step) * step + tensors["min"]
+        grid_upper = torch.floor((upper - tensors["min"]) / step) * step + tensors["min"]
+        rounded = torch.round((x_projected - tensors["min"]) / step) * step + tensors["min"]
+        rounded = torch.max(torch.min(rounded, grid_upper), grid_lower)
+
+        # If epsilon is smaller than the normalized integer step, no valid integer move exists.
+        has_valid_grid = grid_lower <= grid_upper
+        rounded = torch.where(has_valid_grid, rounded, x_clean)
+        return torch.where(integer_mask, rounded, x_projected)
+
+
+class TabularAdversarialExampleGenerator(AdversarialExampleGenerator):
+    """Base generator for constrained tabular adversarial examples."""
+
+    def __init__(self, config: AdversarialTrainingConfig, metadata: TabularAdversarialMetadata):
+        # Generators share the same constraint layer; only the search strategy should vary.
+        self.config = config
+        self.metadata = metadata
+        self.constraints = TabularConstraintSet(metadata)
+
+    def _alpha(self, epsilon: float) -> float:
+        # By default, distribute the epsilon budget evenly across CAPGD steps.
+        if self.config.alpha is not None:
+            return float(self.config.alpha)
+        return float(epsilon) / max(int(self.config.steps), 1)
+
+    def _margin(self, logits: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        # Positive margin means some wrong class already beats the true class.
+        true_logits = logits.gather(1, y.view(-1, 1)).squeeze(1)
+        true_class_mask = F.one_hot(y, num_classes=logits.size(1)).bool()
+        other_logits = logits.masked_fill(true_class_mask, float("-inf"))
+        return other_logits.max(dim=1).values - true_logits
+
+
+class TabularCAPGDGenerator(TabularAdversarialExampleGenerator):
+    """First-phase constrained tabular CAPGD generator."""
+
+    def generate(self, model, x, y, criterion):
+        # Sample one attack strength for this batch, matching the image generator behavior.
+        epsilon = self._sample_epsilon(x.device)
+        x_clean = x.detach()
+        if epsilon <= 0.0:
+            return x_clean
+
+        steps = max(int(self.config.steps), 1)
+        step_size = self._alpha(epsilon)
+        perturbable_mask = self.constraints.perturbable_mask(x_clean).to(dtype=x_clean.dtype)
+
+        x_adv = x_clean.clone()
+        best_adv = x_adv.clone()
+        best_score = torch.full((x_clean.size(0),), float("-inf"), dtype=x_clean.dtype, device=x_clean.device)
+
+        for _ in range(steps):
+            # CAPGD step: move in the sign of the loss gradient, but only on perturbable features.
+            x_grad = x_adv.detach().requires_grad_(True)
+            logits = model(x_grad)
+            loss = criterion(logits, y)
+            grad = torch.autograd.grad(loss, x_grad, only_inputs=True)[0]
+
+            candidate = x_adv.detach() + float(step_size) * grad.sign() * perturbable_mask
+            candidate = self.constraints.categorical_gradient_step(candidate, grad)
+            # This is the key tabular rule: never score or return an invalid candidate.
+            candidate = self.constraints.project(candidate, x_clean, epsilon)
+
+            with torch.no_grad():
+                # Keep the strongest candidate per sample, not just the last step.
+                candidate_score = self._margin(model(candidate), y)
+                better = candidate_score > best_score
+                best_adv = torch.where(better.view(-1, 1), candidate, best_adv)
+                best_score = torch.where(better, candidate_score, best_score)
+
+            x_adv = candidate
+
+        return best_adv.detach()
+
+
+# Compatibility alias while old configs/UI still refer to the future CAA attack.
+TabularCAAGenerator = TabularCAPGDGenerator
diff --git a/nebula/addons/defenses/adversarial_training_tabular.py b/nebula/addons/defenses/adversarial_training_tabular.py
deleted file mode 100644
index 0c62e1217..000000000
--- a/nebula/addons/defenses/adversarial_training_tabular.py
+++ /dev/null
@@ -1,323 +0,0 @@
-import torch
-import torch.nn.functional as F
-
-from nebula.addons.defenses.adversarial_training_base import AdversarialExampleGenerator
-from nebula.addons.defenses.adversarial_training_config import AdversarialTrainingConfig
-from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
-
-
-class TabularConstraintSet:
-    """Projection and mutation rules derived from tabular metadata."""
-
-    def __init__(self, metadata: TabularAdversarialMetadata):
-        # Store metadata and cache derived tensors by device/dtype for speed.
-        self.metadata = metadata
-        self._tensor_cache: dict[tuple[torch.device, torch.dtype], dict[str, torch.Tensor]] = {}
-
-    def tensors(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
-        # Return reusable masks, bounds and integer steps for a batch tensor.
-        key = (x.device, x.dtype)
-        cached = self._tensor_cache.get(key)
-        if cached is not None:
-            return cached
-
-        # Convert metadata lists to tensors once per device/dtype; CAA uses them in every step.
-        cached = {
-            "continuous": torch.tensor(
-                [feature_type == CONTINUOUS for feature_type in self.metadata.feature_types],
-                dtype=torch.bool,
-                device=x.device,
-            ).view(1, -1),
-            "integer": torch.tensor(
-                [feature_type == INTEGER for feature_type in self.metadata.feature_types],
-                dtype=torch.bool,
-                device=x.device,
-            ).view(1, -1),
-            "categorical": torch.tensor(
-                [feature_type == CATEGORICAL for feature_type in self.metadata.feature_types],
-                dtype=torch.bool,
-                device=x.device,
-            ).view(1, -1),
-            "min": torch.tensor(self.metadata.feature_min_norm, dtype=x.dtype, device=x.device).view(1, -1),
-            "max": torch.tensor(self.metadata.feature_max_norm, dtype=x.dtype, device=x.device).view(1, -1),
-        }
-        cached["numeric"] = cached["continuous"] | cached["integer"]
-        cached["perturbable"] = cached["numeric"] | cached["categorical"]
-        cached["integer_step"] = self._integer_steps(cached["min"])
-        self._tensor_cache[key] = cached
-        return cached
-
-    def perturbable_mask(self, x: torch.Tensor) -> torch.Tensor:
-        # Expose the final boolean mask used to block immutable features.
-        return self.tensors(x)["perturbable"]
-
-    def project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
-        # Project a candidate back to valid tabular values around the clean sample.
-        tensors = self.tensors(x_clean)
-        # Numeric features are bounded by epsilon; categorical one-hot features use dataset bounds.
-        numeric_lower = torch.maximum(tensors["min"], x_clean - float(epsilon))
-        numeric_upper = torch.minimum(tensors["max"], x_clean + float(epsilon))
-        lower = torch.where(tensors["categorical"], tensors["min"], numeric_lower)
-        upper = torch.where(tensors["categorical"], tensors["max"], numeric_upper)
-        x_adv = torch.max(torch.min(x_adv, upper), lower)
-
-        x_adv = self._project_integer_features(x_adv, x_clean, lower, upper, tensors)
-        x_adv = self.project_categorical_groups(x_adv)
-        return torch.where(tensors["perturbable"], x_adv, x_clean)
-
-    def project_categorical_groups(self, x_adv: torch.Tensor) -> torch.Tensor:
-        # Enforce one-hot validity after gradient or evolutionary changes.
-        if not self.metadata.categorical_groups:
-            return x_adv
-
-        # Each one-hot group must end with exactly one active category.
-        x_projected = x_adv.clone()
-        for group in self.metadata.categorical_groups:
-            group_tensor = torch.tensor(group, dtype=torch.long, device=x_adv.device)
-            group_values = x_adv.index_select(1, group_tensor)
-            selected = group_values.argmax(dim=1)
-            one_hot = F.one_hot(selected, num_classes=len(group)).to(dtype=x_adv.dtype)
-            x_projected[:, group_tensor] = one_hot
-        return x_projected
-
-    def categorical_gradient_step(self, x_candidate: torch.Tensor, grad: torch.Tensor) -> torch.Tensor:
-        # Apply a discrete gradient step to categorical one-hot groups.
-        if not self.metadata.categorical_groups:
-            return x_candidate
-
-        # For one-hot features, choose the category with the largest adversarial gradient.
-        x_stepped = x_candidate.clone()
-        for group in self.metadata.categorical_groups:
-            group_tensor = torch.tensor(group, dtype=torch.long, device=x_candidate.device)
-            selected = grad.index_select(1, group_tensor).argmax(dim=1)
-            one_hot = F.one_hot(selected, num_classes=len(group)).to(dtype=x_candidate.dtype)
-            x_stepped[:, group_tensor] = one_hot
-        return x_stepped
-
-    def randomize_categorical_groups(
-        self,
-        candidates: torch.Tensor,
-        mutation_probability: float,
-    ) -> torch.Tensor:
-        # Randomly switch categories for evolutionary exploration.
-        if not self.metadata.categorical_groups:
-            return candidates
-
-        original_shape = candidates.shape
-        flat_candidates = candidates.reshape(-1, original_shape[-1]).clone()
-        for group in self.metadata.categorical_groups:
-            # Mutation explores alternative categories when the gradient phase is not enough.
-            group_tensor = torch.tensor(group, dtype=torch.long, device=candidates.device)
-            current = flat_candidates.index_select(1, group_tensor).argmax(dim=1)
-            random_choice = torch.randint(len(group), current.shape, device=candidates.device)
-            mutate = torch.rand(current.shape, device=candidates.device) < float(mutation_probability)
-            selected = torch.where(mutate, random_choice, current)
-            one_hot = F.one_hot(selected, num_classes=len(group)).to(dtype=candidates.dtype)
-            flat_candidates[:, group_tensor] = one_hot
-        return flat_candidates.reshape(original_shape)
-
-    def _integer_steps(self, minimum: torch.Tensor) -> torch.Tensor:
-        # Build the normalized integer grid spacing tensor from metadata.
-        integer_steps = torch.ones_like(minimum)
-        for idx, step in (self.metadata.integer_step_norm or {}).items():
-            integer_steps[0, int(idx)] = float(step)
-        return integer_steps
-
-    def _project_integer_features(
-        self,
-        x_adv: torch.Tensor,
-        x_clean: torch.Tensor,
-        lower: torch.Tensor,
-        upper: torch.Tensor,
-        tensors: dict[str, torch.Tensor],
-    ) -> torch.Tensor:
-        # Round integer columns while keeping them inside the allowed epsilon interval.
-        integer_mask = tensors["integer"]
-        if not integer_mask.any():
-            return x_adv
-
-        # Integer features live on a normalized grid, so round to the closest valid grid value.
-        step = torch.clamp(tensors["integer_step"], min=torch.finfo(x_adv.dtype).eps)
-        projected_integer = torch.round((x_adv - tensors["min"]) / step) * step + tensors["min"]
-        grid_lower = torch.ceil((lower - tensors["min"]) / step) * step + tensors["min"]
-        grid_upper = torch.floor((upper - tensors["min"]) / step) * step + tensors["min"]
-        projected_integer = torch.max(torch.min(projected_integer, grid_upper), grid_lower)
-        has_valid_grid = grid_lower <= grid_upper
-        projected_integer = torch.where(has_valid_grid, projected_integer, x_clean)
-        return torch.where(integer_mask, projected_integer, x_adv)
-
-
-class TabularAdversarialExampleGenerator(AdversarialExampleGenerator):
-    """Base generator for constrained tabular adversarial examples."""
-
-    def __init__(self, config: AdversarialTrainingConfig, metadata: TabularAdversarialMetadata):
-        # Share config, metadata and constraints across CAA phases.
-        self.config = config
-        self.metadata = metadata
-        self.constraints = TabularConstraintSet(metadata)
-
-    def _alpha(self, epsilon: float) -> float:
-        # Use an explicit alpha when provided; otherwise distribute epsilon across steps.
-        if self.config.alpha is not None:
-            return float(self.config.alpha)
-        return float(epsilon) / max(int(self.config.steps), 1)
-
-    def _margin(self, logits: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
-        # Score how close each sample is to being misclassified.
-        # Positive margin means some wrong class beats the true class.
-        true_logits = logits.gather(1, y.view(-1, 1)).squeeze(1)
-        other_logits = logits.masked_fill(F.one_hot(y, num_classes=logits.size(1)).bool(), float("-inf"))
-        return other_logits.max(dim=1).values - true_logits
-
-    def _success_mask(self, model, x_adv: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
-        # Mark samples whose adversarial version changes the model prediction.
-        with torch.no_grad():
-            return torch.argmax(model(x_adv), dim=1) != y
-
-    def _better_mask(
-        self,
-        candidate_success: torch.Tensor,
-        candidate_score: torch.Tensor,
-        best_success: torch.Tensor,
-        best_score: torch.Tensor,
-    ) -> torch.Tensor:
-        # Prefer successful attacks, then candidates with a better adversarial margin.
-        return (candidate_success & ~best_success) | (
-            (candidate_success == best_success) & (candidate_score > best_score)
-        )
-
-
-class TabularCAAGenerator(TabularAdversarialExampleGenerator):
-    """CAA-style generator for constrained tabular adversarial training."""
-
-    def generate(self, model, x, y, criterion):
-        # Generate a constrained tabular adversarial batch with CAA.
-        epsilon = self._sample_epsilon(x.device)
-        x_clean = x.detach()
-        if epsilon <= 0.0:
-            return x_clean
-
-        # First try a gradient-guided CAA search; then mutate only samples that still resist.
-        x_adv = self._capgd_phase(model, x_clean, y, criterion, epsilon)
-        failed = ~self._success_mask(model, x_adv, y)
-        if failed.any():
-            x_fallback = self._evolutionary_phase(model, x_clean[failed], y[failed], x_adv[failed], epsilon)
-            x_adv = x_adv.clone()
-            x_adv[failed] = x_fallback
-        return x_adv.detach()
-
-    def _capgd_phase(self, model, x_clean: torch.Tensor, y: torch.Tensor, criterion, epsilon: float) -> torch.Tensor:
-        # Run the gradient-based part of CAA with projection after every candidate step.
-        steps = max(int(self.config.steps), 1)
-        step_size = self._alpha(epsilon)
-        perturbable_mask = self.constraints.perturbable_mask(x_clean)
-        x_adv = x_clean.clone()
-        best_adv = x_adv.clone()
-        best_score = torch.full((x_clean.size(0),), float("-inf"), dtype=x_clean.dtype, device=x_clean.device)
-        best_success = torch.zeros(x_clean.size(0), dtype=torch.bool, device=x_clean.device)
-        previous_loss = None
-
-        for _ in range(steps):
-            x_grad = x_adv.detach().requires_grad_(True)
-            logits = model(x_grad)
-            loss = criterion(logits, y)
-            grad = torch.autograd.grad(loss, x_grad, only_inputs=True)[0]
-
-            candidate = x_adv.detach() + float(step_size) * grad.sign() * perturbable_mask
-            candidate = self.constraints.categorical_gradient_step(candidate, grad)
-            candidate = self.constraints.project(candidate, x_clean, epsilon)
-
-            with torch.no_grad():
-                candidate_logits = model(candidate)
-                candidate_score = self._margin(candidate_logits, y)
-                candidate_success = torch.argmax(candidate_logits, dim=1) != y
-                # Keep successful adversarial samples first; otherwise keep the highest margin.
-                better = self._better_mask(candidate_success, candidate_score, best_success, best_score)
-                best_adv = torch.where(better.view(-1, 1), candidate, best_adv)
-                best_score = torch.where(better, candidate_score, best_score)
-                best_success = best_success | candidate_success
-
-                candidate_loss = F.cross_entropy(candidate_logits, y)
-                if previous_loss is not None and candidate_loss <= previous_loss:
-                    step_size *= 0.75
-                previous_loss = candidate_loss
-
-            x_adv = candidate
-
-        return best_adv.detach()
-
-    def _evolutionary_phase(
-        self,
-        model,
-        x_clean: torch.Tensor,
-        y: torch.Tensor,
-        x_seed: torch.Tensor,
-        epsilon: float,
-    ) -> torch.Tensor:
-        # Use random mutations as a fallback for samples not solved by the gradient phase.
-        if x_clean.numel() == 0:
-            return x_clean
-
-        tensors = self.constraints.tensors(x_clean)
-        perturbable_mask = tensors["perturbable"].to(dtype=x_clean.dtype)
-        batch_size = x_clean.size(0)
-        population_size = min(max(int(self.config.steps) * 4, 8), 32)
-        generations = min(max(int(self.config.steps), 3), 20)
-        mutation_scale = max(float(epsilon) / 2.0, torch.finfo(x_clean.dtype).eps)
-
-        best_adv = self.constraints.project(x_seed.detach(), x_clean, epsilon)
-        with torch.no_grad():
-            best_logits = model(best_adv)
-            best_score = self._margin(best_logits, y)
-            best_success = torch.argmax(best_logits, dim=1) != y
-
-        for _ in range(generations):
-            random_noise = torch.empty(
-                population_size,
-                *x_clean.shape,
-                dtype=x_clean.dtype,
-                device=x_clean.device,
-            ).uniform_(-float(epsilon), float(epsilon))
-            mutations = torch.randn(
-                population_size,
-                *x_clean.shape,
-                dtype=x_clean.dtype,
-                device=x_clean.device,
-            ) * mutation_scale
-            candidates = x_clean.unsqueeze(0) + random_noise * perturbable_mask
-            candidates[0] = best_adv + mutations[0] * perturbable_mask
-            if population_size > 1:
-                candidates[1:] = candidates[1:] + mutations[1:] * perturbable_mask
-            candidates = self.constraints.randomize_categorical_groups(candidates, mutation_probability=0.35)
-
-            flat_candidates = candidates.reshape(population_size * batch_size, -1)
-            flat_clean = x_clean.repeat(population_size, 1)
-            # Every random candidate is projected back to the valid tabular domain before scoring.
-            flat_candidates = self.constraints.project(flat_candidates, flat_clean, epsilon)
-            repeated_y = y.repeat(population_size)
-
-            with torch.no_grad():
-                logits = model(flat_candidates)
-                scores = self._margin(logits, repeated_y).view(population_size, batch_size)
-                successes = (torch.argmax(logits, dim=1) != repeated_y).view(population_size, batch_size)
-                candidate_rank = scores + successes.to(dtype=scores.dtype) * 1_000.0
-                best_population_idx = candidate_rank.argmax(dim=0)
-
-                selected = flat_candidates.view(population_size, batch_size, -1)[
-                    best_population_idx,
-                    torch.arange(batch_size, device=x_clean.device),
-                ]
-                selected_score = scores[
-                    best_population_idx,
-                    torch.arange(batch_size, device=x_clean.device),
-                ]
-                selected_success = successes[
-                    best_population_idx,
-                    torch.arange(batch_size, device=x_clean.device),
-                ]
-                better = self._better_mask(selected_success, selected_score, best_success, best_score)
-                best_adv = torch.where(better.view(-1, 1), selected, best_adv)
-                best_score = torch.where(better, selected_score, best_score)
-                best_success = best_success | selected_success
-
-        return best_adv.detach()
diff --git a/nebula/core/datasets/tabular_metadata.py b/nebula/core/datasets/tabular_metadata.py
index e7596fcad..e34397f6a 100644
--- a/nebula/core/datasets/tabular_metadata.py
+++ b/nebula/core/datasets/tabular_metadata.py
@@ -12,9 +12,14 @@
 ERR_FEATURE_MIN_LENGTH = "feature_min_norm length must match feature_names length"
 ERR_FEATURE_MAX_LENGTH = "feature_max_norm length must match feature_names length"
 ERR_UNSUPPORTED_FEATURE_TYPES = "Unsupported tabular feature types: {feature_types}"
+ERR_FEATURE_BOUNDS = "feature_min_norm must be <= feature_max_norm for every feature"
+ERR_INTEGER_STEP_INDEX = "integer_step_norm contains invalid feature indices: {indices}"
+ERR_INTEGER_STEP_VALUE = "integer_step_norm values must be > 0"
+ERR_INTEGER_STEP_TYPE = "integer_step_norm contains non-integer feature indices: {indices}"
 ERR_CATEGORICAL_GROUP_SIZE = "categorical_groups entries must contain at least two feature indices"
 ERR_CATEGORICAL_GROUP_INDEX = "categorical_groups contains invalid feature indices: {indices}"
 ERR_CATEGORICAL_GROUP_TYPE = "categorical_groups contains non-categorical feature indices: {indices}"
+ERR_CATEGORICAL_GROUP_OVERLAP = "categorical_groups contains duplicated feature indices: {indices}"
 ERR_CATEGORICAL_GROUP_COVERAGE = "categorical feature indices missing from categorical_groups: {indices}"
 
 
@@ -22,6 +27,8 @@
 class TabularAdversarialMetadata:
     """Minimal metadata for tabular adversarial training."""
 
+    # These fields describe the exact vector received by the model after preprocessing.
+    # Bounds and steps must use the same normalized space as the training tensors.
     feature_names: list[str]
     feature_types: list[str]
     feature_min_norm: list[float]
@@ -30,6 +37,8 @@ class TabularAdversarialMetadata:
     categorical_groups: list[list[int]] | None = None
 
     def __post_init__(self):
+        # Fail early if a dataset exposes incomplete metadata. The attack relies on
+        # these arrays lining up feature-by-feature.
         n_features = len(self.feature_names)
         if len(self.feature_types) != n_features:
             raise ValueError(ERR_FEATURE_TYPES_LENGTH)
@@ -37,9 +46,43 @@ def __post_init__(self):
             raise ValueError(ERR_FEATURE_MIN_LENGTH)
         if len(self.feature_max_norm) != n_features:
             raise ValueError(ERR_FEATURE_MAX_LENGTH)
+
+        # Every feature needs a valid normalized interval so projection can clamp safely.
+        invalid_bounds = [
+            idx
+            for idx, (min_value, max_value) in enumerate(
+                zip(self.feature_min_norm, self.feature_max_norm, strict=True)
+            )
+            if min_value > max_value
+        ]
+        if invalid_bounds:
+            raise ValueError(ERR_FEATURE_BOUNDS)
         invalid_types = set(self.feature_types) - {CONTINUOUS, INTEGER, CATEGORICAL, NON_PERTURBABLE}
         if invalid_types:
             raise ValueError(ERR_UNSUPPORTED_FEATURE_TYPES.format(feature_types=sorted(invalid_types)))
+
+        # Integer steps represent the normalized distance between consecutive integer values.
+        # They only make sense for features marked as INTEGER.
+        invalid_step_indices = [
+            idx
+            for idx in (self.integer_step_norm or {})
+            if int(idx) < 0 or int(idx) >= n_features
+        ]
+        if invalid_step_indices:
+            raise ValueError(ERR_INTEGER_STEP_INDEX.format(indices=invalid_step_indices))
+        non_integer_step_indices = [
+            idx
+            for idx in (self.integer_step_norm or {})
+            if self.feature_types[int(idx)] != INTEGER
+        ]
+        if non_integer_step_indices:
+            raise ValueError(ERR_INTEGER_STEP_TYPE.format(indices=non_integer_step_indices))
+        if any(step <= 0 for step in (self.integer_step_norm or {}).values()):
+            raise ValueError(ERR_INTEGER_STEP_VALUE)
+
+        # Categorical groups represent one original categorical column after one-hot encoding.
+        # Each group must be disjoint so projection can activate exactly one value per group.
+        grouped_counts: dict[int, int] = {}
         for group in self.categorical_groups or []:
             if len(group) < 2:
                 raise ValueError(ERR_CATEGORICAL_GROUP_SIZE)
@@ -49,7 +92,14 @@ def __post_init__(self):
             non_categorical_indices = [idx for idx in group if self.feature_types[idx] != CATEGORICAL]
             if non_categorical_indices:
                 raise ValueError(ERR_CATEGORICAL_GROUP_TYPE.format(indices=non_categorical_indices))
+            for idx in group:
+                grouped_counts[idx] = grouped_counts.get(idx, 0) + 1
+
+        duplicated_group_indices = sorted(idx for idx, count in grouped_counts.items() if count > 1)
+        if duplicated_group_indices:
+            raise ValueError(ERR_CATEGORICAL_GROUP_OVERLAP.format(indices=duplicated_group_indices))
 
+        # A categorical feature without a group cannot be projected back to a valid one-hot state.
         grouped_categorical_indices = {
             idx
             for group in self.categorical_groups or []
@@ -65,10 +115,12 @@ def __post_init__(self):
             raise ValueError(ERR_CATEGORICAL_GROUP_COVERAGE.format(indices=missing_categorical_indices))
 
     def to_dict(self) -> dict[str, Any]:
+        # Partitions persist metadata as JSON-like dictionaries in HDF5 attributes.
         return asdict(self)
 
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> TabularAdversarialMetadata:
+        # HDF5/JSON round-trips can turn integer keys into strings; normalize them here.
         return cls(
             feature_names=[str(value) for value in data["feature_names"]],
             feature_types=[str(value) for value in data["feature_types"]],

From 3d232b8f030584bc931c20185ccd7659534be28d Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 5 Jun 2026 11:14:35 +0200
Subject: [PATCH 60/66] Adversarial Training: Frontend updated and tabular
 finished, target loss and max loss implemented

---
 .../defenses/adversarial_training/__init__.py | 14 +--
 .../defenses/adversarial_training/config.py   | 63 ++++++++-----
 .../defenses/adversarial_training/defense.py  | 40 ++++----
 .../defenses/adversarial_training/image.py    |  9 +-
 .../defenses/adversarial_training/tabular.py  | 51 ++++++++--
 nebula/controller/scenarios.py                | 10 +-
 .../frontend/config/participant.json.example  |  9 +-
 .../js/deployment/adversarial-training.js     | 92 +++++++++++--------
 nebula/frontend/templates/deployment.html     | 54 ++++-------
 9 files changed, 189 insertions(+), 153 deletions(-)

diff --git a/nebula/addons/defenses/adversarial_training/__init__.py b/nebula/addons/defenses/adversarial_training/__init__.py
index c417b1b93..844956335 100644
--- a/nebula/addons/defenses/adversarial_training/__init__.py
+++ b/nebula/addons/defenses/adversarial_training/__init__.py
@@ -1,12 +1,9 @@
 from nebula.addons.defenses.adversarial_training.defense import (
-    CAA_TABULAR_DATASETS,
     ERR_ALPHA,
     ERR_APPLY_PROBABILITY,
-    ERR_CLIP_BOUNDS,
     ERR_EPSILON,
     ERR_IMAGE_ATTACK,
-    ERR_LOSS_WEIGHTS,
-    ERR_MIXED_WEIGHTS,
+    ERR_LOSS_INCREASE,
     ERR_MODE,
     ERR_STEPS,
     ERR_TABULAR_ATTACK,
@@ -15,6 +12,7 @@
     IMAGE_ADVERSARIAL_ATTACKS,
     IMAGE_DATASET_NORMALIZATION,
     TABULAR_ADVERSARIAL_ATTACKS,
+    TABULAR_ADVERSARIAL_DATASETS,
     AdversarialExampleGenerator,
     AdversarialTrainingConfig,
     AdversarialTrainingDefense,
@@ -22,21 +20,17 @@
     ImageFGSMGenerator,
     ImagePGDGenerator,
     TabularAdversarialExampleGenerator,
-    TabularCAAGenerator,
     TabularCAPGDGenerator,
     TabularConstraintSet,
     apply_adversarial_training_if_enabled,
 )
 
 __all__ = [
-    "CAA_TABULAR_DATASETS",
     "ERR_ALPHA",
     "ERR_APPLY_PROBABILITY",
-    "ERR_CLIP_BOUNDS",
     "ERR_EPSILON",
     "ERR_IMAGE_ATTACK",
-    "ERR_LOSS_WEIGHTS",
-    "ERR_MIXED_WEIGHTS",
+    "ERR_LOSS_INCREASE",
     "ERR_MODE",
     "ERR_STEPS",
     "ERR_TABULAR_ATTACK",
@@ -45,6 +39,7 @@
     "IMAGE_ADVERSARIAL_ATTACKS",
     "IMAGE_DATASET_NORMALIZATION",
     "TABULAR_ADVERSARIAL_ATTACKS",
+    "TABULAR_ADVERSARIAL_DATASETS",
     "AdversarialExampleGenerator",
     "AdversarialTrainingConfig",
     "AdversarialTrainingDefense",
@@ -52,7 +47,6 @@
     "ImageFGSMGenerator",
     "ImagePGDGenerator",
     "TabularAdversarialExampleGenerator",
-    "TabularCAAGenerator",
     "TabularCAPGDGenerator",
     "TabularConstraintSet",
     "apply_adversarial_training_if_enabled",
diff --git a/nebula/addons/defenses/adversarial_training/config.py b/nebula/addons/defenses/adversarial_training/config.py
index fcb6c5aa2..ca003a87f 100644
--- a/nebula/addons/defenses/adversarial_training/config.py
+++ b/nebula/addons/defenses/adversarial_training/config.py
@@ -3,18 +3,16 @@
 
 IMAGE_ADVERSARIAL_ATTACKS = {"fgsm", "pgd"}
 TABULAR_ADVERSARIAL_ATTACKS = {"capgd"}
-CAA_TABULAR_DATASETS = {"AdultCensus"}
+TABULAR_ADVERSARIAL_DATASETS = {"AdultCensus"}
 
 ERR_IMAGE_ATTACK = "image adversarial_training.attack must be one of: fgsm, pgd"
 ERR_TABULAR_ATTACK = "tabular adversarial_training.attack must be one of: capgd"
-ERR_MODE = "adversarial_training.mode must be one of: clean, adversarial, mixed"
+ERR_MODE = "adversarial_training.mode must be one of: adversarial, mixed"
 ERR_EPSILON = "adversarial_training.epsilon must be >= 0"
 ERR_ALPHA = "adversarial_training.alpha must be >= 0"
 ERR_STEPS = "adversarial_training.steps must be >= 1"
 ERR_APPLY_PROBABILITY = "adversarial_training.apply_probability must be in [0, 1]"
-ERR_LOSS_WEIGHTS = "adversarial_training loss weights must be >= 0"
-ERR_MIXED_WEIGHTS = "adversarial_training mixed mode requires at least one positive loss weight"
-ERR_CLIP_BOUNDS = "adversarial_training.clip_min must be smaller than clip_max"
+ERR_LOSS_INCREASE = "adversarial_training loss increase thresholds must be >= 0 and target <= max"
 ERR_TABULAR_METADATA = "Tabular adversarial training requires tabular_metadata"
 ERR_UNSUPPORTED_ATTACK = "Unsupported adversarial training attack: {attack}"
 
@@ -36,13 +34,13 @@ class AdversarialTrainingConfig:
     epsilon: float = 8.0 / 255.0
     alpha: float | None = None
     steps: int = 1
+    mode: str = "mixed"
     clean_weight: float = 0.5
     adversarial_weight: float = 0.5
-    mode: str = "mixed"
-    apply_probability: float = 1.0
-    clip_min: float = 0.0
-    clip_max: float = 1.0
+    apply_probability: float = 0.3
     log_adversarial_metrics: bool = True
+    target_loss_increase: float | None = None
+    max_loss_increase: float | None = None
 
 
 def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTrainingConfig | None:
@@ -54,8 +52,9 @@ def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTr
     dataset_name = participant_config.get("data_args", {}).get("dataset")
     domain = str(raw.get("domain", "image")).lower()
     attack = str(raw.get("attack", "capgd" if domain == "tabular" else "fgsm")).lower()
-    if domain == "tabular" and attack == "caa":
-        attack = "capgd"
+
+    mode = str(raw.get("mode", "mixed")).lower()
+    clean_weight, adversarial_weight = _loss_weights_for_mode(mode)
 
     return AdversarialTrainingConfig(
         enabled=True,
@@ -65,19 +64,29 @@ def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTr
         epsilon=float(raw.get("epsilon", 8.0 / 255.0)),
         alpha=float(raw["alpha"]) if raw.get("alpha") is not None else None,
         steps=int(raw.get("steps", 1)),
-        clean_weight=float(raw.get("clean_weight", 0.5)),
-        adversarial_weight=float(raw.get("adversarial_weight", 0.5)),
-        mode=str(raw.get("mode", "mixed")).lower(),
-        apply_probability=float(raw.get("apply_probability", 1.0)),
-        clip_min=float(raw.get("clip_min", 0.0)),
-        clip_max=float(raw.get("clip_max", 1.0)),
-        log_adversarial_metrics=bool(raw.get("log_adversarial_metrics", True)),
+        mode=mode,
+        clean_weight=clean_weight,
+        adversarial_weight=adversarial_weight,
+        apply_probability=float(raw.get("apply_probability", 0.3)),
+        log_adversarial_metrics=True,
+        target_loss_increase=float(raw["target_loss_increase"])
+        if raw.get("target_loss_increase") is not None
+        else None,
+        max_loss_increase=float(raw["max_loss_increase"])
+        if raw.get("max_loss_increase") is not None
+        else None,
     )
 
 
+def _loss_weights_for_mode(mode: str) -> tuple[float, float]:
+    if mode == "adversarial":
+        return 0.0, 1.0
+    return 0.5, 0.5
+
+
 def validate_config(config: AdversarialTrainingConfig) -> None:
     # Fail early when a frontend/backend config value cannot produce a valid attack.
-    if config.mode not in {"clean", "adversarial", "mixed"}:
+    if config.mode not in {"adversarial", "mixed"}:
         raise ValueError(ERR_MODE)
     if config.domain == "image" and config.attack not in IMAGE_ADVERSARIAL_ATTACKS:
         raise ValueError(ERR_IMAGE_ATTACK)
@@ -91,9 +100,13 @@ def validate_config(config: AdversarialTrainingConfig) -> None:
         raise ValueError(ERR_STEPS)
     if not 0.0 <= config.apply_probability <= 1.0:
         raise ValueError(ERR_APPLY_PROBABILITY)
-    if config.clean_weight < 0 or config.adversarial_weight < 0:
-        raise ValueError(ERR_LOSS_WEIGHTS)
-    if config.mode == "mixed" and config.clean_weight + config.adversarial_weight == 0:
-        raise ValueError(ERR_MIXED_WEIGHTS)
-    if config.clip_min >= config.clip_max:
-        raise ValueError(ERR_CLIP_BOUNDS)
+    if config.target_loss_increase is not None and config.target_loss_increase < 0:
+        raise ValueError(ERR_LOSS_INCREASE)
+    if config.max_loss_increase is not None and config.max_loss_increase < 0:
+        raise ValueError(ERR_LOSS_INCREASE)
+    if (
+        config.target_loss_increase is not None
+        and config.max_loss_increase is not None
+        and config.target_loss_increase > config.max_loss_increase
+    ):
+        raise ValueError(ERR_LOSS_INCREASE)
diff --git a/nebula/addons/defenses/adversarial_training/defense.py b/nebula/addons/defenses/adversarial_training/defense.py
index d4e8da2fd..259784ab4 100644
--- a/nebula/addons/defenses/adversarial_training/defense.py
+++ b/nebula/addons/defenses/adversarial_training/defense.py
@@ -5,14 +5,11 @@
 
 from nebula.addons.defenses.adversarial_training.base import AdversarialExampleGenerator
 from nebula.addons.defenses.adversarial_training.config import (
-    CAA_TABULAR_DATASETS,
     ERR_ALPHA,
     ERR_APPLY_PROBABILITY,
-    ERR_CLIP_BOUNDS,
     ERR_EPSILON,
     ERR_IMAGE_ATTACK,
-    ERR_LOSS_WEIGHTS,
-    ERR_MIXED_WEIGHTS,
+    ERR_LOSS_INCREASE,
     ERR_MODE,
     ERR_STEPS,
     ERR_TABULAR_ATTACK,
@@ -21,6 +18,7 @@
     IMAGE_ADVERSARIAL_ATTACKS,
     IMAGE_DATASET_NORMALIZATION,
     TABULAR_ADVERSARIAL_ATTACKS,
+    TABULAR_ADVERSARIAL_DATASETS,
     AdversarialTrainingConfig,
     config_from_participant,
     validate_config,
@@ -33,7 +31,6 @@
 from nebula.addons.defenses.adversarial_training.logging import AdversarialTrainingSampleLogger
 from nebula.addons.defenses.adversarial_training.tabular import (
     TabularAdversarialExampleGenerator,
-    TabularCAAGenerator,
     TabularCAPGDGenerator,
     TabularConstraintSet,
 )
@@ -126,12 +123,6 @@ def compute_training_step(self, model, x, y, criterion):
             loss = criterion(logits, y)
             return loss, logits, {}
 
-        # "clean" mode keeps the normal training step but still goes through the defense hook.
-        if self.config.mode == "clean":
-            logits = model(x)
-            loss = criterion(logits, y)
-            return loss, logits, {}
-
         # Generate x_adv once and reuse it for logging, adversarial loss and metrics.
         x_adv = self.generator.generate(model, x, y, criterion)
         self._log_adversarial_samples(model, x, x_adv, y)
@@ -147,11 +138,8 @@ def compute_training_step(self, model, x, y, criterion):
 
         clean_logits = model(x)
         clean_loss = criterion(clean_logits, y)
-        total_weight = self.config.clean_weight + self.config.adversarial_weight
-        # "mixed" combines clean and adversarial losses with user-provided weights.
-        loss = (
-            self.config.clean_weight * clean_loss + self.config.adversarial_weight * adv_loss
-        ) / total_weight
+        # "mixed" sums the config weights as-is (0.5/0.5 from config_from_participant); no renormalization.
+        loss = self.config.clean_weight * clean_loss + self.config.adversarial_weight * adv_loss
 
         return loss, clean_logits, self._extra_metrics({
             "Clean Loss": clean_loss,
@@ -176,7 +164,7 @@ def _extra_metrics(self, metrics):
 
 
 def _log_tabular_metadata(tabular_metadata: TabularAdversarialMetadata) -> None:
-    # Log a compact metadata summary to make CAA setup auditable.
+    # Log a compact metadata summary to make CAPGD setup auditable.
     integer_features = _feature_names_by_type(tabular_metadata, {INTEGER})
     continuous_features = _feature_names_by_type(tabular_metadata, {CONTINUOUS})
     categorical_features = _feature_names_by_type(tabular_metadata, {CATEGORICAL})
@@ -225,26 +213,32 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
         model.set_adversarial_training(defense)
         logging.info(
             "[AdversarialTrainingDefense] Enabled | dataset=%s | attack=%s | epsilon_max=%s | "
-            "epsilon_range=[%.6f, %.6f] | epsilon_step=%.6f | mode=%s",
+            "epsilon_range=[%.6f, %.6f] | epsilon_step=%.6f | steps=%s | mode=%s | "
+            "clean_weight=%.2f | adversarial_weight=%.2f | apply_probability=%.2f | "
+            "target_loss_increase=%s | max_loss_increase=%s | log_adversarial_metrics=%s",
             defense.config.dataset_name,
             defense.config.attack,
             defense.config.epsilon,
             defense.config.epsilon / 4.0,
             defense.config.epsilon,
             defense.config.epsilon / 8.0,
+            defense.config.steps,
             defense.config.mode,
+            defense.config.clean_weight,
+            defense.config.adversarial_weight,
+            defense.config.apply_probability,
+            defense.config.target_loss_increase,
+            defense.config.max_loss_increase,
+            defense.config.log_adversarial_metrics,
         )
 
 
 __all__ = [
-    "CAA_TABULAR_DATASETS",
     "ERR_ALPHA",
     "ERR_APPLY_PROBABILITY",
-    "ERR_CLIP_BOUNDS",
     "ERR_EPSILON",
     "ERR_IMAGE_ATTACK",
-    "ERR_LOSS_WEIGHTS",
-    "ERR_MIXED_WEIGHTS",
+    "ERR_LOSS_INCREASE",
     "ERR_MODE",
     "ERR_STEPS",
     "ERR_TABULAR_ATTACK",
@@ -253,6 +247,7 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
     "IMAGE_ADVERSARIAL_ATTACKS",
     "IMAGE_DATASET_NORMALIZATION",
     "TABULAR_ADVERSARIAL_ATTACKS",
+    "TABULAR_ADVERSARIAL_DATASETS",
     "AdversarialExampleGenerator",
     "AdversarialTrainingConfig",
     "AdversarialTrainingDefense",
@@ -260,7 +255,6 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
     "ImageFGSMGenerator",
     "ImagePGDGenerator",
     "TabularAdversarialExampleGenerator",
-    "TabularCAAGenerator",
     "TabularCAPGDGenerator",
     "TabularConstraintSet",
     "apply_adversarial_training_if_enabled",
diff --git a/nebula/addons/defenses/adversarial_training/image.py b/nebula/addons/defenses/adversarial_training/image.py
index cd6dbd129..585231f32 100644
--- a/nebula/addons/defenses/adversarial_training/image.py
+++ b/nebula/addons/defenses/adversarial_training/image.py
@@ -3,6 +3,9 @@
 from nebula.addons.defenses.adversarial_training.base import AdversarialExampleGenerator
 from nebula.addons.defenses.adversarial_training.config import AdversarialTrainingConfig
 
+IMAGE_CLIP_MIN = 0.0
+IMAGE_CLIP_MAX = 1.0
+
 
 class ImageAdversarialExampleGenerator(AdversarialExampleGenerator):
     def __init__(self, config: AdversarialTrainingConfig, mean: tuple[float, ...], std: tuple[float, ...]):
@@ -33,15 +36,15 @@ def _bounds(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
         # Convert valid pixel bounds to the normalized space where the model operates.
         mean = self._channel_tensor(self.mean, x)
         std = self._channel_tensor(self.std, x)
-        lower = (float(self.config.clip_min) - mean) / std
-        upper = (float(self.config.clip_max) - mean) / std
+        lower = (IMAGE_CLIP_MIN - mean) / std
+        upper = (IMAGE_CLIP_MAX - mean) / std
         return lower, upper
 
     def denormalize(self, x: torch.Tensor) -> torch.Tensor:
         # Convert normalized tensors back to pixel scale for logging.
         mean = self._channel_tensor(self.mean, x)
         std = self._channel_tensor(self.std, x)
-        return (x * std + mean).clamp(float(self.config.clip_min), float(self.config.clip_max))
+        return (x * std + mean).clamp(IMAGE_CLIP_MIN, IMAGE_CLIP_MAX)
 
     def _project(self, x_adv: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
         # Keep the adversarial image inside both the epsilon ball and valid pixel bounds.
diff --git a/nebula/addons/defenses/adversarial_training/tabular.py b/nebula/addons/defenses/adversarial_training/tabular.py
index 2d1d954e8..4786a6ecf 100644
--- a/nebula/addons/defenses/adversarial_training/tabular.py
+++ b/nebula/addons/defenses/adversarial_training/tabular.py
@@ -40,7 +40,7 @@ def perturbable_mask(self, x: torch.Tensor) -> torch.Tensor:
         return self.tensors(x)["perturbable"]
 
     def project(self, x_candidate: torch.Tensor, x_clean: torch.Tensor, epsilon: float) -> torch.Tensor:
-        """Clamp numeric features, round integers, restore immutable features and fix one-hot groups."""
+        # Clamp numeric features, round integers, restore immutable features and fix one-hot groups.
         tensors = self.tensors(x_clean)
         lower, upper = self._bounds(x_clean, epsilon, tensors)
 
@@ -152,6 +152,10 @@ def _margin(self, logits: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         other_logits = logits.masked_fill(true_class_mask, float("-inf"))
         return other_logits.max(dim=1).values - true_logits
 
+    def _per_sample_loss(self, logits: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        # Unreduced loss: CAPGD scores each row separately to keep that row's best candidate.
+        return F.cross_entropy(logits, y, reduction="none")
+
 
 class TabularCAPGDGenerator(TabularAdversarialExampleGenerator):
     """First-phase constrained tabular CAPGD generator."""
@@ -170,6 +174,8 @@ def generate(self, model, x, y, criterion):
         x_adv = x_clean.clone()
         best_adv = x_adv.clone()
         best_score = torch.full((x_clean.size(0),), float("-inf"), dtype=x_clean.dtype, device=x_clean.device)
+        use_loss_window = self._use_loss_window()
+        clean_loss = self._clean_loss(model, x_clean, y) if use_loss_window else None
 
         for _ in range(steps):
             # CAPGD step: move in the sign of the loss gradient, but only on perturbable features.
@@ -184,16 +190,49 @@ def generate(self, model, x, y, criterion):
             candidate = self.constraints.project(candidate, x_clean, epsilon)
 
             with torch.no_grad():
-                # Keep the strongest candidate per sample, not just the last step.
-                candidate_score = self._margin(model(candidate), y)
-                better = candidate_score > best_score
+                # Keep the best candidate per sample, not just the last step.
+                candidate_logits = model(candidate)
+                if use_loss_window:
+                    candidate_score = self._loss_increase(candidate_logits, y, clean_loss)
+                    better = self._loss_window_better(candidate_score, best_score)
+                else:
+                    candidate_score = self._margin(candidate_logits, y)
+                    better = candidate_score > best_score
                 best_adv = torch.where(better.view(-1, 1), candidate, best_adv)
                 best_score = torch.where(better, candidate_score, best_score)
 
+                if self._target_reached(best_score):
+                    break
+
             x_adv = candidate
 
         return best_adv.detach()
 
+    def _use_loss_window(self) -> bool:
+        return self.config.target_loss_increase is not None or self.config.max_loss_increase is not None
+
+    def _clean_loss(self, model, x_clean: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        # Baseline difficulty. Candidate scores become loss(candidate) - loss(clean).
+        with torch.no_grad():
+            return self._per_sample_loss(model(x_clean), y)
 
-# Compatibility alias while old configs/UI still refer to the future CAA attack.
-TabularCAAGenerator = TabularCAPGDGenerator
+    def _loss_increase(
+        self,
+        candidate_logits: torch.Tensor,
+        y: torch.Tensor,
+        clean_loss: torch.Tensor,
+    ) -> torch.Tensor:
+        return self._per_sample_loss(candidate_logits, y) - clean_loss
+
+    def _loss_window_better(self, candidate_score: torch.Tensor, best_score: torch.Tensor) -> torch.Tensor:
+        # A candidate must make the sample harder. If max_loss_increase is set, reject overshoots.
+        valid = candidate_score > 0.0
+        if self.config.max_loss_increase is not None:
+            valid = valid & (candidate_score <= float(self.config.max_loss_increase))
+        return valid & (candidate_score > best_score)
+
+    def _target_reached(self, best_score: torch.Tensor) -> bool:
+        # Once every sample has reached the requested hardness, stop taking stronger steps.
+        if self.config.target_loss_increase is None:
+            return False
+        return bool((best_score >= float(self.config.target_loss_increase)).all().item())
diff --git a/nebula/controller/scenarios.py b/nebula/controller/scenarios.py
index f9fa18a17..7325e650a 100644
--- a/nebula/controller/scenarios.py
+++ b/nebula/controller/scenarios.py
@@ -748,11 +748,9 @@ def __init__(self, scenario, user=None):
             for key in (
                 "epsilon",
                 "alpha",
-                "clean_weight",
-                "adversarial_weight",
                 "apply_probability",
-                "clip_min",
-                "clip_max",
+                "target_loss_increase",
+                "max_loss_increase",
             ):
                 if key in adversarial_training and adversarial_training[key] is not None:
                     participant_config["defense_args"]["adversarial_training"][key] = float(
@@ -766,10 +764,6 @@ def __init__(self, scenario, user=None):
                 participant_config["defense_args"]["adversarial_training"]["mode"] = str(
                     adversarial_training["mode"]
                 )
-            if "log_adversarial_metrics" in adversarial_training:
-                participant_config["defense_args"]["adversarial_training"]["log_adversarial_metrics"] = bool(
-                    adversarial_training["log_adversarial_metrics"]
-                )
             participant_config["device_args"]["accelerator"] = self.scenario.accelerator
             participant_config["device_args"]["gpu_id"] = self.scenario.gpu_id
             participant_config["device_args"]["logging"] = self.scenario.logginglevel
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index e3c65f409..88c017ba1 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -115,13 +115,10 @@
       "attack": "fgsm",
       "epsilon": 0.03,
       "steps": 1,
-      "clean_weight": 0.5,
-      "adversarial_weight": 0.5,
       "mode": "mixed",
-      "apply_probability": 1.0,
-      "clip_min": 0.0,
-      "clip_max": 1.0,
-      "log_adversarial_metrics": true
+      "apply_probability": 0.3,
+      "target_loss_increase": null,
+      "max_loss_increase": null
     },
     "reputation": {
       "enabled": false,
diff --git a/nebula/frontend/static/js/deployment/adversarial-training.js b/nebula/frontend/static/js/deployment/adversarial-training.js
index ceb965246..b02addf4e 100644
--- a/nebula/frontend/static/js/deployment/adversarial-training.js
+++ b/nebula/frontend/static/js/deployment/adversarial-training.js
@@ -8,22 +8,20 @@ const AdversarialTrainingManager = (function() {
         alpha: null,
         steps: 1,
         mode: "mixed",
-        clean_weight: 0.5,
-        adversarial_weight: 0.5,
-        apply_probability: 1.0,
-        clip_min: 0.0,
-        clip_max: 1.0,
-        log_adversarial_metrics: true
+        apply_probability: 0.3,
+        log_adversarial_metrics: true,
+        target_loss_increase: null,
+        max_loss_increase: null
     };
 
     const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
-    const CAA_TABULAR_DATASETS = new Set(["AdultCensus"]);
+    const TABULAR_ADVERSARIAL_DATASETS = new Set(["AdultCensus"]);
     const IMAGE_ATTACK_OPTIONS = [
         {value: "fgsm", label: "FGSM"},
         {value: "pgd", label: "PGD"}
     ];
     const TABULAR_ATTACK_OPTIONS = [
-        {value: "caa", label: "CAA"}
+        {value: "capgd", label: "CAPGD"}
     ];
 
     function initializeAdversarialTraining() {
@@ -73,11 +71,16 @@ const AdversarialTrainingManager = (function() {
     function toggleAttackSettings(attack) {
         const pgdSettings = document.getElementById("adversarial-training-pgd-settings");
         const stepsTitle = document.getElementById("adversarialTrainingStepsTitle");
+        const lossWindowSettings = document.getElementById("adversarial-training-loss-window-settings");
+        const domain = document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain;
         if (!pgdSettings) return;
 
-        pgdSettings.style.display = ["pgd", "caa"].includes(attack) ? "block" : "none";
+        pgdSettings.style.display = ["pgd", "capgd"].includes(attack) ? "block" : "none";
+        if (lossWindowSettings) {
+            lossWindowSettings.style.display = domain === "tabular" ? "block" : "none";
+        }
         if (stepsTitle) {
-            stepsTitle.textContent = attack === "caa" ? "CAA search steps" : "PGD steps";
+            stepsTitle.textContent = domain === "tabular" ? "CAPGD steps" : "PGD steps";
         }
     }
 
@@ -91,7 +94,7 @@ const AdversarialTrainingManager = (function() {
 
         if (datasetNote) {
             datasetNote.style.display = domain === "unsupported" ? "block" : "none";
-            datasetNote.textContent = "Adversarial Training for tabular datasets currently supports AdultCensus with CAA.";
+            datasetNote.textContent = "Adversarial Training for tabular datasets currently supports AdultCensus with CAPGD.";
         }
         if (domainInput) {
             domainInput.value = domain === "unsupported" ? "tabular" : domain;
@@ -116,7 +119,7 @@ const AdversarialTrainingManager = (function() {
         if (IMAGE_DATASETS.has(dataset)) {
             return "image";
         }
-        if (CAA_TABULAR_DATASETS.has(dataset)) {
+        if (TABULAR_ADVERSARIAL_DATASETS.has(dataset)) {
             return "tabular";
         }
         return "unsupported";
@@ -126,7 +129,7 @@ const AdversarialTrainingManager = (function() {
         const attackSelect = document.getElementById("adversarialTrainingAttack");
         if (!attackSelect) return;
 
-        // Tabular datasets intentionally expose only CAA; image datasets expose FGSM/PGD.
+        // Tabular datasets intentionally expose only CAPGD; image datasets expose FGSM/PGD.
         const options = domain === "tabular" ? TABULAR_ATTACK_OPTIONS : IMAGE_ATTACK_OPTIONS;
         const currentAttack = preferredAttack || attackSelect.value;
         attackSelect.innerHTML = "";
@@ -167,7 +170,7 @@ const AdversarialTrainingManager = (function() {
     function getAdversarialTrainingConfig() {
         const domain = document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain;
         const attack = domain === "tabular"
-            ? "caa"
+            ? "capgd"
             : (document.getElementById("adversarialTrainingAttack")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.attack);
         const config = {
             enabled: Boolean(document.getElementById("adversarialTrainingSwitch")?.checked),
@@ -177,17 +180,27 @@ const AdversarialTrainingManager = (function() {
             alpha: optionalNumberValue("adversarialTrainingAlpha", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.alpha),
             steps: integerValue("adversarialTrainingSteps", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.steps),
             mode: document.getElementById("adversarialTrainingMode")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.mode,
-            clean_weight: numberValue("adversarialTrainingCleanWeight", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.clean_weight),
-            adversarial_weight: numberValue("adversarialTrainingAdversarialWeight", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.adversarial_weight),
             apply_probability: numberValue("adversarialTrainingApplyProbability", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.apply_probability),
-            clip_min: numberValue("adversarialTrainingClipMin", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.clip_min),
-            clip_max: numberValue("adversarialTrainingClipMax", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.clip_max),
-            log_adversarial_metrics: Boolean(document.getElementById("adversarialTrainingLogMetrics")?.checked)
+            target_loss_increase: optionalNumberValue(
+                "adversarialTrainingTargetLossIncrease",
+                DEFAULT_ADVERSARIAL_TRAINING_CONFIG.target_loss_increase
+            ),
+            max_loss_increase: optionalNumberValue(
+                "adversarialTrainingMaxLossIncrease",
+                DEFAULT_ADVERSARIAL_TRAINING_CONFIG.max_loss_increase
+            ),
+            log_adversarial_metrics: true
         };
 
         if (config.alpha === null || config.attack !== "pgd") {
             delete config.alpha;
         }
+        if (config.target_loss_increase === null) {
+            delete config.target_loss_increase;
+        }
+        if (config.max_loss_increase === null) {
+            delete config.max_loss_increase;
+        }
         return config;
     }
 
@@ -204,17 +217,15 @@ const AdversarialTrainingManager = (function() {
         setValue("adversarialTrainingEpsilon", adversarialTrainingConfig.epsilon);
         setValue("adversarialTrainingAlpha", adversarialTrainingConfig.alpha ?? "");
         setValue("adversarialTrainingSteps", adversarialTrainingConfig.steps);
-        setValue("adversarialTrainingMode", adversarialTrainingConfig.mode);
-        setValue("adversarialTrainingCleanWeight", adversarialTrainingConfig.clean_weight);
-        setValue("adversarialTrainingAdversarialWeight", adversarialTrainingConfig.adversarial_weight);
+        setValue(
+            "adversarialTrainingMode",
+            ["mixed", "adversarial"].includes(adversarialTrainingConfig.mode)
+                ? adversarialTrainingConfig.mode
+                : DEFAULT_ADVERSARIAL_TRAINING_CONFIG.mode
+        );
         setValue("adversarialTrainingApplyProbability", adversarialTrainingConfig.apply_probability);
-        setValue("adversarialTrainingClipMin", adversarialTrainingConfig.clip_min);
-        setValue("adversarialTrainingClipMax", adversarialTrainingConfig.clip_max);
-
-        const logMetricsInput = document.getElementById("adversarialTrainingLogMetrics");
-        if (logMetricsInput) {
-            logMetricsInput.checked = Boolean(adversarialTrainingConfig.log_adversarial_metrics);
-        }
+        setValue("adversarialTrainingTargetLossIncrease", adversarialTrainingConfig.target_loss_increase ?? "");
+        setValue("adversarialTrainingMaxLossIncrease", adversarialTrainingConfig.max_loss_increase ?? "");
 
         updateDatasetAvailability();
         const domain = document.getElementById("adversarialTrainingDomain")?.value || adversarialTrainingConfig.domain;
@@ -241,20 +252,27 @@ const AdversarialTrainingManager = (function() {
         if (config.epsilon < 0) {
             return "[Adversarial Training] Epsilon must be greater than or equal to 0.";
         }
-        if (["pgd", "caa"].includes(config.attack) && config.steps < 1) {
+        if (["pgd", "capgd"].includes(config.attack) && config.steps < 1) {
             return "[Adversarial Training] Search steps must be at least 1.";
         }
-        if (config.clean_weight < 0 || config.adversarial_weight < 0) {
-            return "[Adversarial Training] Loss weights must be greater than or equal to 0.";
-        }
-        if (config.mode === "mixed" && config.clean_weight + config.adversarial_weight === 0) {
-            return "[Adversarial Training] Mixed mode needs at least one positive loss weight.";
+        if (!["mixed", "adversarial"].includes(config.mode)) {
+            return "[Adversarial Training] Training mode must be Clean + adversarial or Adversarial only.";
         }
         if (config.apply_probability < 0 || config.apply_probability > 1) {
             return "[Adversarial Training] Apply probability must be between 0 and 1.";
         }
-        if (config.clip_min >= config.clip_max) {
-            return "[Adversarial Training] Min bound must be smaller than max bound.";
+        if (config.target_loss_increase !== undefined && config.target_loss_increase < 0) {
+            return "[Adversarial Training] Target loss increase must be greater than or equal to 0.";
+        }
+        if (config.max_loss_increase !== undefined && config.max_loss_increase < 0) {
+            return "[Adversarial Training] Max loss increase must be greater than or equal to 0.";
+        }
+        if (
+            config.target_loss_increase !== undefined
+            && config.max_loss_increase !== undefined
+            && config.target_loss_increase > config.max_loss_increase
+        ) {
+            return "[Adversarial Training] Target loss increase must be smaller than or equal to max loss increase.";
         }
         return null;
     }
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 5ad22ac87..bfa249739 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -588,7 +588,7 @@ <h5 class="step-title">Enable/Disable Adversarial Training</h5>
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <small id="adversarial-training-dataset-note" class="form-text text-muted" style="display: none;">
-                        Adversarial Training for tabular datasets currently supports AdultCensus with CAA.
+                        Adversarial Training for tabular datasets currently supports AdultCensus with CAPGD.
                     </small>
                     <div id="adversarial-training-settings" style="margin-top: 10px; display: none;">
                         <input type="hidden" id="adversarialTrainingDomain" value="image">
@@ -606,6 +606,23 @@ <h5 class="step-title">Epsilon</h5>
                                 placeholder="Epsilon" min="0" step="0.001" value="0.031372549"
                                 style="display: inline; width: 80%">
                         </div>
+                        <h5 class="step-title">Apply probability</h5>
+                        <div class="form-check form-check-inline">
+                            <input type="number" class="form-control" id="adversarialTrainingApplyProbability"
+                                placeholder="Probability" min="0" max="1" step="0.05" value="0.3"
+                                style="display: inline; width: 80%">
+                        </div>
+                        <div id="adversarial-training-loss-window-settings" style="display: none;">
+                            <h5 class="step-title">Tabular hard-example loss window</h5>
+                            <div class="form-check form-check-inline">
+                                <input type="number" class="form-control" id="adversarialTrainingTargetLossIncrease"
+                                    placeholder="Target loss increase" min="0" step="0.01" value=""
+                                    style="display: inline; width: 39%">
+                                <input type="number" class="form-control" id="adversarialTrainingMaxLossIncrease"
+                                    placeholder="Max loss increase" min="0" step="0.01" value=""
+                                    style="display: inline; width: 39%">
+                            </div>
+                        </div>
                         <div id="adversarial-training-pgd-settings" style="display: none;">
                             <h5 class="step-title" id="adversarialTrainingStepsTitle">PGD steps</h5>
                             <div class="form-check form-check-inline">
@@ -621,43 +638,10 @@ <h5 class="step-title">Training mode</h5>
                                 style="display: inline; width: 80%">
                                 <option value="mixed" selected>Clean + adversarial</option>
                                 <option value="adversarial">Adversarial only</option>
-                                <option value="clean">Clean only</option>
                             </select>
                         </div>
-                        <details style="margin-top: 10px;">
-                            <summary class="step-title" style="cursor: pointer;">Advanced parameters</summary>
-                            <h5 class="step-title">Loss weights</h5>
-                            <div class="form-check form-check-inline">
-                                <input type="number" class="form-control" id="adversarialTrainingCleanWeight"
-                                    placeholder="Clean weight" min="0" step="0.1" value="0.5"
-                                    style="display: inline; width: 39%">
-                                <input type="number" class="form-control" id="adversarialTrainingAdversarialWeight"
-                                    placeholder="Adversarial weight" min="0" step="0.1" value="0.5"
-                                    style="display: inline; width: 39%">
-                            </div>
-                            <h5 class="step-title">Apply probability</h5>
-                            <div class="form-check form-check-inline">
-                                <input type="number" class="form-control" id="adversarialTrainingApplyProbability"
-                                    placeholder="Probability" min="0" max="1" step="0.05" value="1.0"
-                                    style="display: inline; width: 80%">
-                            </div>
-                            <h5 class="step-title">Pixel bounds</h5>
-                            <div class="form-check form-check-inline">
-                                <input type="number" class="form-control" id="adversarialTrainingClipMin"
-                                    placeholder="Min" step="0.1" value="0.0"
-                                    style="display: inline; width: 39%">
-                                <input type="number" class="form-control" id="adversarialTrainingClipMax"
-                                    placeholder="Max" step="0.1" value="1.0"
-                                    style="display: inline; width: 39%">
-                            </div>
-                            <h5 class="step-title">Log adversarial metrics</h5>
-                            <div class="form-check form-switch" style="margin-left: 23px;">
-                                <input class="form-check-input" type="checkbox" id="adversarialTrainingLogMetrics"
-                                    checked style="display: inline; width: 80px; height: 30px;">
-                            </div>
-                        </details>
                         <small class="form-text text-muted">
-                            Image datasets use FGSM/PGD. AdultCensus uses CAA for tabular adversarial training.
+                            Image datasets use FGSM/PGD. AdultCensus uses CAPGD for tabular adversarial training.
                         </small>
                     </div>
                 </div>

From 800b5931edca5371185bb5206ca3ba014e800878 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Fri, 5 Jun 2026 18:30:03 +0200
Subject: [PATCH 61/66] CAPGD name changed to constrained PGD. Adult Census and
 Breast Cancer finished. Other minor changes.

---
 .../defenses/adversarial_training/__init__.py |   4 +-
 .../defenses/adversarial_training/config.py   |   8 +-
 .../defenses/adversarial_training/defense.py  |   8 +-
 .../defenses/adversarial_training/tabular.py  |  12 +-
 .../trustworthiness/factsheet_common.py       |  42 +++-
 .../core/datasets/adultcensus/adultcensus.py  | 187 ++++++++----------
 .../datasets/breast_cancer/breast_cancer.py   | 148 +++++++-------
 nebula/core/datasets/covtype/covtype.py       | 133 +++++++------
 nebula/core/datasets/tabular_metadata.py      | 147 ++++++++++++++
 .../js/deployment/adversarial-training.js     |  16 +-
 nebula/frontend/templates/deployment.html     |   4 +-
 11 files changed, 445 insertions(+), 264 deletions(-)

diff --git a/nebula/addons/defenses/adversarial_training/__init__.py b/nebula/addons/defenses/adversarial_training/__init__.py
index 844956335..ddc977538 100644
--- a/nebula/addons/defenses/adversarial_training/__init__.py
+++ b/nebula/addons/defenses/adversarial_training/__init__.py
@@ -20,7 +20,7 @@
     ImageFGSMGenerator,
     ImagePGDGenerator,
     TabularAdversarialExampleGenerator,
-    TabularCAPGDGenerator,
+    TabularConstrainedPGDGenerator,
     TabularConstraintSet,
     apply_adversarial_training_if_enabled,
 )
@@ -47,7 +47,7 @@
     "ImageFGSMGenerator",
     "ImagePGDGenerator",
     "TabularAdversarialExampleGenerator",
-    "TabularCAPGDGenerator",
+    "TabularConstrainedPGDGenerator",
     "TabularConstraintSet",
     "apply_adversarial_training_if_enabled",
 ]
diff --git a/nebula/addons/defenses/adversarial_training/config.py b/nebula/addons/defenses/adversarial_training/config.py
index ca003a87f..a5ca04a15 100644
--- a/nebula/addons/defenses/adversarial_training/config.py
+++ b/nebula/addons/defenses/adversarial_training/config.py
@@ -2,11 +2,11 @@
 from typing import Any
 
 IMAGE_ADVERSARIAL_ATTACKS = {"fgsm", "pgd"}
-TABULAR_ADVERSARIAL_ATTACKS = {"capgd"}
-TABULAR_ADVERSARIAL_DATASETS = {"AdultCensus"}
+TABULAR_ADVERSARIAL_ATTACKS = {"constrained_pgd"}
+TABULAR_ADVERSARIAL_DATASETS = {"AdultCensus", "BreastCancer", "Covtype"}
 
 ERR_IMAGE_ATTACK = "image adversarial_training.attack must be one of: fgsm, pgd"
-ERR_TABULAR_ATTACK = "tabular adversarial_training.attack must be one of: capgd"
+ERR_TABULAR_ATTACK = "tabular adversarial_training.attack must be one of: constrained_pgd"
 ERR_MODE = "adversarial_training.mode must be one of: adversarial, mixed"
 ERR_EPSILON = "adversarial_training.epsilon must be >= 0"
 ERR_ALPHA = "adversarial_training.alpha must be >= 0"
@@ -51,7 +51,7 @@ def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTr
 
     dataset_name = participant_config.get("data_args", {}).get("dataset")
     domain = str(raw.get("domain", "image")).lower()
-    attack = str(raw.get("attack", "capgd" if domain == "tabular" else "fgsm")).lower()
+    attack = str(raw.get("attack", "constrained_pgd" if domain == "tabular" else "fgsm")).lower()
 
     mode = str(raw.get("mode", "mixed")).lower()
     clean_weight, adversarial_weight = _loss_weights_for_mode(mode)
diff --git a/nebula/addons/defenses/adversarial_training/defense.py b/nebula/addons/defenses/adversarial_training/defense.py
index 259784ab4..4cd6b6923 100644
--- a/nebula/addons/defenses/adversarial_training/defense.py
+++ b/nebula/addons/defenses/adversarial_training/defense.py
@@ -31,7 +31,7 @@
 from nebula.addons.defenses.adversarial_training.logging import AdversarialTrainingSampleLogger
 from nebula.addons.defenses.adversarial_training.tabular import (
     TabularAdversarialExampleGenerator,
-    TabularCAPGDGenerator,
+    TabularConstrainedPGDGenerator,
     TabularConstraintSet,
 )
 from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
@@ -63,7 +63,7 @@ def from_participant_config(
 
         if config.domain == "tabular":
             metadata = cls._get_tabular_metadata(partition)
-            return cls(config=config, generator=TabularCAPGDGenerator(config, metadata))
+            return cls(config=config, generator=TabularConstrainedPGDGenerator(config, metadata))
 
         if config.domain == "image":
             # Image attacks run in normalized model space, so each dataset must provide mean/std.
@@ -164,7 +164,7 @@ def _extra_metrics(self, metrics):
 
 
 def _log_tabular_metadata(tabular_metadata: TabularAdversarialMetadata) -> None:
-    # Log a compact metadata summary to make CAPGD setup auditable.
+    # Log a compact metadata summary to make constrained PGD setup auditable.
     integer_features = _feature_names_by_type(tabular_metadata, {INTEGER})
     continuous_features = _feature_names_by_type(tabular_metadata, {CONTINUOUS})
     categorical_features = _feature_names_by_type(tabular_metadata, {CATEGORICAL})
@@ -255,7 +255,7 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
     "ImageFGSMGenerator",
     "ImagePGDGenerator",
     "TabularAdversarialExampleGenerator",
-    "TabularCAPGDGenerator",
+    "TabularConstrainedPGDGenerator",
     "TabularConstraintSet",
     "apply_adversarial_training_if_enabled",
 ]
diff --git a/nebula/addons/defenses/adversarial_training/tabular.py b/nebula/addons/defenses/adversarial_training/tabular.py
index 4786a6ecf..2ae280be0 100644
--- a/nebula/addons/defenses/adversarial_training/tabular.py
+++ b/nebula/addons/defenses/adversarial_training/tabular.py
@@ -15,7 +15,7 @@ def __init__(self, metadata: TabularAdversarialMetadata):
         self._tensor_cache: dict[tuple[torch.device, torch.dtype], dict[str, torch.Tensor]] = {}
 
     def tensors(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
-        # Masks and bounds are reused in every CAPGD step, so build them once for each tensor placement.
+        # Masks and bounds are reused in every constrained PGD step, so build them once per placement.
         key = (x.device, x.dtype)
         cached = self._tensor_cache.get(key)
         if cached is not None:
@@ -140,7 +140,7 @@ def __init__(self, config: AdversarialTrainingConfig, metadata: TabularAdversari
         self.constraints = TabularConstraintSet(metadata)
 
     def _alpha(self, epsilon: float) -> float:
-        # By default, distribute the epsilon budget evenly across CAPGD steps.
+        # By default, distribute the epsilon budget evenly across constrained PGD steps.
         if self.config.alpha is not None:
             return float(self.config.alpha)
         return float(epsilon) / max(int(self.config.steps), 1)
@@ -153,12 +153,12 @@ def _margin(self, logits: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         return other_logits.max(dim=1).values - true_logits
 
     def _per_sample_loss(self, logits: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
-        # CAPGD needs per-sample scores so each row can stop once it is hard enough.
+        # The attack needs per-sample scores so each row can stop once it is hard enough.
         return F.cross_entropy(logits, y, reduction="none")
 
 
-class TabularCAPGDGenerator(TabularAdversarialExampleGenerator):
-    """First-phase constrained tabular CAPGD generator."""
+class TabularConstrainedPGDGenerator(TabularAdversarialExampleGenerator):
+    """Constrained PGD generator for tabular adversarial examples."""
 
     def generate(self, model, x, y, criterion):
         # Sample one attack strength for this batch, matching the image generator behavior.
@@ -178,7 +178,7 @@ def generate(self, model, x, y, criterion):
         clean_loss = self._clean_loss(model, x_clean, y) if use_loss_window else None
 
         for _ in range(steps):
-            # CAPGD step: move in the sign of the loss gradient, but only on perturbable features.
+            # PGD step: move in the sign of the loss gradient, but only on perturbable features.
             x_grad = x_adv.detach().requires_grad_(True)
             logits = model(x_grad)
             loss = criterion(logits, y)
diff --git a/nebula/addons/trustworthiness/factsheet_common.py b/nebula/addons/trustworthiness/factsheet_common.py
index 7cfbe11d9..9c08c62c2 100644
--- a/nebula/addons/trustworthiness/factsheet_common.py
+++ b/nebula/addons/trustworthiness/factsheet_common.py
@@ -90,6 +90,28 @@ def inverse_score(value):
     return 1 / (1 + value)
 
 
+def get_enabled_defences(data):
+    # Return the active training-time defences declared in the scenario.
+    defences = []
+    if data.get("reputation", {}).get("enabled", False):
+        defences.append("reputation-based defence")
+    if data.get("feature_squeezing", {}).get("enabled", False):
+        defences.append("feature squeezing")
+    if data.get("adversarial_training", {}).get("enabled", False):
+        defences.append(_format_adversarial_training_defence(data["adversarial_training"]))
+    return defences
+
+
+def _format_adversarial_training_defence(adversarial_training):
+    attack = str(adversarial_training.get("attack", "")).upper()
+    domain = str(adversarial_training.get("domain", "")).lower()
+    if attack:
+        return f"adversarial training with {attack}"
+    if domain:
+        return f"adversarial training for {domain} data"
+    return "adversarial training"
+
+
 def build_project_background(data):
     # Build the natural-language scenario description used in factsheets.
     federation = data["federation"]
@@ -99,7 +121,7 @@ def build_project_background(data):
     aggregation_algorithm = data["agg_algorithm"]
     n_rounds = int(data["rounds"])
     attack = data["attack_params"]["attacks"]
-    with_reputation = data["reputation"]["enabled"]
+    enabled_defences = get_enabled_defences(data)
 
     base = (
         "For the project setup, the most important aspects are the following: "
@@ -113,8 +135,9 @@ def build_project_background(data):
     else:
         attack_text = "No attacks are used. "
 
-    if with_reputation:
-        defence_text = "A reputation-based defence is used, and the trustworthiness of the project is desired."
+    if enabled_defences:
+        defence_list = ", ".join(enabled_defences)
+        defence_text = f"The active defence mechanisms are: {defence_list}. The trustworthiness of the project is desired."
     else:
         defence_text = "No defence mechanism is used, and the trustworthiness of the project is desired."
 
@@ -145,6 +168,19 @@ def populate_common_pre_train_sections(factsheet, data, model):
     factsheet["configuration"]["reputation_enabled"] = bool(
         data.get("reputation", {}).get("enabled", False)
     )
+    adversarial_training = data.get("adversarial_training", {}) or {}
+    factsheet["configuration"]["adversarial_training"] = bool(
+        adversarial_training.get("enabled", False)
+    )
+    factsheet["configuration"]["adversarial_training_domain"] = (
+        adversarial_training.get("domain", "") if adversarial_training.get("enabled", False) else ""
+    )
+    factsheet["configuration"]["adversarial_training_attack"] = (
+        adversarial_training.get("attack", "") if adversarial_training.get("enabled", False) else ""
+    )
+    factsheet["configuration"]["adversarial_training_mode"] = (
+        adversarial_training.get("mode", "") if adversarial_training.get("enabled", False) else ""
+    )
     factsheet["configuration"]["visualization"] = True
     factsheet["configuration"]["monitoring"] = True
     factsheet["configuration"]["total_round_num"] = int(data["rounds"])
diff --git a/nebula/core/datasets/adultcensus/adultcensus.py b/nebula/core/datasets/adultcensus/adultcensus.py
index 51aa5e7a2..062ffc380 100644
--- a/nebula/core/datasets/adultcensus/adultcensus.py
+++ b/nebula/core/datasets/adultcensus/adultcensus.py
@@ -1,5 +1,6 @@
 # nebula/core/datasets/adultcensus/adultcensus.py
 
+import logging
 import os
 from typing import Any, ClassVar
 
@@ -8,7 +9,11 @@
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
-from nebula.core.datasets.tabular_metadata import CATEGORICAL, CONTINUOUS, INTEGER, TabularAdversarialMetadata
+from nebula.core.datasets.tabular_metadata import (
+    build_tabular_adversarial_metadata,
+)
+
+logger = logging.getLogger(__name__)
 
 
 class AdultCensusTorchDataset(Dataset):
@@ -25,6 +30,7 @@ def __init__(
         continuous_features: list[int] | None = None,
         integer_features: list[int] | None = None,
         categorical_features: list[int] | None = None,
+        non_perturbable_features: list[int] | None = None,
         categorical_groups: list[list[int]] | None = None,
         tabular_metadata: dict | None = None,
     ):
@@ -41,7 +47,7 @@ def __init__(
         self.x: np.ndarray = x.astype(np.float32, copy=False)
         self.y: np.ndarray = y_arr.astype(np.int64, copy=False)
 
-        # Nebula conventions
+        # Nebula dataset conventions used by partitioning, logging and model setup.
         self.data: np.ndarray = self.x
         self.targets: np.ndarray = self.y
         self.classes: list[str] = ["<=50K", ">50K"]
@@ -49,6 +55,7 @@ def __init__(
         self.continuous_features = continuous_features or []
         self.integer_features = integer_features or []
         self.categorical_features = categorical_features or []
+        self.non_perturbable_features = non_perturbable_features or []
         self.categorical_groups = categorical_groups or []
         self.tabular_metadata = tabular_metadata
         self.input_dim = int(self.x.shape[1])
@@ -120,6 +127,10 @@ class AdultCensusDataset(NebulaDataset):
         "sex",
         "native-country",
     ]
+    # Experimental wide attack surface for testing constrained PGD thoroughly.
+    # This intentionally allows broad changes, including categorical flips.
+    PERTURBABLE_INTEGER_COLUMNS: ClassVar[list[str]] = list(INTEGER_COLUMNS)
+    PERTURBABLE_CATEGORICAL_COLUMNS: ClassVar[list[str]] = list(CATEGORICAL_COLUMNS)
 
     def __init__(
         self,
@@ -219,17 +230,18 @@ def load_adult_census_dataset(self) -> tuple[AdultCensusTorchDataset, AdultCensu
                 "AdultCensusDataset requires pandas + scikit-learn. Install them (e.g., pip install pandas scikit-learn)."
             ) from e
 
-        # 1) Load from OpenML
+        # Raw Adult Census uses mixed pandas columns; the model receives the
+        # numeric matrix produced later by the ColumnTransformer.
         bunch = fetch_openml(data_id=1590, as_frame=True, data_home=data_dir)
         X_df = bunch.data.copy()
         y_raw = bunch.target
 
-        # 2) Target -> {0,1}
-        # Normalize spaces to avoid variants like ' >50K'
+        # Normalize target labels to {0, 1}; 1 means income >50K.
         y_str = y_raw.astype(str).str.strip()
         y: np.ndarray = (y_str == ">50K").astype(np.int64).to_numpy()
 
-        # 3) Replace '?' markers with np.nan and drop rows with missing configured features.
+        # Adult encodes missing values as '?'. Drop incomplete rows so the
+        # adversarial metadata is based on real observed feature ranges.
         X_df = X_df.replace(r"^\s*\?\s*$", np.nan, regex=True)
         self._validate_manual_schema(X_df.columns)
 
@@ -243,12 +255,12 @@ def load_adult_census_dataset(self) -> tuple[AdultCensusTorchDataset, AdultCensu
         valid_rows = ~X_df[configured_columns].isna().any(axis=1)
         removed_rows = int((~valid_rows).sum())
         if removed_rows:
-            import logging
-            logging.getLogger().info("[AdultCensus] Dropping %s rows with NA values", removed_rows)
+            logger.info("[AdultCensus] Dropping %s rows with NA values", removed_rows)
         X_df = X_df.loc[valid_rows].copy()
         y = y[valid_rows.to_numpy()]
 
-        # 4) Preprocess
+        # Numeric columns are standardized; categorical columns become one-hot
+        # columns. Constrained PGD metadata is built after this, in model input space.
         numeric_transformer = Pipeline(
             steps=[
                 ("impute", SimpleImputer(strategy="median")),
@@ -273,7 +285,7 @@ def load_adult_census_dataset(self) -> tuple[AdultCensusTorchDataset, AdultCensu
 
         preprocessor = ColumnTransformer(transformers=transformers, remainder="drop")
 
-        # 5) Split then fit on train
+        # Fit preprocessing only on train to avoid leaking test statistics.
         X_train_df, X_test_df, y_train, y_test = train_test_split(
             X_df,
             y,
@@ -285,10 +297,7 @@ def load_adult_census_dataset(self) -> tuple[AdultCensusTorchDataset, AdultCensu
 
         X_train = preprocessor.fit_transform(X_train_df)
         X_test = preprocessor.transform(X_test_df)
-        try:
-            feature_names = [str(name) for name in preprocessor.get_feature_names_out()]
-        except Exception:
-            feature_names = [f"feature_{i}" for i in range(X_train.shape[1])]
+        feature_names = self._feature_names(preprocessor, X_train.shape[1])
 
         # In case some sklearn path returns sparse matrices, densify safely
         if hasattr(X_train, "toarray"):
@@ -296,101 +305,77 @@ def load_adult_census_dataset(self) -> tuple[AdultCensusTorchDataset, AdultCensu
         if hasattr(X_test, "toarray"):
             X_test = X_test.toarray()
 
-        X_train_np: np.ndarray = np.asarray(X_train, dtype=np.float32)
-        import logging
-        logging.getLogger().info(f"[AdultCensus] X_train shape = {X_train_np.shape}")
-        logging.getLogger().info(f"[AdultCensus] INPUT_DIM (post-OHE) = {int(X_train_np.shape[1])}")
+        X_train_np = np.asarray(X_train, dtype=np.float32)
         X_test_np: np.ndarray = np.asarray(X_test, dtype=np.float32)
-        continuous_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name.startswith("continuous__")
-        ]
-        integer_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name.startswith("integer__")
-        ]
-        categorical_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name.startswith("categorical__")
-        ]
-        continuous_feature_set = set(continuous_features)
-        integer_feature_set = set(integer_features)
-        categorical_feature_set = set(categorical_features)
-        assigned_feature_set = continuous_feature_set | integer_feature_set | categorical_feature_set
-        unknown_features = [
-            feature_names[idx]
-            for idx in range(len(feature_names))
-            if idx not in assigned_feature_set
-        ]
-        if unknown_features:
-            raise ValueError(f"AdultCensusDataset generated untyped features: {unknown_features}")
-        feature_type_by_idx = {
-            **{idx: CONTINUOUS for idx in continuous_feature_set},
-            **{idx: INTEGER for idx in integer_feature_set},
-            **{idx: CATEGORICAL for idx in categorical_feature_set},
-        }
+        metadata = self._build_adversarial_metadata(feature_names, X_train_np, preprocessor)
+        logger.info("[AdultCensus] X_train shape = %s", X_train_np.shape)
+        logger.info("[AdultCensus] INPUT_DIM (post-OHE) = %s", int(X_train_np.shape[1]))
+        self._log_adversarial_metadata(metadata, feature_names)
 
-        categorical_groups = self._build_categorical_groups(feature_names)
-        integer_step_norm = {}
-        if integer_features:
-            integer_scaler = preprocessor.named_transformers_["integer"].named_steps["scaler"]
-            integer_step_norm = {
-                idx: float(1.0 / scale)
-                for idx, scale in zip(integer_features, integer_scaler.scale_, strict=False)
-            }
-        tabular_metadata = TabularAdversarialMetadata(
-            feature_names=feature_names,
-            feature_types=[feature_type_by_idx[idx] for idx in range(len(feature_names))],
-            feature_min_norm=np.min(X_train_np, axis=0).astype(float).tolist(),
-            feature_max_norm=np.max(X_train_np, axis=0).astype(float).tolist(),
-            integer_step_norm=integer_step_norm,
-            categorical_groups=categorical_groups,
-        ).to_dict()
-        logging.getLogger().info(
-            "[AdultCensus] Tabular adversarial feature mask | continuous=%s | integer=%s | "
-            "categorical=%s | categorical_groups=%s | continuous_features=%s | integer_features=%s | "
-            "integer_step_norm=%s",
-            len(continuous_features),
-            len(integer_features),
-            len(categorical_features),
-            len(categorical_groups),
-            [feature_names[idx] for idx in continuous_features],
-            [feature_names[idx] for idx in integer_features],
-            integer_step_norm,
-        )
+        train_ds = self._make_dataset(X_train_np, y_train, feature_names, metadata)
+        test_ds = self._make_dataset(X_test_np, y_test, feature_names, metadata)
+
+        return train_ds, test_ds
 
-        train_ds = AdultCensusTorchDataset(
-            X_train_np,
-            np.asarray(y_train, dtype=np.int64),
+    @staticmethod
+    def _feature_names(preprocessor, n_features: int) -> list[str]:
+        try:
+            return [str(name) for name in preprocessor.get_feature_names_out()]
+        except Exception:
+            return [f"feature_{idx}" for idx in range(n_features)]
+
+    @staticmethod
+    def _make_dataset(x, y, feature_names, metadata) -> AdultCensusTorchDataset:
+        return AdultCensusTorchDataset(
+            x,
+            np.asarray(y, dtype=np.int64),
             feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            categorical_features=categorical_features,
-            categorical_groups=categorical_groups,
-            tabular_metadata=tabular_metadata,
+            continuous_features=[],
+            integer_features=metadata["integer_features"],
+            categorical_features=metadata["categorical_features"],
+            non_perturbable_features=metadata["non_perturbable_features"],
+            categorical_groups=metadata["categorical_groups"],
+            tabular_metadata=metadata["tabular_metadata"],
         )
-        test_ds = AdultCensusTorchDataset(
-            X_test_np,
-            np.asarray(y_test, dtype=np.int64),
+
+    @classmethod
+    def _build_adversarial_metadata(cls, feature_names, x_train, preprocessor) -> dict[str, Any]:
+        # Dataset responsibility ends here: declare which raw columns are perturbable.
+        # The shared metadata builder maps those declarations to transformed model features.
+        integer_scaler = preprocessor.named_transformers_["integer"].named_steps["scaler"]
+        integer_step_by_column = {
+            column: float(1.0 / scale)
+            for column, scale in zip(cls.INTEGER_COLUMNS, integer_scaler.scale_, strict=False)
+        }
+        return build_tabular_adversarial_metadata(
             feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            categorical_features=categorical_features,
-            categorical_groups=categorical_groups,
-            tabular_metadata=tabular_metadata,
+            x_train=x_train,
+            continuous_columns=cls.CONTINUOUS_COLUMNS,
+            integer_columns=cls.INTEGER_COLUMNS,
+            categorical_columns=cls.CATEGORICAL_COLUMNS,
+            perturbable_integer_columns=cls.PERTURBABLE_INTEGER_COLUMNS,
+            perturbable_categorical_columns=cls.PERTURBABLE_CATEGORICAL_COLUMNS,
+            integer_step_by_column=integer_step_by_column,
         )
 
-        return train_ds, test_ds
-
-    @classmethod
-    def _build_categorical_groups(cls, feature_names: list[str]) -> list[list[int]]:
-        groups = []
-        for column in cls.CATEGORICAL_COLUMNS:
-            prefix = f"categorical__{column}_"
-            group = [idx for idx, name in enumerate(feature_names) if name.startswith(prefix)]
-            if group:
-                groups.append(group)
-        return groups
+    @staticmethod
+    def _log_adversarial_metadata(metadata: dict[str, Any], feature_names: list[str]) -> None:
+        integer_features = metadata["integer_features"]
+        categorical_features = metadata["categorical_features"]
+        non_perturbable_features = metadata["non_perturbable_features"]
+        logger.info(
+            "[AdultCensus] Tabular adversarial feature mask | integer=%s | categorical=%s | "
+            "categorical_groups=%s | non_perturbable=%s | integer_features=%s | "
+            "categorical_preview=%s | non_perturbable_preview=%s | integer_step_norm=%s",
+            len(integer_features),
+            len(categorical_features),
+            len(metadata["categorical_groups"]),
+            len(non_perturbable_features),
+            [feature_names[idx] for idx in integer_features],
+            [feature_names[idx] for idx in categorical_features[:20]],
+            [feature_names[idx] for idx in non_perturbable_features[:20]],
+            metadata["integer_step_norm"],
+        )
 
     def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
         if partition == "dirichlet":
diff --git a/nebula/core/datasets/breast_cancer/breast_cancer.py b/nebula/core/datasets/breast_cancer/breast_cancer.py
index ac7446770..f5e53ed7e 100644
--- a/nebula/core/datasets/breast_cancer/breast_cancer.py
+++ b/nebula/core/datasets/breast_cancer/breast_cancer.py
@@ -1,12 +1,15 @@
+import logging
 import os
-from typing import Tuple, Any
+from typing import Any
 
 import numpy as np
 import torch
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
-from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
+from nebula.core.datasets.tabular_metadata import build_tabular_adversarial_metadata
+
+logger = logging.getLogger(__name__)
 
 
 class BreastCancerTorchDataset(Dataset):
@@ -38,14 +41,14 @@ def __init__(
         self.x = x.astype(np.float32, copy=False)
         self.y = y.astype(np.int64, copy=False)
 
-        # Nebula conventions (some utilities expect these)
+        # Nebula dataset conventions used by partitioning, logging and model setup.
         self.data = self.x
         self.targets = self.y
         self.classes = ["0", "1"]
         self.feature_names = feature_names or [f"feature_{i}" for i in range(self.x.shape[1])]
-        self.continuous_features = continuous_features or list(range(self.x.shape[1]))
-        self.integer_features = integer_features or []
-        self.non_perturbable_features = non_perturbable_features or []
+        self.continuous_features = list(range(self.x.shape[1])) if continuous_features is None else continuous_features
+        self.integer_features = [] if integer_features is None else integer_features
+        self.non_perturbable_features = [] if non_perturbable_features is None else non_perturbable_features
         self.binary_features = []
         self.tabular_metadata = tabular_metadata
         self.input_dim = int(self.x.shape[1])
@@ -53,7 +56,7 @@ def __init__(
     def __len__(self) -> int:
         return int(self.y.shape[0])
 
-    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
         x_i = torch.from_numpy(self.x[idx])
         y_i = torch.tensor(self.y[idx], dtype=torch.long)
         return x_i, y_i
@@ -97,7 +100,9 @@ class BreastCancerDataset(NebulaDataset):
     - tabular features (30)
     - deterministic stratified train/test split
     """
-    PERTURBABLE_CONTINUOUS_COLUMNS = [
+    # Raw sklearn feature names. These names are also the schema used to decide
+    # which variables adversarial training may perturb.
+    FEATURE_COLUMNS = [
         "mean radius",
         "mean texture",
         "mean perimeter",
@@ -129,8 +134,11 @@ class BreastCancerDataset(NebulaDataset):
         "worst symmetry",
         "worst fractal dimension",
     ]
+    # Breast Cancer has only continuous medical measurements. Keeping this as a
+    # list makes perturbability a dataset-level decision: remove a column here
+    # and the shared metadata builder will mark it as non-perturbable.
+    PERTURBABLE_CONTINUOUS_COLUMNS = list(FEATURE_COLUMNS)
     PERTURBABLE_INTEGER_COLUMNS = []
-    NON_PERTURBABLE_COLUMNS = []
 
     def __init__(
         self,
@@ -164,30 +172,7 @@ def initialize_dataset(self):
 
         self.data_partitioning(plot=True)
 
-    @classmethod
-    def _validate_manual_schema(cls, columns) -> None:
-        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
-        integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
-        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
-        overlapping_columns = sorted(
-            (continuous_columns & integer_columns)
-            | (continuous_columns & non_perturbable_columns)
-            | (integer_columns & non_perturbable_columns)
-        )
-        if overlapping_columns:
-            raise ValueError(f"BreastCancerDataset columns configured twice: {overlapping_columns}")
-
-        configured_columns = continuous_columns | integer_columns | non_perturbable_columns
-        dataset_columns = set(columns)
-        missing_columns = sorted(configured_columns - dataset_columns)
-        if missing_columns:
-            raise ValueError(f"BreastCancerDataset is missing configured columns: {missing_columns}")
-        unconfigured_columns = sorted(dataset_columns - configured_columns)
-        if unconfigured_columns:
-            raise ValueError(f"BreastCancerDataset has unconfigured columns: {unconfigured_columns}")
-
     def load_breast_cancer_dataset(self):
-        # Local cache directory (aunque load_breast_cancer no descarga, seguimos el patrón)
         data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
         os.makedirs(data_dir, exist_ok=True)
 
@@ -219,55 +204,70 @@ def load_breast_cancer_dataset(self):
         x_train = scaler.fit_transform(x_train)
         x_test = scaler.transform(x_test)
 
+        # Constrained PGD receives standardized tensors, so metadata bounds must also be
+        # computed in this transformed model-input space.
         x_train_np = np.asarray(x_train, dtype=np.float32)
         x_test_np = np.asarray(x_test, dtype=np.float32)
-        continuous_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name in self.PERTURBABLE_CONTINUOUS_COLUMNS
-        ]
-        integer_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name in self.PERTURBABLE_INTEGER_COLUMNS
-        ]
-        non_perturbable_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name in self.NON_PERTURBABLE_COLUMNS
-        ]
-        continuous_feature_set = set(continuous_features)
-        integer_feature_set = set(integer_features)
-        tabular_metadata = TabularAdversarialMetadata(
-            feature_names=feature_names,
-            feature_types=[
-                CONTINUOUS if idx in continuous_feature_set
-                else INTEGER if idx in integer_feature_set
-                else NON_PERTURBABLE
-                for idx in range(len(feature_names))
-            ],
-            feature_min_norm=np.min(x_train_np, axis=0).astype(float).tolist(),
-            feature_max_norm=np.max(x_train_np, axis=0).astype(float).tolist(),
-            integer_step_norm={},
-        ).to_dict()
-
-        train_ds = BreastCancerTorchDataset(
-            x_train_np,
-            y_train,
+        metadata = self._build_adversarial_metadata(feature_names, x_train_np)
+        self._log_adversarial_metadata(metadata, feature_names)
+
+        return (
+            self._make_dataset(x_train_np, y_train, feature_names, metadata),
+            self._make_dataset(x_test_np, y_test, feature_names, metadata),
+        )
+
+    @classmethod
+    def _validate_manual_schema(cls, columns) -> None:
+        dataset_columns = set(columns)
+        expected_columns = set(cls.FEATURE_COLUMNS)
+        missing_columns = sorted(expected_columns - dataset_columns)
+        extra_columns = sorted(dataset_columns - expected_columns)
+        if missing_columns or extra_columns:
+            raise ValueError(
+                "BreastCancerDataset schema mismatch: "
+                f"missing={missing_columns}, extra={extra_columns}"
+            )
+
+    @classmethod
+    def _build_adversarial_metadata(cls, feature_names, x_train):
+        # The dataset only declares perturbable columns. The shared builder
+        # turns that declaration into feature types, bounds and masks for constrained PGD.
+        return build_tabular_adversarial_metadata(
             feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
-            tabular_metadata=tabular_metadata,
+            x_train=x_train,
+            continuous_columns=cls.FEATURE_COLUMNS,
+            integer_columns=[],
+            categorical_columns=[],
+            perturbable_continuous_columns=cls.PERTURBABLE_CONTINUOUS_COLUMNS,
+            perturbable_integer_columns=cls.PERTURBABLE_INTEGER_COLUMNS,
         )
-        test_ds = BreastCancerTorchDataset(
-            x_test_np,
-            y_test,
+
+    @staticmethod
+    def _make_dataset(x, y, feature_names, metadata) -> BreastCancerTorchDataset:
+        # Store the same metadata on train and test. Training uses it to create
+        # adversarial examples; evaluation can inspect it for robustness reports.
+        return BreastCancerTorchDataset(
+            x,
+            y,
             feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
-            tabular_metadata=tabular_metadata,
+            continuous_features=metadata["continuous_features"],
+            integer_features=metadata["integer_features"],
+            non_perturbable_features=metadata["non_perturbable_features"],
+            tabular_metadata=metadata["tabular_metadata"],
         )
 
-        return train_ds, test_ds
+    @staticmethod
+    def _log_adversarial_metadata(metadata: dict[str, Any], feature_names: list[str]) -> None:
+        continuous_features = metadata["continuous_features"]
+        non_perturbable_features = metadata["non_perturbable_features"]
+        logger.info(
+            "[BreastCancer] Tabular adversarial feature mask | continuous=%s | "
+            "non_perturbable=%s | continuous_features=%s | non_perturbable_preview=%s",
+            len(continuous_features),
+            len(non_perturbable_features),
+            [feature_names[idx] for idx in continuous_features],
+            [feature_names[idx] for idx in non_perturbable_features[:20]],
+        )
 
     def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
         if partition == "dirichlet":
diff --git a/nebula/core/datasets/covtype/covtype.py b/nebula/core/datasets/covtype/covtype.py
index 22a24c682..e3be64318 100644
--- a/nebula/core/datasets/covtype/covtype.py
+++ b/nebula/core/datasets/covtype/covtype.py
@@ -1,14 +1,17 @@
 # nebula/core/datasets/covtype/covtype.py
 
+import logging
 import os
-from typing import Tuple, Any
+from typing import Any
 
 import numpy as np
 import torch
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
-from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
+from nebula.core.datasets.tabular_metadata import build_tabular_adversarial_metadata
+
+logger = logging.getLogger(__name__)
 
 
 class CovtypeTorchDataset(Dataset):
@@ -44,6 +47,7 @@ def __init__(
         self.x = x.astype(np.float32, copy=False)
         self.y = y.astype(np.int64, copy=False)
 
+        # Nebula dataset conventions used by partitioning, logging and model setup.
         self.data = self.x
         self.targets = self.y
 
@@ -60,7 +64,7 @@ def __init__(
     def __len__(self) -> int:
         return int(self.y.shape[0])
 
-    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
         x_i = torch.from_numpy(self.x[idx])
         y_i = torch.tensor(self.y[idx], dtype=torch.long)
         return x_i, y_i
@@ -120,7 +124,7 @@ class CovtypeDataset(NebulaDataset):
     Requirements:
     - scikit-learn must be installed (for fetch_covtype + train_test_split).
     """
-    PERTURBABLE_CONTINUOUS_COLUMNS = [
+    CONTINUOUS_COLUMNS = [
         "Elevation",
         "Aspect",
         "Slope",
@@ -132,8 +136,7 @@ class CovtypeDataset(NebulaDataset):
         "Hillshade_3pm",
         "Horizontal_Distance_To_Fire_Points",
     ]
-    PERTURBABLE_INTEGER_COLUMNS = []
-    NON_PERTURBABLE_COLUMNS = [
+    BINARY_COLUMNS = [
         "Wilderness_Area_0",
         "Wilderness_Area_1",
         "Wilderness_Area_2",
@@ -179,6 +182,12 @@ class CovtypeDataset(NebulaDataset):
         "Soil_Type_38",
         "Soil_Type_39",
     ]
+    # Covtype has two kinds of inputs:
+    # - terrain measurements, which constrained PGD may perturb;
+    # - binary wilderness/soil indicators, which stay immutable to avoid broken
+    #   one-hot-like combinations.
+    PERTURBABLE_CONTINUOUS_COLUMNS = list(CONTINUOUS_COLUMNS)
+    PERTURBABLE_INTEGER_COLUMNS = []
 
     def __init__(
         self,
@@ -218,16 +227,16 @@ def initialize_dataset(self):
 
     @classmethod
     def _default_feature_names(cls, n_features: int) -> list[str]:
-        configured_columns = cls.PERTURBABLE_CONTINUOUS_COLUMNS + cls.NON_PERTURBABLE_COLUMNS
+        configured_columns = cls.CONTINUOUS_COLUMNS + cls.BINARY_COLUMNS
         if n_features == len(configured_columns):
             return configured_columns
         return [f"feature_{i}" for i in range(n_features)]
 
     @classmethod
     def _validate_manual_schema(cls, columns) -> None:
-        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
+        continuous_columns = set(cls.CONTINUOUS_COLUMNS)
         integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
-        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
+        non_perturbable_columns = set(cls.BINARY_COLUMNS)
         overlapping_columns = sorted(
             (continuous_columns & integer_columns)
             | (continuous_columns & non_perturbable_columns)
@@ -250,7 +259,6 @@ def load_covtype_dataset(self):
         Loads Covtype via sklearn, performs a deterministic train/test split,
         and wraps into torch Datasets.
         """
-        # Local cache directory for sklearn dataset downloads
         data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
         os.makedirs(data_dir, exist_ok=True)
 
@@ -274,27 +282,13 @@ def load_covtype_dataset(self):
         try:
             self._validate_manual_schema(feature_names)
         except ValueError:
-            if x.shape[1] != len(self.PERTURBABLE_CONTINUOUS_COLUMNS) + len(self.NON_PERTURBABLE_COLUMNS):
+            if x.shape[1] != len(self.CONTINUOUS_COLUMNS) + len(self.BINARY_COLUMNS):
                 raise
-            import logging
-            logging.getLogger().info(
+            logger.info(
                 "[Covtype] Replacing sklearn feature names with canonical Covtype names for adversarial metadata"
             )
             feature_names = self._default_feature_names(x.shape[1])
             self._validate_manual_schema(feature_names)
-        continuous_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name in self.PERTURBABLE_CONTINUOUS_COLUMNS
-        ]
-        integer_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name in self.PERTURBABLE_INTEGER_COLUMNS
-        ]
-        non_perturbable_features = [
-            idx for idx, name in enumerate(feature_names)
-            if name in self.NON_PERTURBABLE_COLUMNS
-        ]
-        binary_features = non_perturbable_features
 
         # Map labels to 0..6 (CrossEntropyLoss convention)
         # If already 0..6, this is harmless for 1..7 only if we detect min.
@@ -330,50 +324,69 @@ def load_covtype_dataset(self):
                 stratify=y_test,
             )
 
-        # Covtype has continuous features followed by binary wilderness/soil indicators.
-        # Scale only the continuous block; keep binary indicators as 0/1.
+        # Scale only the continuous terrain measurements. The binary columns
+        # must remain 0/1 because they encode wilderness and soil indicators.
         scaler = StandardScaler()
         x_train = np.asarray(x_train, dtype=np.float32).copy()
         x_test = np.asarray(x_test, dtype=np.float32).copy()
+        continuous_features = [
+            idx for idx, name in enumerate(feature_names)
+            if name in self.CONTINUOUS_COLUMNS
+        ]
         x_train[:, continuous_features] = scaler.fit_transform(x_train[:, continuous_features])
         x_test[:, continuous_features] = scaler.transform(x_test[:, continuous_features])
-        continuous_feature_set = set(continuous_features)
-        integer_feature_set = set(integer_features)
-        tabular_metadata = TabularAdversarialMetadata(
-            feature_names=feature_names,
-            feature_types=[
-                CONTINUOUS if idx in continuous_feature_set
-                else INTEGER if idx in integer_feature_set
-                else NON_PERTURBABLE
-                for idx in range(len(feature_names))
-            ],
-            feature_min_norm=np.min(x_train, axis=0).astype(float).tolist(),
-            feature_max_norm=np.max(x_train, axis=0).astype(float).tolist(),
-            integer_step_norm={},
-        ).to_dict()
-
-        train_ds = CovtypeTorchDataset(
-            x_train,
-            y_train,
+        metadata = self._build_adversarial_metadata(feature_names, x_train)
+        self._log_adversarial_metadata(metadata, feature_names)
+
+        return (
+            self._make_dataset(x_train, y_train, feature_names, metadata),
+            self._make_dataset(x_test, y_test, feature_names, metadata),
+        )
+
+    @staticmethod
+    def _make_dataset(
+        x: np.ndarray,
+        y: np.ndarray,
+        feature_names: list[str],
+        metadata: dict[str, Any],
+    ) -> CovtypeTorchDataset:
+        return CovtypeTorchDataset(
+            x,
+            y,
             feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
-            binary_features=binary_features,
-            tabular_metadata=tabular_metadata,
+            continuous_features=metadata["continuous_features"],
+            integer_features=metadata["integer_features"],
+            non_perturbable_features=metadata["non_perturbable_features"],
+            binary_features=metadata["non_perturbable_features"],
+            tabular_metadata=metadata["tabular_metadata"],
         )
-        test_ds = CovtypeTorchDataset(
-            x_test,
-            y_test,
+
+    @classmethod
+    def _build_adversarial_metadata(cls, feature_names, x_train):
+        # Dataset responsibility: declare which variables are perturbable. The
+        # shared builder maps those declarations to feature masks and bounds.
+        return build_tabular_adversarial_metadata(
             feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
-            binary_features=binary_features,
-            tabular_metadata=tabular_metadata,
+            x_train=x_train,
+            continuous_columns=cls.CONTINUOUS_COLUMNS,
+            integer_columns=[],
+            categorical_columns=[],
+            perturbable_continuous_columns=cls.PERTURBABLE_CONTINUOUS_COLUMNS,
+            perturbable_integer_columns=cls.PERTURBABLE_INTEGER_COLUMNS,
         )
 
-        return train_ds, test_ds
+    @staticmethod
+    def _log_adversarial_metadata(metadata: dict[str, Any], feature_names: list[str]) -> None:
+        continuous_features = metadata["continuous_features"]
+        non_perturbable_features = metadata["non_perturbable_features"]
+        logger.info(
+            "[Covtype] Tabular adversarial feature mask | continuous=%s | non_perturbable=%s | "
+            "continuous_features=%s | non_perturbable_preview=%s",
+            len(continuous_features),
+            len(non_perturbable_features),
+            [feature_names[idx] for idx in continuous_features],
+            [feature_names[idx] for idx in non_perturbable_features[:20]],
+        )
 
     def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
         if partition == "dirichlet":
diff --git a/nebula/core/datasets/tabular_metadata.py b/nebula/core/datasets/tabular_metadata.py
index e34397f6a..85d240099 100644
--- a/nebula/core/datasets/tabular_metadata.py
+++ b/nebula/core/datasets/tabular_metadata.py
@@ -132,3 +132,150 @@ def from_dict(cls, data: dict[str, Any]) -> TabularAdversarialMetadata:
                 for group in data.get("categorical_groups") or []
             ],
         )
+
+
+def build_tabular_adversarial_metadata(
+    *,
+    feature_names: list[str],
+    x_train,
+    continuous_columns: list[str] | tuple[str, ...] = (),
+    integer_columns: list[str] | tuple[str, ...] = (),
+    categorical_columns: list[str] | tuple[str, ...] = (),
+    perturbable_continuous_columns: list[str] | tuple[str, ...] = (),
+    perturbable_integer_columns: list[str] | tuple[str, ...] = (),
+    perturbable_categorical_columns: list[str] | tuple[str, ...] = (),
+    integer_step_by_column: dict[str, float] | None = None,
+) -> dict[str, Any]:
+    """Build tabular adversarial metadata from dataset-level perturbability lists."""
+    # Datasets should only decide which raw columns are perturbable. This helper
+    # maps that decision to the transformed feature vector consumed by the model.
+    _validate_perturbable_columns(
+        continuous_columns=continuous_columns,
+        integer_columns=integer_columns,
+        categorical_columns=categorical_columns,
+        perturbable_continuous_columns=perturbable_continuous_columns,
+        perturbable_integer_columns=perturbable_integer_columns,
+        perturbable_categorical_columns=perturbable_categorical_columns,
+    )
+
+    perturbable_continuous = set(perturbable_continuous_columns)
+    perturbable_integer = set(perturbable_integer_columns)
+    perturbable_categorical = set(perturbable_categorical_columns)
+
+    # Continuous/integer transformed features usually keep their raw column name
+    # after an optional transformer prefix, for example "integer__age".
+    continuous_features = [
+        idx
+        for idx, name in enumerate(feature_names)
+        if _raw_feature_name(name) in perturbable_continuous
+    ]
+    integer_features = [
+        idx
+        for idx, name in enumerate(feature_names)
+        if _raw_feature_name(name) in perturbable_integer
+    ]
+    # One raw categorical column becomes several one-hot features, for example
+    # "categorical__sex_Female" and "categorical__sex_Male".
+    categorical_features = [
+        idx
+        for idx, name in enumerate(feature_names)
+        if _categorical_column_name(name, categorical_columns) in perturbable_categorical
+    ]
+
+    continuous_feature_set = set(continuous_features)
+    integer_feature_set = set(integer_features)
+    categorical_feature_set = set(categorical_features)
+    perturbable_feature_set = continuous_feature_set | integer_feature_set | categorical_feature_set
+    non_perturbable_features = [
+        idx
+        for idx in range(len(feature_names))
+        if idx not in perturbable_feature_set
+    ]
+
+    categorical_groups = _categorical_groups(feature_names, perturbable_categorical)
+    integer_step_norm = _integer_step_norm(feature_names, integer_features, integer_step_by_column or {})
+    # The attack consumes only TabularAdversarialMetadata. The extra lists are
+    # returned so dataset wrappers and logs can expose the same mask clearly.
+    tabular_metadata = TabularAdversarialMetadata(
+        feature_names=feature_names,
+        feature_types=[
+            CONTINUOUS if idx in continuous_feature_set
+            else INTEGER if idx in integer_feature_set
+            else CATEGORICAL if idx in categorical_feature_set
+            else NON_PERTURBABLE
+            for idx in range(len(feature_names))
+        ],
+        feature_min_norm=[float(value) for value in x_train.min(axis=0)],
+        feature_max_norm=[float(value) for value in x_train.max(axis=0)],
+        integer_step_norm=integer_step_norm,
+        categorical_groups=categorical_groups,
+    ).to_dict()
+
+    return {
+        "continuous_features": continuous_features,
+        "integer_features": integer_features,
+        "categorical_features": categorical_features,
+        "non_perturbable_features": non_perturbable_features,
+        "categorical_groups": categorical_groups,
+        "integer_step_norm": integer_step_norm,
+        "tabular_metadata": tabular_metadata,
+    }
+
+
+def _validate_perturbable_columns(
+    *,
+    continuous_columns,
+    integer_columns,
+    categorical_columns,
+    perturbable_continuous_columns,
+    perturbable_integer_columns,
+    perturbable_categorical_columns,
+) -> None:
+    invalid_continuous = sorted(set(perturbable_continuous_columns) - set(continuous_columns))
+    invalid_integer = sorted(set(perturbable_integer_columns) - set(integer_columns))
+    invalid_categorical = sorted(set(perturbable_categorical_columns) - set(categorical_columns))
+    if invalid_continuous or invalid_integer or invalid_categorical:
+        raise ValueError(
+            "Perturbable columns must exist in the dataset schema: "
+            f"continuous={invalid_continuous}, integer={invalid_integer}, categorical={invalid_categorical}"
+        )
+
+
+def _raw_feature_name(feature_name: str) -> str:
+    # Strip sklearn ColumnTransformer prefixes such as "integer__" or
+    # "categorical__" while leaving plain feature names untouched.
+    return feature_name.split("__", maxsplit=1)[1] if "__" in feature_name else feature_name
+
+
+def _categorical_column_name(feature_name: str, categorical_columns) -> str | None:
+    # Recover the raw categorical column name from a one-hot feature name, or None
+    # when no column matches. The longest matching column wins, so "a_b_x" maps to
+    # column "a_b" rather than "a" when both are declared, whatever their order.
+    raw_name = _raw_feature_name(feature_name)
+    matches = [column for column in categorical_columns if raw_name.startswith(f"{column}_")]
+    return max(matches, key=len, default=None)
+
+
+def _categorical_groups(feature_names: list[str], perturbable_categorical_columns: set[str]) -> list[list[int]]:
+    # Constrained PGD projects each group back to exactly one active one-hot value.
+    # Groups are returned ordered by feature index: set iteration order depends on
+    # per-process string hashing, so unsorted output can differ between processes.
+    groups = []
+    for column in perturbable_categorical_columns:
+        prefix = f"categorical__{column}_"
+        groups.append([idx for idx, name in enumerate(feature_names) if name.startswith(prefix)])
+    return sorted(group for group in groups if group)
+
+
+def _integer_step_norm(
+    feature_names: list[str],
+    integer_features: list[int],
+    integer_step_by_column: dict[str, float],
+) -> dict[int, float]:
+    # Integer columns may be scaled. The step tells constrained PGD what "+1 raw unit"
+    # means in the normalized model-input space.
+    return {
+        idx: float(integer_step_by_column[_raw_feature_name(feature_names[idx])])
+        for idx in integer_features
+        if _raw_feature_name(feature_names[idx]) in integer_step_by_column
+    }
diff --git a/nebula/frontend/static/js/deployment/adversarial-training.js b/nebula/frontend/static/js/deployment/adversarial-training.js
index b02addf4e..cb072cf9e 100644
--- a/nebula/frontend/static/js/deployment/adversarial-training.js
+++ b/nebula/frontend/static/js/deployment/adversarial-training.js
@@ -15,13 +15,13 @@ const AdversarialTrainingManager = (function() {
     };
 
     const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
-    const TABULAR_ADVERSARIAL_DATASETS = new Set(["AdultCensus"]);
+    const TABULAR_ADVERSARIAL_DATASETS = new Set(["AdultCensus", "BreastCancer", "Covtype"]);
     const IMAGE_ATTACK_OPTIONS = [
         {value: "fgsm", label: "FGSM"},
         {value: "pgd", label: "PGD"}
     ];
     const TABULAR_ATTACK_OPTIONS = [
-        {value: "capgd", label: "CAPGD"}
+        {value: "constrained_pgd", label: "Constrained PGD"}
     ];
 
     function initializeAdversarialTraining() {
@@ -75,12 +75,12 @@ const AdversarialTrainingManager = (function() {
         const domain = document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain;
         if (!pgdSettings) return;
 
-        pgdSettings.style.display = ["pgd", "capgd"].includes(attack) ? "block" : "none";
+        pgdSettings.style.display = ["pgd", "constrained_pgd"].includes(attack) ? "block" : "none";
         if (lossWindowSettings) {
             lossWindowSettings.style.display = domain === "tabular" ? "block" : "none";
         }
         if (stepsTitle) {
-            stepsTitle.textContent = domain === "tabular" ? "CAPGD steps" : "PGD steps";
+            stepsTitle.textContent = domain === "tabular" ? "Constrained PGD steps" : "PGD steps";
         }
     }
 
@@ -94,7 +94,7 @@ const AdversarialTrainingManager = (function() {
 
         if (datasetNote) {
             datasetNote.style.display = domain === "unsupported" ? "block" : "none";
-            datasetNote.textContent = "Adversarial Training for tabular datasets currently supports AdultCensus with CAPGD.";
+            datasetNote.textContent = "Adversarial Training for tabular datasets currently supports AdultCensus, BreastCancer, and Covtype with constrained PGD.";
         }
         if (domainInput) {
             domainInput.value = domain === "unsupported" ? "tabular" : domain;
@@ -129,7 +129,7 @@ const AdversarialTrainingManager = (function() {
         const attackSelect = document.getElementById("adversarialTrainingAttack");
         if (!attackSelect) return;
 
-        // Tabular datasets intentionally expose only CAPGD; image datasets expose FGSM/PGD.
+        // Tabular datasets intentionally expose only constrained PGD; image datasets expose FGSM/PGD.
         const options = domain === "tabular" ? TABULAR_ATTACK_OPTIONS : IMAGE_ATTACK_OPTIONS;
         const currentAttack = preferredAttack || attackSelect.value;
         attackSelect.innerHTML = "";
@@ -170,7 +170,7 @@ const AdversarialTrainingManager = (function() {
     function getAdversarialTrainingConfig() {
         const domain = document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain;
         const attack = domain === "tabular"
-            ? "capgd"
+            ? "constrained_pgd"
             : (document.getElementById("adversarialTrainingAttack")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.attack);
         const config = {
             enabled: Boolean(document.getElementById("adversarialTrainingSwitch")?.checked),
@@ -252,7 +252,7 @@ const AdversarialTrainingManager = (function() {
         if (config.epsilon < 0) {
             return "[Adversarial Training] Epsilon must be greater than or equal to 0.";
         }
-        if (["pgd", "capgd"].includes(config.attack) && config.steps < 1) {
+        if (["pgd", "constrained_pgd"].includes(config.attack) && config.steps < 1) {
             return "[Adversarial Training] Search steps must be at least 1.";
         }
         if (!["mixed", "adversarial"].includes(config.mode)) {
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index bfa249739..01e9ed5a4 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -588,7 +588,7 @@ <h5 class="step-title">Enable/Disable Adversarial Training</h5>
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <small id="adversarial-training-dataset-note" class="form-text text-muted" style="display: none;">
-                        Adversarial Training for tabular datasets currently supports AdultCensus with CAPGD.
+                        Adversarial Training for tabular datasets currently supports AdultCensus, BreastCancer, and Covtype with constrained PGD.
                     </small>
                     <div id="adversarial-training-settings" style="margin-top: 10px; display: none;">
                         <input type="hidden" id="adversarialTrainingDomain" value="image">
@@ -641,7 +641,7 @@ <h5 class="step-title">Training mode</h5>
                             </select>
                         </div>
                         <small class="form-text text-muted">
-                            Image datasets use FGSM/PGD. AdultCensus uses CAPGD for tabular adversarial training.
+                            Image datasets use FGSM/PGD. AdultCensus, BreastCancer, and Covtype use constrained PGD for tabular adversarial training.
                         </small>
                     </div>
                 </div>

From c774e9d2bd25cd168164579fdf6676c7e23bbb84 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Mon, 8 Jun 2026 14:37:20 +0200
Subject: [PATCH 62/66] Adversarial training for tabular data updated: Margin
 window implemented. Covtype and KDDCUP99 updated and fixed for adversarial
 training

---
 .../defenses/adversarial_training/__init__.py |   4 +
 .../defenses/adversarial_training/config.py   |  26 ++-
 .../defenses/adversarial_training/defense.py  |  10 +-
 .../defenses/adversarial_training/tabular.py  |  51 ++++-
 nebula/core/datasets/covtype/covtype.py       |  44 ++--
 nebula/core/datasets/kddcup99/kddcup99.py     | 201 ++++++++++--------
 .../frontend/config/participant.json.example  |   5 +-
 .../js/deployment/adversarial-training.js     |  89 +++++++-
 nebula/frontend/templates/deployment.html     |  28 ++-
 9 files changed, 333 insertions(+), 125 deletions(-)

diff --git a/nebula/addons/defenses/adversarial_training/__init__.py b/nebula/addons/defenses/adversarial_training/__init__.py
index ddc977538..772ac6cda 100644
--- a/nebula/addons/defenses/adversarial_training/__init__.py
+++ b/nebula/addons/defenses/adversarial_training/__init__.py
@@ -1,9 +1,11 @@
 from nebula.addons.defenses.adversarial_training.defense import (
     ERR_ALPHA,
     ERR_APPLY_PROBABILITY,
+    ERR_CANDIDATE_SELECTION,
     ERR_EPSILON,
     ERR_IMAGE_ATTACK,
     ERR_LOSS_INCREASE,
+    ERR_MARGIN_WINDOW,
     ERR_MODE,
     ERR_STEPS,
     ERR_TABULAR_ATTACK,
@@ -28,9 +30,11 @@
 __all__ = [
     "ERR_ALPHA",
     "ERR_APPLY_PROBABILITY",
+    "ERR_CANDIDATE_SELECTION",
     "ERR_EPSILON",
     "ERR_IMAGE_ATTACK",
     "ERR_LOSS_INCREASE",
+    "ERR_MARGIN_WINDOW",
     "ERR_MODE",
     "ERR_STEPS",
     "ERR_TABULAR_ATTACK",
diff --git a/nebula/addons/defenses/adversarial_training/config.py b/nebula/addons/defenses/adversarial_training/config.py
index a5ca04a15..8cbcdddcd 100644
--- a/nebula/addons/defenses/adversarial_training/config.py
+++ b/nebula/addons/defenses/adversarial_training/config.py
@@ -3,7 +3,7 @@
 
 IMAGE_ADVERSARIAL_ATTACKS = {"fgsm", "pgd"}
 TABULAR_ADVERSARIAL_ATTACKS = {"constrained_pgd"}
-TABULAR_ADVERSARIAL_DATASETS = {"AdultCensus", "BreastCancer", "Covtype"}
+TABULAR_ADVERSARIAL_DATASETS = {"AdultCensus", "BreastCancer", "Covtype", "KDDCUP99"}
 
 ERR_IMAGE_ATTACK = "image adversarial_training.attack must be one of: fgsm, pgd"
 ERR_TABULAR_ATTACK = "tabular adversarial_training.attack must be one of: constrained_pgd"
@@ -12,7 +12,11 @@
 ERR_ALPHA = "adversarial_training.alpha must be >= 0"
 ERR_STEPS = "adversarial_training.steps must be >= 1"
 ERR_APPLY_PROBABILITY = "adversarial_training.apply_probability must be in [0, 1]"
+ERR_CANDIDATE_SELECTION = (
+    "tabular adversarial_training.candidate_selection must be one of: none, loss_window, margin_window"
+)
 ERR_LOSS_INCREASE = "adversarial_training loss increase thresholds must be >= 0 and target <= max"
+ERR_MARGIN_WINDOW = "adversarial_training margin thresholds must satisfy target_margin <= max_margin"
 ERR_TABULAR_METADATA = "Tabular adversarial training requires tabular_metadata"
 ERR_UNSUPPORTED_ATTACK = "Unsupported adversarial training attack: {attack}"
 
@@ -39,8 +43,11 @@ class AdversarialTrainingConfig:
     adversarial_weight: float = 0.5
     apply_probability: float = 0.3
     log_adversarial_metrics: bool = True
+    candidate_selection: str = "none"
     target_loss_increase: float | None = None
     max_loss_increase: float | None = None
+    target_margin: float | None = 0.0
+    max_margin: float | None = 0.5
 
 
 def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTrainingConfig | None:
@@ -69,12 +76,19 @@ def config_from_participant(participant_config: dict[str, Any]) -> AdversarialTr
         adversarial_weight=adversarial_weight,
         apply_probability=float(raw.get("apply_probability", 0.3)),
         log_adversarial_metrics=True,
+        candidate_selection=str(raw.get("candidate_selection", "none")).lower(),
         target_loss_increase=float(raw["target_loss_increase"])
         if raw.get("target_loss_increase") is not None
         else None,
         max_loss_increase=float(raw["max_loss_increase"])
         if raw.get("max_loss_increase") is not None
         else None,
+        target_margin=float(raw["target_margin"])
+        if raw.get("target_margin") is not None
+        else 0.0,
+        max_margin=float(raw["max_margin"])
+        if raw.get("max_margin") is not None
+        else 0.5,
     )
 
 
@@ -92,6 +106,10 @@ def validate_config(config: AdversarialTrainingConfig) -> None:
         raise ValueError(ERR_IMAGE_ATTACK)
     if config.domain == "tabular" and config.attack not in TABULAR_ADVERSARIAL_ATTACKS:
         raise ValueError(ERR_TABULAR_ATTACK)
+    if config.domain == "tabular" and config.candidate_selection not in {"none", "loss_window", "margin_window"}:
+        raise ValueError(ERR_CANDIDATE_SELECTION)
+    if config.domain == "image" and config.candidate_selection != "none":
+        raise ValueError(ERR_CANDIDATE_SELECTION)
     if config.epsilon < 0:
         raise ValueError(ERR_EPSILON)
     if config.alpha is not None and config.alpha < 0:
@@ -110,3 +128,9 @@ def validate_config(config: AdversarialTrainingConfig) -> None:
         and config.target_loss_increase > config.max_loss_increase
     ):
         raise ValueError(ERR_LOSS_INCREASE)
+    if (
+        config.target_margin is not None
+        and config.max_margin is not None
+        and config.target_margin > config.max_margin
+    ):
+        raise ValueError(ERR_MARGIN_WINDOW)
diff --git a/nebula/addons/defenses/adversarial_training/defense.py b/nebula/addons/defenses/adversarial_training/defense.py
index 4cd6b6923..b79285009 100644
--- a/nebula/addons/defenses/adversarial_training/defense.py
+++ b/nebula/addons/defenses/adversarial_training/defense.py
@@ -7,9 +7,11 @@
 from nebula.addons.defenses.adversarial_training.config import (
     ERR_ALPHA,
     ERR_APPLY_PROBABILITY,
+    ERR_CANDIDATE_SELECTION,
     ERR_EPSILON,
     ERR_IMAGE_ATTACK,
     ERR_LOSS_INCREASE,
+    ERR_MARGIN_WINDOW,
     ERR_MODE,
     ERR_STEPS,
     ERR_TABULAR_ATTACK,
@@ -215,7 +217,8 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
             "[AdversarialTrainingDefense] Enabled | dataset=%s | attack=%s | epsilon_max=%s | "
             "epsilon_range=[%.6f, %.6f] | epsilon_step=%.6f | steps=%s | mode=%s | "
             "clean_weight=%.2f | adversarial_weight=%.2f | apply_probability=%.2f | "
-            "target_loss_increase=%s | max_loss_increase=%s | log_adversarial_metrics=%s",
+            "candidate_selection=%s | target_loss_increase=%s | max_loss_increase=%s | "
+            "target_margin=%s | max_margin=%s | log_adversarial_metrics=%s",
             defense.config.dataset_name,
             defense.config.attack,
             defense.config.epsilon,
@@ -227,8 +230,11 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
             defense.config.clean_weight,
             defense.config.adversarial_weight,
             defense.config.apply_probability,
+            defense.config.candidate_selection,
             defense.config.target_loss_increase,
             defense.config.max_loss_increase,
+            defense.config.target_margin,
+            defense.config.max_margin,
             defense.config.log_adversarial_metrics,
         )
 
@@ -236,9 +242,11 @@ def apply_adversarial_training_if_enabled(model, participant_config: dict[str, A
 __all__ = [
     "ERR_ALPHA",
     "ERR_APPLY_PROBABILITY",
+    "ERR_CANDIDATE_SELECTION",
     "ERR_EPSILON",
     "ERR_IMAGE_ATTACK",
     "ERR_LOSS_INCREASE",
+    "ERR_MARGIN_WINDOW",
     "ERR_MODE",
     "ERR_STEPS",
     "ERR_TABULAR_ATTACK",
diff --git a/nebula/addons/defenses/adversarial_training/tabular.py b/nebula/addons/defenses/adversarial_training/tabular.py
index 2ae280be0..661556d9b 100644
--- a/nebula/addons/defenses/adversarial_training/tabular.py
+++ b/nebula/addons/defenses/adversarial_training/tabular.py
@@ -174,7 +174,9 @@ def generate(self, model, x, y, criterion):
         x_adv = x_clean.clone()
         best_adv = x_adv.clone()
         best_score = torch.full((x_clean.size(0),), float("-inf"), dtype=x_clean.dtype, device=x_clean.device)
+        best_distance = torch.full((x_clean.size(0),), float("inf"), dtype=x_clean.dtype, device=x_clean.device)
         use_loss_window = self._use_loss_window()
+        use_margin_window = self._use_margin_window()
         clean_loss = self._clean_loss(model, x_clean, y) if use_loss_window else None
 
         for _ in range(steps):
@@ -195,13 +197,18 @@ def generate(self, model, x, y, criterion):
                 if use_loss_window:
                     candidate_score = self._loss_increase(candidate_logits, y, clean_loss)
                     better = self._loss_window_better(candidate_score, best_score)
+                elif use_margin_window:
+                    candidate_score = self._margin(candidate_logits, y)
+                    candidate_distance = self._margin_window_distance(candidate_score)
+                    better = self._margin_window_better(candidate_score, candidate_distance, best_score, best_distance)
+                    best_distance = torch.where(better, candidate_distance, best_distance)
                 else:
                     candidate_score = self._margin(candidate_logits, y)
                     better = candidate_score > best_score
                 best_adv = torch.where(better.view(-1, 1), candidate, best_adv)
                 best_score = torch.where(better, candidate_score, best_score)
 
-                if self._target_reached(best_score):
+                if self._target_reached(best_score, best_distance):
                     break
 
             x_adv = candidate
@@ -209,7 +216,10 @@ def generate(self, model, x, y, criterion):
         return best_adv.detach()
 
     def _use_loss_window(self) -> bool:
-        return self.config.target_loss_increase is not None or self.config.max_loss_increase is not None
+        return self.config.candidate_selection == "loss_window"
+
+    def _use_margin_window(self) -> bool:
+        return self.config.candidate_selection == "margin_window"
 
     def _clean_loss(self, model, x_clean: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         # Baseline difficulty. Candidate scores become loss(candidate) - loss(clean).
@@ -231,8 +241,35 @@ def _loss_window_better(self, candidate_score: torch.Tensor, best_score: torch.T
             valid = valid & (candidate_score <= float(self.config.max_loss_increase))
         return valid & (candidate_score > best_score)
 
-    def _target_reached(self, best_score: torch.Tensor) -> bool:
-        # Once every sample has reached the requested hardness, stop taking stronger steps.
-        if self.config.target_loss_increase is None:
-            return False
-        return bool((best_score >= float(self.config.target_loss_increase)).all().item())
+    def _margin_window_distance(self, margin: torch.Tensor) -> torch.Tensor:
+        # Distance is zero inside the window and positive outside. This gives a
+        # soft fallback when discrete tabular steps jump over the desired range.
+        distance = torch.zeros_like(margin)
+        if self.config.target_margin is not None:
+            target = torch.full_like(margin, float(self.config.target_margin))
+            distance = torch.maximum(distance, target - margin)
+        if self.config.max_margin is not None:
+            maximum = torch.full_like(margin, float(self.config.max_margin))
+            distance = torch.maximum(distance, margin - maximum)
+        return distance
+
+    def _margin_window_better(
+        self,
+        candidate_score: torch.Tensor,
+        candidate_distance: torch.Tensor,
+        best_score: torch.Tensor,
+        best_distance: torch.Tensor,
+    ) -> torch.Tensor:
+        closer = candidate_distance < best_distance
+        same_distance = candidate_distance == best_distance
+        stronger = candidate_score > best_score
+        return closer | (same_distance & stronger)
+
+    def _target_reached(self, best_score: torch.Tensor, best_distance: torch.Tensor) -> bool:
+        if self._use_loss_window():
+            if self.config.target_loss_increase is None:
+                return False
+            return bool((best_score >= float(self.config.target_loss_increase)).all().item())
+        if self._use_margin_window():
+            return bool((best_distance <= torch.finfo(best_distance.dtype).eps).all().item())
+        return False
diff --git a/nebula/core/datasets/covtype/covtype.py b/nebula/core/datasets/covtype/covtype.py
index e3be64318..ec3ef65d9 100644
--- a/nebula/core/datasets/covtype/covtype.py
+++ b/nebula/core/datasets/covtype/covtype.py
@@ -16,7 +16,7 @@
 
 class CovtypeTorchDataset(Dataset):
     """
-    Simple torch Dataset wrapper for tabular Covtype data.
+    Torch Dataset wrapper for tabular Covtype data.
 
     Returns:
         x: torch.float32 tensor of shape (n_features,)
@@ -81,18 +81,16 @@ class CovtypePartitionHandler(NebulaPartitionHandler):
     def __init__(self, file_path: str, prefix: str, config: Any, empty: bool = False):
         super().__init__(file_path, prefix, config, empty)
 
-        # For tabular data we typically don't apply torchvision transforms.
-        # If you later want normalization here, do it explicitly and carefully
-        # (train stats vs test stats, per-partition stats, etc.).
+        # Tabular features are already preprocessed before partitioning, so no
+        # torchvision-style transform is applied here.
         self.transform = None
 
     def __getitem__(self, idx: int):
         data, target = super().__getitem__(idx)
 
-        # Defensive: depending on how NebulaPartitionHandler stores/returns,
-        # "data" might be list/tuple/np.ndarray. Ensure we end up with 1D float32 tensor.
+        # Partition storage can return lists, numpy arrays or tensors. The model
+        # expects a 1D float32 tensor for each tabular sample.
         if isinstance(data, tuple):
-            # Some vision datasets store (img, meta). For tabular we ignore extras.
             data = data[0]
 
         if isinstance(data, torch.Tensor):
@@ -100,7 +98,7 @@ def __getitem__(self, idx: int):
         else:
             x = torch.tensor(np.asarray(data), dtype=torch.float32)
 
-        # Ensure target in [0..num_classes-1] and torch.long
+        # Targets are stored as class indices and consumed by CrossEntropyLoss.
         if isinstance(target, torch.Tensor):
             y = target.to(dtype=torch.long)
         else:
@@ -119,7 +117,7 @@ class CovtypeDataset(NebulaDataset):
     Notes:
     - Covtype has 7 classes.
     - Features are tabular (54 features in the classic version).
-    - We provide a simple train/test split with fixed seed.
+    - Deterministic stratified train/test split.
 
     Requirements:
     - scikit-learn must be installed (for fetch_covtype + train_test_split).
@@ -184,10 +182,14 @@ class CovtypeDataset(NebulaDataset):
     ]
     # Covtype has two kinds of inputs:
     # - terrain measurements, which constrained PGD may perturb;
-    # - binary wilderness/soil indicators, which stay immutable to avoid broken
-    #   one-hot-like combinations.
+    # - binary wilderness/soil indicators, which are already one-hot-like.
+    #
+    # The binary groups are immutable in the current metadata. This avoids
+    # invalid wilderness/soil combinations while still exercising constrained
+    # PGD on the numeric part of the dataset.
     PERTURBABLE_CONTINUOUS_COLUMNS = list(CONTINUOUS_COLUMNS)
     PERTURBABLE_INTEGER_COLUMNS = []
+    NON_PERTURBABLE_COLUMNS = list(BINARY_COLUMNS)
 
     def __init__(
         self,
@@ -236,7 +238,7 @@ def _default_feature_names(cls, n_features: int) -> list[str]:
     def _validate_manual_schema(cls, columns) -> None:
         continuous_columns = set(cls.CONTINUOUS_COLUMNS)
         integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
-        non_perturbable_columns = set(cls.BINARY_COLUMNS)
+        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
         overlapping_columns = sorted(
             (continuous_columns & integer_columns)
             | (continuous_columns & non_perturbable_columns)
@@ -290,13 +292,13 @@ def load_covtype_dataset(self):
             feature_names = self._default_feature_names(x.shape[1])
             self._validate_manual_schema(feature_names)
 
-        # Map labels to 0..6 (CrossEntropyLoss convention)
-        # If already 0..6, this is harmless for 1..7 only if we detect min.
+        # sklearn usually returns labels in 1..7. CrossEntropyLoss expects
+        # zero-based class indices, so map them to 0..6 when needed.
         y = np.asarray(y).reshape(-1)
         if y.min() == 1:
             y = y - 1
 
-        # Split "grande"
+        # Build a deterministic stratified train/test split.
         x_train, x_test, y_train, y_test = train_test_split(
             x, y,
             test_size=self.test_size,
@@ -305,7 +307,8 @@ def load_covtype_dataset(self):
             stratify=y,
         )
 
-        # Submuestreo estratificado (corto y determinista)
+        # Optional stratified limits keep experiments manageable without
+        # changing the class distribution unnecessarily.
         if self.train_limit is not None and len(y_train) > self.train_limit:
             x_train, _, y_train, _ = train_test_split(
                 x_train, y_train,
@@ -324,8 +327,8 @@ def load_covtype_dataset(self):
                 stratify=y_test,
             )
 
-        # Scale only the perturbable terrain measurements. The binary columns
-        # must remain 0/1 because they encode wilderness and soil indicators.
+        # Scale only the terrain measurements. The binary columns must remain
+        # exact 0/1 values because they encode wilderness and soil indicators.
         scaler = StandardScaler()
         x_train = np.asarray(x_train, dtype=np.float32).copy()
         x_test = np.asarray(x_test, dtype=np.float32).copy()
@@ -364,7 +367,8 @@ def _make_dataset(
     @classmethod
     def _build_adversarial_metadata(cls, feature_names, x_train):
         # Dataset responsibility: declare which variables are perturbable. The
-        # shared builder maps those declarations to feature masks and bounds.
+        # shared builder marks every other feature, including binary indicators,
+        # as non-perturbable and creates the masks consumed by constrained PGD.
         return build_tabular_adversarial_metadata(
             feature_names=feature_names,
             x_train=x_train,
@@ -380,7 +384,7 @@ def _log_adversarial_metadata(metadata: dict[str, Any], feature_names: list[str]
         continuous_features = metadata["continuous_features"]
         non_perturbable_features = metadata["non_perturbable_features"]
         logger.info(
-            "[Covtype] Tabular adversarial feature mask | continuous=%s | non_perturbable=%s | "
+            "[Covtype] Tabular adversarial feature mask | continuous=%s | binary_non_perturbable=%s | "
             "continuous_features=%s | non_perturbable_preview=%s",
             len(continuous_features),
             len(non_perturbable_features),
diff --git a/nebula/core/datasets/kddcup99/kddcup99.py b/nebula/core/datasets/kddcup99/kddcup99.py
index 644af9e3b..72c3db0f4 100644
--- a/nebula/core/datasets/kddcup99/kddcup99.py
+++ b/nebula/core/datasets/kddcup99/kddcup99.py
@@ -1,18 +1,20 @@
 import logging
 import os
-from typing import Tuple, Any
+from typing import Any
 
 import numpy as np
 import torch
 from torch.utils.data import Dataset
 
 from nebula.core.datasets.nebuladataset import NebulaDataset, NebulaPartitionHandler
-from nebula.core.datasets.tabular_metadata import CONTINUOUS, INTEGER, NON_PERTURBABLE, TabularAdversarialMetadata
+from nebula.core.datasets.tabular_metadata import build_tabular_adversarial_metadata
+
+logger = logging.getLogger(__name__)
 
 
 class KDDCUP99TorchDataset(Dataset):
     """
-    Simple torch Dataset wrapper for tabular KDDCUP99 data.
+    Torch Dataset wrapper for tabular KDDCUP99 data.
 
     Returns:
         x: torch.float32 tensor of shape (n_features,)
@@ -59,7 +61,7 @@ def __init__(
     def __len__(self) -> int:
         return int(self.y.shape[0])
 
-    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
+    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
         x_i = torch.from_numpy(self.x[idx])
         y_i = torch.tensor(self.y[idx], dtype=torch.long)
         return x_i, y_i
@@ -76,14 +78,15 @@ class KDDCUP99PartitionHandler(NebulaPartitionHandler):
     def __init__(self, file_path: str, prefix: str, config: Any, empty: bool = False):
         super().__init__(file_path, prefix, config, empty)
 
-        # For tabular data we typically don't apply torchvision transforms.
+        # Tabular features are already preprocessed before partitioning, so no
+        # torchvision-style transform is applied here.
         self.transform = None
 
     def __getitem__(self, idx: int):
         data, target = super().__getitem__(idx)
 
-        # Defensive: depending on how NebulaPartitionHandler stores/returns,
-        # "data" might be list/tuple/np.ndarray. Ensure we end up with 1D float32 tensor.
+        # Partition storage can return lists, numpy arrays or tensors. The model
+        # expects a 1D float32 tensor for each tabular sample.
         if isinstance(data, tuple):
             data = data[0]
 
@@ -92,7 +95,7 @@ def __getitem__(self, idx: int):
         else:
             x = torch.tensor(np.asarray(data), dtype=torch.float32)
 
-        # Ensure target in [0..num_classes-1] and torch.long
+        # Targets are stored as class indices and consumed by CrossEntropyLoss.
         if isinstance(target, torch.Tensor):
             y = target.to(dtype=torch.long)
         else:
@@ -112,7 +115,7 @@ class KDDCUP99Dataset(NebulaDataset):
     - KDDCUP99 is a tabular intrusion-detection dataset.
     - sklearn fetch_kddcup99 exposes 41 features.
     - Targets are mapped to a binary task: normal vs attack.
-    - Some columns are categorical/string-like, so we one-hot encode them.
+    - Categorical string columns are one-hot encoded.
     - Targets may come as bytes/strings, so we decode before mapping labels.
 
     Requirements:
@@ -162,7 +165,7 @@ class KDDCUP99Dataset(NebulaDataset):
         "dst_host_rerror_rate",
         "dst_host_srv_rerror_rate",
     ]
-    PERTURBABLE_CONTINUOUS_COLUMNS = [
+    CONTINUOUS_COLUMNS = [
         "serror_rate",
         "srv_serror_rate",
         "rerror_rate",
@@ -179,7 +182,7 @@ class KDDCUP99Dataset(NebulaDataset):
         "dst_host_rerror_rate",
         "dst_host_srv_rerror_rate",
     ]
-    PERTURBABLE_INTEGER_COLUMNS = [
+    INTEGER_COLUMNS = [
         "duration",
         "src_bytes",
         "dst_bytes",
@@ -198,10 +201,12 @@ class KDDCUP99Dataset(NebulaDataset):
         "dst_host_count",
         "dst_host_srv_count",
     ]
-    NON_PERTURBABLE_RAW_COLUMNS = [
+    CATEGORICAL_COLUMNS = [
         "protocol_type",
         "service",
         "flag",
+    ]
+    NON_PERTURBABLE_COLUMNS = [
         "land",
         "logged_in",
         "root_shell",
@@ -209,6 +214,12 @@ class KDDCUP99Dataset(NebulaDataset):
         "is_host_login",
         "is_guest_login",
     ]
+    # KDDCUP99 exposes mixed network-traffic features. For the first supported
+    # adversarial-training version, constrained PGD may perturb numeric traffic
+    # measurements and counters. Protocol/service/flag one-hot columns and
+    # binary login/status flags stay immutable to avoid invalid records.
+    PERTURBABLE_CONTINUOUS_COLUMNS = list(CONTINUOUS_COLUMNS)
+    PERTURBABLE_INTEGER_COLUMNS = list(INTEGER_COLUMNS)
 
     def __init__(
         self,
@@ -259,18 +270,22 @@ def _ensure_raw_feature_names(cls, x):
 
     @classmethod
     def _validate_manual_schema(cls, columns) -> None:
-        continuous_columns = set(cls.PERTURBABLE_CONTINUOUS_COLUMNS)
-        integer_columns = set(cls.PERTURBABLE_INTEGER_COLUMNS)
-        non_perturbable_columns = set(cls.NON_PERTURBABLE_RAW_COLUMNS)
+        continuous_columns = set(cls.CONTINUOUS_COLUMNS)
+        integer_columns = set(cls.INTEGER_COLUMNS)
+        categorical_columns = set(cls.CATEGORICAL_COLUMNS)
+        non_perturbable_columns = set(cls.NON_PERTURBABLE_COLUMNS)
         overlapping_columns = sorted(
             (continuous_columns & integer_columns)
+            | (continuous_columns & categorical_columns)
             | (continuous_columns & non_perturbable_columns)
+            | (integer_columns & categorical_columns)
             | (integer_columns & non_perturbable_columns)
+            | (categorical_columns & non_perturbable_columns)
         )
         if overlapping_columns:
             raise ValueError(f"KDDCUP99Dataset columns configured twice: {overlapping_columns}")
 
-        configured_columns = continuous_columns | integer_columns | non_perturbable_columns
+        configured_columns = continuous_columns | integer_columns | categorical_columns | non_perturbable_columns
         dataset_columns = set(columns)
         missing_columns = sorted(configured_columns - dataset_columns)
         if missing_columns:
@@ -284,7 +299,6 @@ def load_kddcup99_dataset(self):
         Loads KDDCUP99 via sklearn, performs deterministic preprocessing
         and train/test split, and wraps into torch Datasets.
         """
-        # Local cache directory for sklearn dataset downloads
         data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
         os.makedirs(data_dir, exist_ok=True)
 
@@ -296,7 +310,6 @@ def load_kddcup99_dataset(self):
         except Exception as e:
             raise ImportError(
                 "KDDCUP99Dataset requires scikit-learn and pandas. "
-                "Install them (e.g., pip install scikit-learn pandas)."
             ) from e
 
         kdd = fetch_kddcup99(
@@ -312,7 +325,8 @@ def load_kddcup99_dataset(self):
         x = kdd.data
         y = kdd.target
 
-        # Defensive conversion to pandas objects
+        # fetch_kddcup99 can return numpy arrays depending on sklearn version.
+        # The preprocessing below expects pandas columns.
         if not hasattr(x, "columns"):
             x = pd.DataFrame(x)
         if not hasattr(y, "astype"):
@@ -320,47 +334,30 @@ def load_kddcup99_dataset(self):
         x = self._ensure_raw_feature_names(x)
         self._validate_manual_schema(x.columns)
 
-        # Decode bytes -> str where needed
         def _decode_if_bytes(v):
             if isinstance(v, (bytes, bytearray)):
                 return v.decode("utf-8", errors="ignore")
             return v
 
-        # Some KDDCUP99 columns are categorical (e.g. protocol/service/flag).
-        # We decode bytes and one-hot encode object/category columns.
+        # Decode bytes before one-hot encoding categorical columns and mapping labels.
         for col in x.columns:
             if x[col].dtype == object:
                 x[col] = x[col].map(_decode_if_bytes)
 
         y = y.map(_decode_if_bytes)
 
-        # One-hot encode categorical columns, keep numeric ones as-is.
+        # One-hot encode protocol/service/flag and keep numeric columns as-is.
         x = pd.get_dummies(x, drop_first=False)
         feature_names = [str(col) for col in x.columns]
-        logging.getLogger().info("[KDDCUP99] Encoded feature dimension: %s", len(feature_names))
-        continuous_features = [
-            x.columns.get_loc(col)
-            for col in self.PERTURBABLE_CONTINUOUS_COLUMNS
-            if col in x.columns
-        ]
-        integer_features = [
-            x.columns.get_loc(col)
-            for col in self.PERTURBABLE_INTEGER_COLUMNS
-            if col in x.columns
-        ]
-        perturbable_features = set(continuous_features) | set(integer_features)
-        non_perturbable_features = [i for i in range(len(feature_names)) if i not in perturbable_features]
-        binary_features = non_perturbable_features
+        logger.info("[KDDCUP99] Encoded feature dimension: %s", len(feature_names))
 
         # Map labels to a binary task: 0 = normal, 1 = attack.
         y = pd.Series(y).astype(str)
         y = y.str.strip()
         y = (y != "normal.").astype(np.int64).to_numpy(copy=False)
-
-        classes = ["normal", "attack"]
         self.num_classes = 2
 
-        # Split "grande"
+        # Build a deterministic stratified train/test split.
         x_train, x_test, y_train, y_test = train_test_split(
             x, y,
             test_size=self.test_size,
@@ -369,7 +366,8 @@ def _decode_if_bytes(v):
             stratify=y,
         )
 
-        # Submuestreo estratificado (corto y determinista)
+        # Optional stratified limits keep experiments manageable without
+        # changing the class distribution unnecessarily.
         if self.train_limit is not None and len(y_train) > self.train_limit:
             x_train, _, y_train, _ = train_test_split(
                 x_train, y_train,
@@ -378,7 +376,7 @@ def _decode_if_bytes(v):
                 shuffle=True,
                 stratify=y_train,
             )
-            logging.getLogger().info("[KDDCUP99] Limited train split to %s samples", len(y_train))
+            logger.info("[KDDCUP99] Limited train split to %s samples", len(y_train))
 
         if self.test_limit is not None and len(y_test) > self.test_limit:
             x_test, _, y_test, _ = train_test_split(
@@ -388,64 +386,97 @@ def _decode_if_bytes(v):
                 shuffle=True,
                 stratify=y_test,
             )
-            logging.getLogger().info("[KDDCUP99] Limited test split to %s samples", len(y_test))
+            logger.info("[KDDCUP99] Limited test split to %s samples", len(y_test))
 
         x_train_np = x_train.astype(np.float32).to_numpy(copy=True)
         x_test_np = x_test.astype(np.float32).to_numpy(copy=True)
 
-        # Scale perturbable numeric columns after splitting. One-hot and binary flags stay unchanged.
+        # Standardize continuous and integer columns with a scaler fit on the train
+        # split only; one-hot categorical columns and binary flags stay exact 0/1.
+        continuous_features = self._column_indices(x_train.columns, self.CONTINUOUS_COLUMNS)
+        integer_features = self._column_indices(x_train.columns, self.INTEGER_COLUMNS)
         scaled_features = continuous_features + integer_features
+        integer_step_by_column = {}
         if scaled_features:
             scaler = StandardScaler()
             x_train_np[:, scaled_features] = scaler.fit_transform(x_train_np[:, scaled_features])
             x_test_np[:, scaled_features] = scaler.transform(x_test_np[:, scaled_features])
-        integer_step_norm = {}
-        if integer_features:
-            integer_step_norm = {
-                idx: float(1.0 / scale)
-                for idx, scale in zip(integer_features, scaler.scale_[len(continuous_features):], strict=False)
+            integer_scales = scaler.scale_[len(continuous_features):]
+            integer_step_by_column = {
+                column: float(1.0 / scale)
+                for column, scale in zip(self.INTEGER_COLUMNS, integer_scales, strict=False)
             }
-        continuous_feature_set = set(continuous_features)
-        integer_feature_set = set(integer_features)
-        tabular_metadata = TabularAdversarialMetadata(
-            feature_names=feature_names,
-            feature_types=[
-                CONTINUOUS if idx in continuous_feature_set
-                else INTEGER if idx in integer_feature_set
-                else NON_PERTURBABLE
-                for idx in range(len(feature_names))
-            ],
-            feature_min_norm=np.min(x_train_np, axis=0).astype(float).tolist(),
-            feature_max_norm=np.max(x_train_np, axis=0).astype(float).tolist(),
-            integer_step_norm=integer_step_norm,
-        ).to_dict()
-
-        train_ds = KDDCUP99TorchDataset(
-            x_train_np,
-            y_train,
-            feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
-            binary_features=binary_features,
-            tabular_metadata=tabular_metadata,
+
+        metadata = self._build_adversarial_metadata(feature_names, x_train_np, integer_step_by_column)
+        self._log_adversarial_metadata(metadata, feature_names)
+
+        return (
+            self._make_dataset(x_train_np, y_train, feature_names, metadata),
+            self._make_dataset(x_test_np, y_test, feature_names, metadata),
         )
-        test_ds = KDDCUP99TorchDataset(
-            x_test_np,
-            y_test,
+
+    @staticmethod
+    def _column_indices(columns, names: list[str]) -> list[int]:
+        return [columns.get_loc(name) for name in names if name in columns]
+
+    @staticmethod
+    def _make_dataset(
+        x: np.ndarray,
+        y: np.ndarray,
+        feature_names: list[str],
+        metadata: dict[str, Any],
+    ) -> KDDCUP99TorchDataset:
+        dataset = KDDCUP99TorchDataset(
+            x,
+            y,
             feature_names=feature_names,
-            continuous_features=continuous_features,
-            integer_features=integer_features,
-            non_perturbable_features=non_perturbable_features,
-            binary_features=binary_features,
-            tabular_metadata=tabular_metadata,
+            continuous_features=metadata["continuous_features"],
+            integer_features=metadata["integer_features"],
+            non_perturbable_features=metadata["non_perturbable_features"],
+            binary_features=metadata["non_perturbable_features"],
+            tabular_metadata=metadata["tabular_metadata"],
         )
+        dataset.classes = ["normal", "attack"]
+        return dataset
 
-        # Optional: preserve original class names for inspection/debugging
-        train_ds.classes = classes
-        test_ds.classes = classes
+    @classmethod
+    def _build_adversarial_metadata(
+        cls,
+        feature_names: list[str],
+        x_train: np.ndarray,
+        integer_step_by_column: dict[str, float],
+    ) -> dict[str, Any]:
+        # Dataset responsibility: declare which raw variables are perturbable.
+        # The shared builder maps that declaration to transformed feature masks,
+        # bounds and integer steps in model-input space.
+        return build_tabular_adversarial_metadata(
+            feature_names=feature_names,
+            x_train=x_train,
+            continuous_columns=cls.CONTINUOUS_COLUMNS,
+            integer_columns=cls.INTEGER_COLUMNS,
+            categorical_columns=cls.CATEGORICAL_COLUMNS,
+            perturbable_continuous_columns=cls.PERTURBABLE_CONTINUOUS_COLUMNS,
+            perturbable_integer_columns=cls.PERTURBABLE_INTEGER_COLUMNS,
+            integer_step_by_column=integer_step_by_column,
+        )
 
-        return train_ds, test_ds
+    @staticmethod
+    def _log_adversarial_metadata(metadata: dict[str, Any], feature_names: list[str]) -> None:
+        continuous_features = metadata["continuous_features"]
+        integer_features = metadata["integer_features"]
+        non_perturbable_features = metadata["non_perturbable_features"]
+        logger.info(
+            "[KDDCUP99] Tabular adversarial feature mask | continuous=%s | integer=%s | "
+            "non_perturbable=%s | continuous_features=%s | integer_features=%s | "
+            "non_perturbable_preview=%s | integer_step_norm=%s",
+            len(continuous_features),
+            len(integer_features),
+            len(non_perturbable_features),
+            [feature_names[idx] for idx in continuous_features],
+            [feature_names[idx] for idx in integer_features],
+            [feature_names[idx] for idx in non_perturbable_features[:20]],
+            metadata["integer_step_norm"],
+        )
 
     def generate_non_iid_map(self, dataset, partition: str = "dirichlet", partition_parameter: float = 0.5):
         if partition == "dirichlet":
diff --git a/nebula/frontend/config/participant.json.example b/nebula/frontend/config/participant.json.example
index 88c017ba1..9d9552fa3 100755
--- a/nebula/frontend/config/participant.json.example
+++ b/nebula/frontend/config/participant.json.example
@@ -117,8 +117,11 @@
       "steps": 1,
       "mode": "mixed",
       "apply_probability": 0.3,
+      "candidate_selection": "none",
       "target_loss_increase": null,
-      "max_loss_increase": null
+      "max_loss_increase": null,
+      "target_margin": 0,
+      "max_margin": 0.5
     },
     "reputation": {
       "enabled": false,
diff --git a/nebula/frontend/static/js/deployment/adversarial-training.js b/nebula/frontend/static/js/deployment/adversarial-training.js
index cb072cf9e..be1f3aca5 100644
--- a/nebula/frontend/static/js/deployment/adversarial-training.js
+++ b/nebula/frontend/static/js/deployment/adversarial-training.js
@@ -10,12 +10,15 @@ const AdversarialTrainingManager = (function() {
         mode: "mixed",
         apply_probability: 0.3,
         log_adversarial_metrics: true,
+        candidate_selection: "none",
         target_loss_increase: null,
-        max_loss_increase: null
+        max_loss_increase: null,
+        target_margin: 0,
+        max_margin: 0.5
     };
 
     const IMAGE_DATASETS = new Set(["MNIST", "FashionMNIST", "EMNIST", "CIFAR10", "CIFAR100"]);
-    const TABULAR_ADVERSARIAL_DATASETS = new Set(["AdultCensus", "BreastCancer", "Covtype"]);
+    const TABULAR_ADVERSARIAL_DATASETS = new Set(["AdultCensus", "BreastCancer", "Covtype", "KDDCUP99"]);
     const IMAGE_ATTACK_OPTIONS = [
         {value: "fgsm", label: "FGSM"},
         {value: "pgd", label: "PGD"}
@@ -27,6 +30,7 @@ const AdversarialTrainingManager = (function() {
     function initializeAdversarialTraining() {
         setupAdversarialTrainingSwitch();
         setupAttackSelector();
+        setupCandidateSelectionSelector();
         setupDatasetAwareness();
         setAdversarialTrainingConfig(DEFAULT_ADVERSARIAL_TRAINING_CONFIG);
     }
@@ -52,6 +56,15 @@ const AdversarialTrainingManager = (function() {
         });
     }
 
+    function setupCandidateSelectionSelector() {
+        const candidateSelectionSelect = document.getElementById("adversarialTrainingCandidateSelection");
+        if (!candidateSelectionSelect) return;
+
+        candidateSelectionSelect.addEventListener("change", function() {
+            toggleCandidateSelectionSettings(this.value);
+        });
+    }
+
     function setupDatasetAwareness() {
         const datasetSelect = document.getElementById("datasetSelect");
         if (!datasetSelect) return;
@@ -71,17 +84,39 @@ const AdversarialTrainingManager = (function() {
     function toggleAttackSettings(attack) {
         const pgdSettings = document.getElementById("adversarial-training-pgd-settings");
         const stepsTitle = document.getElementById("adversarialTrainingStepsTitle");
+        const candidateSelectionSettings = document.getElementById("adversarial-training-candidate-selection-settings");
         const lossWindowSettings = document.getElementById("adversarial-training-loss-window-settings");
+        const marginWindowSettings = document.getElementById("adversarial-training-margin-window-settings");
         const domain = document.getElementById("adversarialTrainingDomain")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.domain;
         if (!pgdSettings) return;
 
         pgdSettings.style.display = ["pgd", "constrained_pgd"].includes(attack) ? "block" : "none";
-        if (lossWindowSettings) {
-            lossWindowSettings.style.display = domain === "tabular" ? "block" : "none";
+        if (candidateSelectionSettings) {
+            candidateSelectionSettings.style.display = domain === "tabular" ? "block" : "none";
         }
         if (stepsTitle) {
             stepsTitle.textContent = domain === "tabular" ? "Constrained PGD steps" : "PGD steps";
         }
+        if (domain !== "tabular") {
+            if (lossWindowSettings) lossWindowSettings.style.display = "none";
+            if (marginWindowSettings) marginWindowSettings.style.display = "none";
+            return;
+        }
+        toggleCandidateSelectionSettings(
+            document.getElementById("adversarialTrainingCandidateSelection")?.value
+                || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.candidate_selection
+        );
+    }
+
+    function toggleCandidateSelectionSettings(candidateSelection) {
+        const lossWindowSettings = document.getElementById("adversarial-training-loss-window-settings");
+        const marginWindowSettings = document.getElementById("adversarial-training-margin-window-settings");
+        if (lossWindowSettings) {
+            lossWindowSettings.style.display = candidateSelection === "loss_window" ? "block" : "none";
+        }
+        if (marginWindowSettings) {
+            marginWindowSettings.style.display = candidateSelection === "margin_window" ? "block" : "none";
+        }
     }
 
     function updateDatasetAvailability() {
@@ -94,7 +129,7 @@ const AdversarialTrainingManager = (function() {
 
         if (datasetNote) {
             datasetNote.style.display = domain === "unsupported" ? "block" : "none";
-            datasetNote.textContent = "Adversarial Training for tabular datasets currently supports AdultCensus, BreastCancer, and Covtype with constrained PGD.";
+            datasetNote.textContent = "Adversarial Training for tabular datasets currently supports AdultCensus, BreastCancer, Covtype, and KDDCUP99 with constrained PGD.";
         }
         if (domainInput) {
             domainInput.value = domain === "unsupported" ? "tabular" : domain;
@@ -181,6 +216,8 @@ const AdversarialTrainingManager = (function() {
             steps: integerValue("adversarialTrainingSteps", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.steps),
             mode: document.getElementById("adversarialTrainingMode")?.value || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.mode,
             apply_probability: numberValue("adversarialTrainingApplyProbability", DEFAULT_ADVERSARIAL_TRAINING_CONFIG.apply_probability),
+            candidate_selection: document.getElementById("adversarialTrainingCandidateSelection")?.value
+                || DEFAULT_ADVERSARIAL_TRAINING_CONFIG.candidate_selection,
             target_loss_increase: optionalNumberValue(
                 "adversarialTrainingTargetLossIncrease",
                 DEFAULT_ADVERSARIAL_TRAINING_CONFIG.target_loss_increase
@@ -189,18 +226,35 @@ const AdversarialTrainingManager = (function() {
                 "adversarialTrainingMaxLossIncrease",
                 DEFAULT_ADVERSARIAL_TRAINING_CONFIG.max_loss_increase
             ),
+            target_margin: optionalNumberValue(
+                "adversarialTrainingTargetMargin",
+                DEFAULT_ADVERSARIAL_TRAINING_CONFIG.target_margin
+            ),
+            max_margin: optionalNumberValue(
+                "adversarialTrainingMaxMargin",
+                DEFAULT_ADVERSARIAL_TRAINING_CONFIG.max_margin
+            ),
             log_adversarial_metrics: true
         };
 
         if (config.alpha === null || config.attack !== "pgd") {
             delete config.alpha;
         }
-        if (config.target_loss_increase === null) {
+        if (config.domain !== "tabular") {
+            delete config.candidate_selection;
+        }
+        if (config.candidate_selection !== "loss_window" || config.target_loss_increase === null) {
             delete config.target_loss_increase;
         }
-        if (config.max_loss_increase === null) {
+        if (config.candidate_selection !== "loss_window" || config.max_loss_increase === null) {
             delete config.max_loss_increase;
         }
+        if (config.candidate_selection !== "margin_window" || config.target_margin === null) {
+            delete config.target_margin;
+        }
+        if (config.candidate_selection !== "margin_window" || config.max_margin === null) {
+            delete config.max_margin;
+        }
         return config;
     }
 
@@ -224,8 +278,16 @@ const AdversarialTrainingManager = (function() {
                 : DEFAULT_ADVERSARIAL_TRAINING_CONFIG.mode
         );
         setValue("adversarialTrainingApplyProbability", adversarialTrainingConfig.apply_probability);
+        setValue(
+            "adversarialTrainingCandidateSelection",
+            ["none", "loss_window", "margin_window"].includes(adversarialTrainingConfig.candidate_selection)
+                ? adversarialTrainingConfig.candidate_selection
+                : DEFAULT_ADVERSARIAL_TRAINING_CONFIG.candidate_selection
+        );
         setValue("adversarialTrainingTargetLossIncrease", adversarialTrainingConfig.target_loss_increase ?? "");
         setValue("adversarialTrainingMaxLossIncrease", adversarialTrainingConfig.max_loss_increase ?? "");
+        setValue("adversarialTrainingTargetMargin", adversarialTrainingConfig.target_margin ?? 0);
+        setValue("adversarialTrainingMaxMargin", adversarialTrainingConfig.max_margin ?? 0.5);
 
         updateDatasetAvailability();
         const domain = document.getElementById("adversarialTrainingDomain")?.value || adversarialTrainingConfig.domain;
@@ -261,6 +323,12 @@ const AdversarialTrainingManager = (function() {
         if (config.apply_probability < 0 || config.apply_probability > 1) {
             return "[Adversarial Training] Apply probability must be between 0 and 1.";
         }
+        if (
+            config.candidate_selection !== undefined
+            && !["none", "loss_window", "margin_window"].includes(config.candidate_selection)
+        ) {
+            return "[Adversarial Training] Candidate selection must be None, Loss window, or Margin window.";
+        }
         if (config.target_loss_increase !== undefined && config.target_loss_increase < 0) {
             return "[Adversarial Training] Target loss increase must be greater than or equal to 0.";
         }
@@ -274,6 +342,13 @@ const AdversarialTrainingManager = (function() {
         ) {
             return "[Adversarial Training] Target loss increase must be smaller than or equal to max loss increase.";
         }
+        if (
+            config.target_margin !== undefined
+            && config.max_margin !== undefined
+            && config.target_margin > config.max_margin
+        ) {
+            return "[Adversarial Training] Target margin must be smaller than or equal to max margin.";
+        }
         return null;
     }
 
diff --git a/nebula/frontend/templates/deployment.html b/nebula/frontend/templates/deployment.html
index 01e9ed5a4..f5171f14e 100755
--- a/nebula/frontend/templates/deployment.html
+++ b/nebula/frontend/templates/deployment.html
@@ -588,7 +588,7 @@ <h5 class="step-title">Enable/Disable Adversarial Training</h5>
                             style="display: inline; width: 80px; height: 30px;">
                     </div>
                     <small id="adversarial-training-dataset-note" class="form-text text-muted" style="display: none;">
-                        Adversarial Training for tabular datasets currently supports AdultCensus, BreastCancer, and Covtype with constrained PGD.
+                        Adversarial Training for tabular datasets currently supports AdultCensus, BreastCancer, Covtype, and KDDCUP99 with constrained PGD.
                     </small>
                     <div id="adversarial-training-settings" style="margin-top: 10px; display: none;">
                         <input type="hidden" id="adversarialTrainingDomain" value="image">
@@ -612,8 +612,19 @@ <h5 class="step-title">Apply probability</h5>
                                 placeholder="Probability" min="0" max="1" step="0.05" value="0.3"
                                 style="display: inline; width: 80%">
                         </div>
+                        <div id="adversarial-training-candidate-selection-settings" style="display: none;">
+                            <h5 class="step-title">Tabular candidate selection</h5>
+                            <div class="form-check form-check-inline">
+                                <select class="form-control" id="adversarialTrainingCandidateSelection"
+                                    style="display: inline; width: 80%">
+                                    <option value="none" selected>None</option>
+                                    <option value="loss_window">Loss window</option>
+                                    <option value="margin_window">Margin window</option>
+                                </select>
+                            </div>
+                        </div>
                         <div id="adversarial-training-loss-window-settings" style="display: none;">
-                            <h5 class="step-title">Tabular hard-example loss window</h5>
+                            <h5 class="step-title">Tabular loss window</h5>
                             <div class="form-check form-check-inline">
                                 <input type="number" class="form-control" id="adversarialTrainingTargetLossIncrease"
                                     placeholder="Target loss increase" min="0" step="0.01" value=""
@@ -623,6 +634,17 @@ <h5 class="step-title">Tabular hard-example loss window</h5>
                                     style="display: inline; width: 39%">
                             </div>
                         </div>
+                        <div id="adversarial-training-margin-window-settings" style="display: none;">
+                            <h5 class="step-title">Tabular margin window</h5>
+                            <div class="form-check form-check-inline">
+                                <input type="number" class="form-control" id="adversarialTrainingTargetMargin"
+                                    placeholder="Target margin" step="0.01" value="0"
+                                    style="display: inline; width: 39%">
+                                <input type="number" class="form-control" id="adversarialTrainingMaxMargin"
+                                    placeholder="Max margin" step="0.01" value="0.5"
+                                    style="display: inline; width: 39%">
+                            </div>
+                        </div>
                         <div id="adversarial-training-pgd-settings" style="display: none;">
                             <h5 class="step-title" id="adversarialTrainingStepsTitle">PGD steps</h5>
                             <div class="form-check form-check-inline">
@@ -641,7 +663,7 @@ <h5 class="step-title">Training mode</h5>
                             </select>
                         </div>
                         <small class="form-text text-muted">
-                            Image datasets use FGSM/PGD. AdultCensus, BreastCancer, and Covtype use constrained PGD for tabular adversarial training.
+                            Image datasets use FGSM/PGD. AdultCensus, BreastCancer, Covtype, and KDDCUP99 use constrained PGD for tabular adversarial training.
                         </small>
                     </div>
                 </div>

From 48c124232a4a3e6938e41c95b62086a734ed6c34 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Wed, 10 Jun 2026 18:40:37 +0200
Subject: [PATCH 63/66] Robustness metrics revised and fixed, trustworthiness
 metrics revised, models updated: Data type, factsheets updated, tabular
 robustness metrics deleted

---
 .../defenses/adversarial_training/config.py   |  11 +-
 .../defenses/adversarial_training/defense.py  |   3 +-
 .../addons/trustworthiness/cfl_factsheet.py   |   4 +-
 .../configs/eval_metrics_cfl.json             |  28 +-
 .../configs/eval_metrics_cfl_images.json      |  28 +-
 .../configs/eval_metrics_cfl_tabular.json     |  70 +--
 .../configs/eval_metrics_dfl.json             |  28 +-
 .../configs/eval_metrics_dfl_images.json      |  28 +-
 .../configs/eval_metrics_dfl_tabular.json     |  70 +--
 .../configs/factsheet_template_cfl.json       |   8 +-
 .../factsheet_template_cfl_images.json        |   8 +-
 .../factsheet_template_cfl_tabular.json       |   7 +-
 .../configs/factsheet_template_dfl.json       |   8 +-
 .../factsheet_template_dfl_images.json        |   8 +-
 .../factsheet_template_dfl_tabular.json       |   7 +-
 .../addons/trustworthiness/dfl_factsheet.py   |   1 +
 .../trustworthiness/factsheet_common.py       |  50 +-
 .../trustworthiness/factsheet_populators.py   |  78 ++-
 .../trustworthiness/helpers/robustness.py     | 473 +++++++++++++-----
 nebula/core/datasets/image_metadata.py        |  14 +
 nebula/core/datasets/kddcup99/kddcup99.py     |   4 +-
 nebula/core/models/adultcensus/mlp.py         |   1 +
 nebula/core/models/breast_cancer/mlp.py       |   1 +
 nebula/core/models/cifar10/cnn.py             |   1 +
 nebula/core/models/cifar10/cnnV2.py           |   1 +
 nebula/core/models/cifar10/cnnV3.py           |   1 +
 nebula/core/models/cifar10/fastermobilenet.py |   1 +
 nebula/core/models/cifar10/resnet.py          |   1 +
 nebula/core/models/cifar10/simplemobilenet.py |   1 +
 nebula/core/models/cifar100/cnn.py            |   1 +
 nebula/core/models/covtype/mlp.py             |   1 +
 nebula/core/models/emnist/cnn.py              |   1 +
 nebula/core/models/emnist/mlp.py              |   1 +
 nebula/core/models/fashionmnist/cnn.py        |   1 +
 nebula/core/models/fashionmnist/mlp.py        |   1 +
 nebula/core/models/kddcup99/mlp.py            |   1 +
 nebula/core/models/mnist/cnn.py               |   1 +
 nebula/core/models/mnist/mlp.py               |   1 +
 nebula/core/models/sentiment140/cnn.py        |   1 +
 nebula/core/models/sentiment140/rnn.py        |   1 +
 40 files changed, 571 insertions(+), 384 deletions(-)
 create mode 100644 nebula/core/datasets/image_metadata.py

diff --git a/nebula/addons/defenses/adversarial_training/config.py b/nebula/addons/defenses/adversarial_training/config.py
index 8cbcdddcd..48dd73e6e 100644
--- a/nebula/addons/defenses/adversarial_training/config.py
+++ b/nebula/addons/defenses/adversarial_training/config.py
@@ -1,6 +1,8 @@
 from dataclasses import dataclass
 from typing import Any
 
+from nebula.core.datasets.image_metadata import IMAGE_DATASET_NORMALIZATION
+
 IMAGE_ADVERSARIAL_ATTACKS = {"fgsm", "pgd"}
 TABULAR_ADVERSARIAL_ATTACKS = {"constrained_pgd"}
 TABULAR_ADVERSARIAL_DATASETS = {"AdultCensus", "BreastCancer", "Covtype", "KDDCUP99"}
@@ -20,15 +22,6 @@
 ERR_TABULAR_METADATA = "Tabular adversarial training requires tabular_metadata"
 ERR_UNSUPPORTED_ATTACK = "Unsupported adversarial training attack: {attack}"
 
-IMAGE_DATASET_NORMALIZATION = {
-    "MNIST": ((0.5,), (0.5,)),
-    "FashionMNIST": ((0.5,), (0.5,)),
-    "EMNIST": ((0.5,), (0.5,)),
-    "CIFAR10": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
-    "CIFAR100": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
-}
-
-
 @dataclass(frozen=True)
 class AdversarialTrainingConfig:
     enabled: bool = False
diff --git a/nebula/addons/defenses/adversarial_training/defense.py b/nebula/addons/defenses/adversarial_training/defense.py
index b79285009..1e0cbc8de 100644
--- a/nebula/addons/defenses/adversarial_training/defense.py
+++ b/nebula/addons/defenses/adversarial_training/defense.py
@@ -25,6 +25,7 @@
     config_from_participant,
     validate_config,
 )
+from nebula.core.datasets.image_metadata import get_image_normalization
 from nebula.addons.defenses.adversarial_training.image import (
     ImageAdversarialExampleGenerator,
     ImageFGSMGenerator,
@@ -69,7 +70,7 @@ def from_participant_config(
 
         if config.domain == "image":
             # Image attacks run in normalized model space, so each dataset must provide mean/std.
-            normalization = IMAGE_DATASET_NORMALIZATION.get(config.dataset_name)
+            normalization = get_image_normalization(config.dataset_name)
             if normalization is None:
                 logging.warning(
                     "[AdversarialTrainingDefense] Skipping adversarial training: dataset '%s' has no image bounds",
diff --git a/nebula/addons/trustworthiness/cfl_factsheet.py b/nebula/addons/trustworthiness/cfl_factsheet.py
index b8cbe104b..1144571db 100755
--- a/nebula/addons/trustworthiness/cfl_factsheet.py
+++ b/nebula/addons/trustworthiness/cfl_factsheet.py
@@ -23,7 +23,6 @@
     get_underfitting_score,
 )
 from nebula.addons.trustworthiness.factsheet_common import (
-    cap_score,
     get_factsheet_path,
     get_factsheet_template_name,
     get_trustworthiness_dir,
@@ -67,6 +66,7 @@ def populate_factsheet_cfl(
             data["federation"],
             model,
             self.factsheet_template_file_nm,
+            dataset_name=data["dataset"],
         )
 
         try:
@@ -129,7 +129,7 @@ def populate_factsheet_cfl(
 
             # Convert class imbalance and runtime summaries into factsheet fields.
             class_imbalance_score = get_class_imbalance_score(avg_class_imbalance)
-            factsheet["fairness"]["class_imbalance"] = cap_score(class_imbalance_score)
+            factsheet["fairness"]["class_imbalance"] = class_imbalance_score
             populate_reputation(factsheet, reputation_summary)
 
             underfitting_score = get_underfitting_score(scenario_name, participant_idx)
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
index 635d2e9a0..520e32ed6 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl.json
@@ -7,7 +7,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               }
             ],
             "operation": "get_value",
@@ -27,40 +27,40 @@
             "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
-          "clipped_adversarial_accuracy": {
+          "adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "description": "Adversarial accuracy; higher values indicate better predictive performance under adversarial perturbations.",
             "weight": 0.2
           },
-          "clipped_empirical_robustness": {
+          "empirical_robustness_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "description": "Empirical robustness score; higher values indicate stronger resistance to adversarial perturbations.",
             "weight": 0.15
           },
-          "clipped_confidence_score": {
+          "confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
+            "description": "Confidence score; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -665,7 +665,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               },
               {
                 "source": "factsheet",
@@ -673,15 +673,15 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
index 635d2e9a0..520e32ed6 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_images.json
@@ -7,7 +7,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               }
             ],
             "operation": "get_value",
@@ -27,40 +27,40 @@
             "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
-          "clipped_adversarial_accuracy": {
+          "adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "description": "Adversarial accuracy; higher values indicate better predictive performance under adversarial perturbations.",
             "weight": 0.2
           },
-          "clipped_empirical_robustness": {
+          "empirical_robustness_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "description": "Empirical robustness score; higher values indicate stronger resistance to adversarial perturbations.",
             "weight": 0.15
           },
-          "clipped_confidence_score": {
+          "confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
+            "description": "Confidence score; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -665,7 +665,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               },
               {
                 "source": "factsheet",
@@ -673,15 +673,15 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
index 635d2e9a0..a75400052 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_cfl_tabular.json
@@ -3,65 +3,29 @@
       "resilience_to_attacks": {
         "weight": 0.4,
         "metrics": {
-          "certified_robustness": {
+          "adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_adv_accuracy"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.2
-          },
-          "inverse_loss_sensitivity": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "performance/inverse_test_loss_sensitivity"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
-            "weight": 0.2
+            "description": "Adversarial accuracy; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.4444444444
           },
-          "clipped_adversarial_accuracy": {
+          "confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_confidence_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
-            "weight": 0.2
-          },
-          "clipped_empirical_robustness": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
-            "weight": 0.15
-          },
-          "clipped_confidence_score": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
-            "weight": 0.1
+            "description": "Confidence score; higher values indicate more stable predictive confidence.",
+            "weight": 0.2222222222
           },
           "inverse_attack_success_rate": {
             "inputs": [
@@ -73,7 +37,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
-            "weight": 0.15
+            "weight": 0.3333333334
           }
         }
       },
@@ -665,23 +629,11 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
-              },
-              {
-                "source": "factsheet",
-                "field_path": "performance/inverse_test_loss_sensitivity"
-              },
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
-              },
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
index 80cb9486e..b43295c1d 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl.json
@@ -7,7 +7,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               }
             ],
             "operation": "get_value",
@@ -27,40 +27,40 @@
             "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
-          "clipped_adversarial_accuracy": {
+          "adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "description": "Adversarial accuracy; higher values indicate better predictive performance under adversarial perturbations.",
             "weight": 0.2
           },
-          "clipped_empirical_robustness": {
+          "empirical_robustness_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "description": "Empirical robustness score; higher values indicate stronger resistance to adversarial perturbations.",
             "weight": 0.15
           },
-          "clipped_confidence_score": {
+          "confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
+            "description": "Confidence score; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -645,7 +645,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               },
               {
                 "source": "factsheet",
@@ -653,15 +653,15 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
index 80cb9486e..b43295c1d 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_images.json
@@ -7,7 +7,7 @@
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               }
             ],
             "operation": "get_value",
@@ -27,40 +27,40 @@
             "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
             "weight": 0.2
           },
-          "clipped_adversarial_accuracy": {
+          "adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
+            "description": "Adversarial accuracy; higher values indicate better predictive performance under adversarial perturbations.",
             "weight": 0.2
           },
-          "clipped_empirical_robustness": {
+          "empirical_robustness_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
+            "description": "Empirical robustness score; higher values indicate stronger resistance to adversarial perturbations.",
             "weight": 0.15
           },
-          "clipped_confidence_score": {
+          "confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
+            "description": "Confidence score; higher values indicate more stable predictive confidence.",
             "weight": 0.1
           },
           "inverse_attack_success_rate": {
@@ -645,7 +645,7 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_clever_score"
               },
               {
                 "source": "factsheet",
@@ -653,15 +653,15 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_empirical_robustness_score"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
index 80cb9486e..b7770033d 100755
--- a/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/eval_metrics_dfl_tabular.json
@@ -3,65 +3,29 @@
       "resilience_to_attacks": {
         "weight": 0.4,
         "metrics": {
-          "certified_robustness": {
+          "adversarial_accuracy": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
+                "field_path": "performance/test_adv_accuracy"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Cross Lipschitz Extreme Value for network Robustness: attack-agnostic estimator of the lower bound βL",
-            "weight": 0.2
-          },
-          "inverse_loss_sensitivity": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "performance/inverse_test_loss_sensitivity"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "Inverse loss sensitivity score; higher values indicate lower sensitivity of the loss to input perturbations.",
-            "weight": 0.2
+            "description": "Adversarial accuracy; higher values indicate better predictive performance under adversarial perturbations.",
+            "weight": 0.4444444444
           },
-          "clipped_adversarial_accuracy": {
+          "confidence_score": {
             "inputs": [
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
+                "field_path": "performance/test_confidence_score"
               }
             ],
             "operation": "get_value",
             "type": "true_score",
-            "description": "Adversarial accuracy clipped to the expected score range; higher values indicate better predictive performance under adversarial perturbations.",
-            "weight": 0.2
-          },
-          "clipped_empirical_robustness": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "Empirical robustness clipped to the expected score range; higher values indicate stronger resistance to adversarial perturbations.",
-            "weight": 0.15
-          },
-          "clipped_confidence_score": {
-            "inputs": [
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
-              }
-            ],
-            "operation": "get_value",
-            "type": "true_score",
-            "description": "Confidence score clipped to the expected score range; higher values indicate more stable predictive confidence.",
-            "weight": 0.1
+            "description": "Confidence score; higher values indicate more stable predictive confidence.",
+            "weight": 0.2222222222
           },
           "inverse_attack_success_rate": {
             "inputs": [
@@ -73,7 +37,7 @@
             "operation": "get_value",
             "type": "true_score",
             "description": "Inverse attack success rate; higher values indicate a lower fraction of successful adversarial attacks.",
-            "weight": 0.15
+            "weight": 0.3333333334
           }
         }
       },
@@ -645,23 +609,11 @@
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_clever"
-              },
-              {
-                "source": "factsheet",
-                "field_path": "performance/inverse_test_loss_sensitivity"
-              },
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_adv_accuracy"
-              },
-              {
-                "source": "factsheet",
-                "field_path": "performance/clipped_test_empirical_robustness"
+                "field_path": "performance/test_adv_accuracy"
               },
               {
                 "source": "factsheet",
-                "field_path": "performance/clipped_test_confidence_score"
+                "field_path": "performance/test_confidence_score"
               },
               {
                 "source": "factsheet",
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_cfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_cfl.json
index 0ba2db196..61a465fa0 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_cfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_cfl.json
@@ -47,11 +47,11 @@
 		"test_acc_avg": "",
 		"test_macro_f1": "",
 		"clipped_test_feature_importance_cv": "",
-		"clipped_test_clever": "",
+		"test_clever_score": "",
 		"inverse_test_loss_sensitivity": "",
-		"clipped_test_adv_accuracy": "",
-		"clipped_test_empirical_robustness": "",
-		"clipped_test_confidence_score": "",
+		"test_adv_accuracy": "",
+		"test_empirical_robustness_score": "",
+		"test_confidence_score": "",
 		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json
index 0ba2db196..61a465fa0 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_images.json
@@ -47,11 +47,11 @@
 		"test_acc_avg": "",
 		"test_macro_f1": "",
 		"clipped_test_feature_importance_cv": "",
-		"clipped_test_clever": "",
+		"test_clever_score": "",
 		"inverse_test_loss_sensitivity": "",
-		"clipped_test_adv_accuracy": "",
-		"clipped_test_empirical_robustness": "",
-		"clipped_test_confidence_score": "",
+		"test_adv_accuracy": "",
+		"test_empirical_robustness_score": "",
+		"test_confidence_score": "",
 		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json
index 0ba2db196..75b539f1b 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_cfl_tabular.json
@@ -47,11 +47,8 @@
 		"test_acc_avg": "",
 		"test_macro_f1": "",
 		"clipped_test_feature_importance_cv": "",
-		"clipped_test_clever": "",
-		"inverse_test_loss_sensitivity": "",
-		"clipped_test_adv_accuracy": "",
-		"clipped_test_empirical_robustness": "",
-		"clipped_test_confidence_score": "",
+		"test_adv_accuracy": "",
+		"test_confidence_score": "",
 		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
index 031be171e..c5ea5af60 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl.json
@@ -48,11 +48,11 @@
 		"test_acc": "",
 		"test_macro_f1": "",
 		"clipped_test_feature_importance_cv": "",
-		"clipped_test_clever": "",
+		"test_clever_score": "",
 		"inverse_test_loss_sensitivity": "",
-		"clipped_test_adv_accuracy": "",
-		"clipped_test_empirical_robustness": "",
-		"clipped_test_confidence_score": "",
+		"test_adv_accuracy": "",
+		"test_empirical_robustness_score": "",
+		"test_confidence_score": "",
 		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json
index 031be171e..c5ea5af60 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_images.json
@@ -48,11 +48,11 @@
 		"test_acc": "",
 		"test_macro_f1": "",
 		"clipped_test_feature_importance_cv": "",
-		"clipped_test_clever": "",
+		"test_clever_score": "",
 		"inverse_test_loss_sensitivity": "",
-		"clipped_test_adv_accuracy": "",
-		"clipped_test_empirical_robustness": "",
-		"clipped_test_confidence_score": "",
+		"test_adv_accuracy": "",
+		"test_empirical_robustness_score": "",
+		"test_confidence_score": "",
 		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
diff --git a/nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json
index 031be171e..5e2e841a8 100755
--- a/nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json
+++ b/nebula/addons/trustworthiness/configs/factsheet_template_dfl_tabular.json
@@ -48,11 +48,8 @@
 		"test_acc": "",
 		"test_macro_f1": "",
 		"clipped_test_feature_importance_cv": "",
-		"clipped_test_clever": "",
-		"inverse_test_loss_sensitivity": "",
-		"clipped_test_adv_accuracy": "",
-		"clipped_test_empirical_robustness": "",
-		"clipped_test_confidence_score": "",
+		"test_adv_accuracy": "",
+		"test_confidence_score": "",
 		"inverse_test_attack_success_rate": ""
 	},
 	"fairness": {
diff --git a/nebula/addons/trustworthiness/dfl_factsheet.py b/nebula/addons/trustworthiness/dfl_factsheet.py
index d8f4a6afd..8cb8b752c 100644
--- a/nebula/addons/trustworthiness/dfl_factsheet.py
+++ b/nebula/addons/trustworthiness/dfl_factsheet.py
@@ -62,6 +62,7 @@ def populate_factsheet_dfl(
             data["federation"],
             model,
             self.factsheet_template_file_nm,
+            dataset_name=data["dataset"],
         )
 
         factsheet_file = get_factsheet_path(scenario_name, self.factsheet_file_nm)
diff --git a/nebula/addons/trustworthiness/factsheet_common.py b/nebula/addons/trustworthiness/factsheet_common.py
index 9c08c62c2..2b6051e4a 100644
--- a/nebula/addons/trustworthiness/factsheet_common.py
+++ b/nebula/addons/trustworthiness/factsheet_common.py
@@ -8,32 +8,55 @@
 # Shared helpers for trustworthiness factsheet generation.
 DATA_TYPE_IMAGES = "images"
 DATA_TYPE_TABULAR = "tabular"
+DATASET_DATA_TYPES = {  # Keys are normalized dataset names: lowercase, without "_" or "-".
+    "mnist": DATA_TYPE_IMAGES,
+    "fashionmnist": DATA_TYPE_IMAGES,
+    "emnist": DATA_TYPE_IMAGES,
+    "cifar10": DATA_TYPE_IMAGES,
+    "cifar100": DATA_TYPE_IMAGES,
+    "kddcup99": DATA_TYPE_TABULAR,
+    "adultcensus": DATA_TYPE_TABULAR,
+    "breastcancer": DATA_TYPE_TABULAR,
+    "covtype": DATA_TYPE_TABULAR,
+    "sentiment140": DATA_TYPE_TABULAR,  # NOTE(review): Sentiment140 is tweet text; confirm "tabular" is intended.
+}
+
+
+def get_dataset_data_type(dataset_name):
+    # Look up the data type for a dataset name, ignoring case, surrounding spaces, "_" and "-"; "" if None or unknown.
+    if dataset_name is None:
+        return ""
+
+    normalized_name = str(dataset_name).strip().lower().replace("_", "").replace("-", "")
+    return DATASET_DATA_TYPES.get(normalized_name, "")
 
 
-def get_model_data_type(model):
-    # Return the data type declared by the model, when available.
+def get_model_data_type(model, dataset_name=None):
+    # Prefer model.get_data_type(); use the dataset-name lookup when it is missing, raises AttributeError, or is None/blank.
     if not hasattr(model, "get_data_type"):
-        return ""
+        return get_dataset_data_type(dataset_name)
 
     try:
         data_type = model.get_data_type()
     except AttributeError:
-        return ""
+        return get_dataset_data_type(dataset_name)
 
     if data_type is None:
-        return ""
-    return str(data_type).strip()
+        return get_dataset_data_type(dataset_name)
+
+    data_type = str(data_type).strip()
+    return data_type or get_dataset_data_type(dataset_name)
 
 
-def get_normalized_model_data_type(model):
+def get_normalized_model_data_type(model, dataset_name=None):
     # Normalize the model data type before matching templates or profiles.
-    return get_model_data_type(model).lower()
+    return get_model_data_type(model, dataset_name=dataset_name).lower()
 
 
-def get_factsheet_template_name(federation, model, default_template_name):
+def get_factsheet_template_name(federation, model, default_template_name, dataset_name=None):
     # Select a data-type-specific template when one exists for the federation.
     federation_prefix = "dfl" if str(federation).upper() in {"DFL", "SDFL"} else "cfl"
-    data_type = get_normalized_model_data_type(model)
+    data_type = get_normalized_model_data_type(model, dataset_name=dataset_name)
 
     if data_type not in {DATA_TYPE_IMAGES, DATA_TYPE_TABULAR}:
         return default_template_name
@@ -90,6 +113,11 @@ def inverse_score(value):
     return 1 / (1 + value)
 
 
+def inverse_bounded_score(value):
+    # Return 1 - value clamped to [0, 1]; value is expected to be an error already bounded in [0, 1].
+    return min(max(1 - float(value), 0.0), 1.0)
+
+
 def get_enabled_defences(data):
     # Return the active training-time defences declared in the scenario.
     defences = []
@@ -153,7 +181,7 @@ def populate_common_pre_train_sections(factsheet, data, model):
     factsheet["project"]["background"] = build_project_background(data)
 
     factsheet["data"]["provenance"] = data["dataset"]
-    factsheet["data"]["type"] = get_model_data_type(model)
+    factsheet["data"]["type"] = get_model_data_type(model, dataset_name=data["dataset"])
     factsheet["data"]["preprocessing"] = data["topology"]
 
     factsheet["participants"]["client_num"] = data["n_nodes"] or ""
diff --git a/nebula/addons/trustworthiness/factsheet_populators.py b/nebula/addons/trustworthiness/factsheet_populators.py
index 25f5180bd..bee3b1e22 100644
--- a/nebula/addons/trustworthiness/factsheet_populators.py
+++ b/nebula/addons/trustworthiness/factsheet_populators.py
@@ -17,7 +17,7 @@
 )
 from nebula.addons.trustworthiness.helpers.robustness import (
     attack_success_rate,
-    compute_adversarial_accuracy_art,
+    get_adversarial_accuracy,
     get_clever_score,
     get_confidence_score,
     get_empirical_robustness_score,
@@ -30,6 +30,7 @@
     DATA_TYPE_TABULAR,
     cap_score,
     get_normalized_model_data_type,
+    inverse_bounded_score,
     inverse_score,
 )
 
@@ -52,7 +53,9 @@ def populate_profile_metrics(
 ):
     # Select the profile-specific populator, falling back to the shared metric set.
     federation_profile = get_federation_profile(federation)
-    data_type = get_normalized_model_data_type(model)
+    data_type = str(factsheet.get("data", {}).get("type", "")).strip().lower()
+    if not data_type:
+        data_type = get_normalized_model_data_type(model)
     populator = PROFILE_POPULATORS.get((federation_profile, data_type), populate_common_profile_metrics)
 
     populator(
@@ -65,23 +68,27 @@ def populate_profile_metrics(
 
 
 def populate_cfl_images_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    # Populate the current shared metrics for CFL image factsheets.
+    # Image factsheets include all image-compatible robustness metrics.
     populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    populate_image_robustness_metrics(factsheet, model, test_loader)
 
 
 def populate_cfl_tabular_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    # Populate the current shared metrics for CFL tabular factsheets.
+    # Tabular factsheets use only metrics shared by valid tabular and image workflows.
     populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    remove_image_only_robustness_metrics(factsheet)
 
 
 def populate_dfl_images_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    # Populate the current shared metrics for DFL/SDFL image factsheets.
+    # Image factsheets include all image-compatible robustness metrics.
     populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    populate_image_robustness_metrics(factsheet, model, test_loader)
 
 
 def populate_dfl_tabular_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
-    # Populate the current shared metrics for DFL/SDFL tabular factsheets.
+    # Tabular factsheets use only metrics shared by valid tabular and image workflows.
     populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy)
+    remove_image_only_robustness_metrics(factsheet)
 
 
 def populate_common_profile_metrics(factsheet, model, train_loader, test_loader, test_accuracy):
@@ -99,7 +106,7 @@ def populate_common_profile_metrics(factsheet, model, train_loader, test_loader,
         test_sample,
     )
     populate_common_explainability_metrics(factsheet, explainability_metrics)
-    populate_common_robustness_metrics(factsheet, model, test_loader, test_sample)
+    populate_common_robustness_metrics(factsheet, model, test_loader)
 
 
 def populate_common_model_quality_metrics(
@@ -120,10 +127,10 @@ def populate_common_model_quality_metrics(
 
     # Fairness and calibration metrics expressed as inverse scores.
     overfitting_value = max(0.0, float(factsheet["performance"]["train_accuracy"]) - float(test_accuracy))
-    factsheet["fairness"]["inverse_overfitting"] = inverse_score(overfitting_value)
+    factsheet["fairness"]["inverse_overfitting"] = inverse_bounded_score(overfitting_value)
 
     well_calibration_error_value = get_well_calibration_error(model, test_loader)
-    factsheet["fairness"]["inverse_well_calibration_error"] = inverse_score(well_calibration_error_value)
+    factsheet["fairness"]["inverse_well_calibration_error"] = inverse_bounded_score(well_calibration_error_value)
 
     generalized_entropy_index_value = get_generalized_entropy_index(model, test_loader)
     factsheet["fairness"]["inverse_generalized_entropy_index"] = inverse_score(generalized_entropy_index_value)
@@ -134,9 +141,9 @@ def populate_common_model_quality_metrics(
     coefficient_of_variation_value = get_coefficient_of_variation(model, test_loader)
     factsheet["fairness"]["inverse_coefficient_of_variation"] = inverse_score(coefficient_of_variation_value)
 
-    # Confidence is capped so factsheet scores stay within the expected range.
+    # Confidence is already a probability-like score in [0, 1].
     value_confidence_score = get_confidence_score(model, test_sample)
-    factsheet["performance"]["clipped_test_confidence_score"] = cap_score(value_confidence_score)
+    factsheet["performance"]["test_confidence_score"] = value_confidence_score
 
 
 def populate_common_explainability_metrics(factsheet, explainability_metrics):
@@ -149,36 +156,53 @@ def populate_common_explainability_metrics(factsheet, explainability_metrics):
     factsheet["performance"]["clipped_test_feature_importance_cv"] = cap_score(feature_importance)
 
 
-def populate_common_robustness_metrics(factsheet, model, test_loader, test_sample):
-    # Populate adversarial robustness metrics shared by the current factsheet profiles.
+def populate_common_robustness_metrics(factsheet, model, test_loader):
+    # Populate robustness metrics valid for both image and tabular datasets.
     lr = factsheet["configuration"]["learning_rate"]
     num_classes = model.get_num_classes()
 
-    # Sample-based robustness scores.
+    # Loader-based adversarial accuracy.
+    value_adv_accuracy = get_adversarial_accuracy(model, test_loader, num_classes, lr)
+    factsheet["performance"]["test_adv_accuracy"] = value_adv_accuracy
+
+    # Attack success is inverted so higher remains better in the factsheet.
+    value_attack_success_rate = attack_success_rate(
+        model,
+        test_loader,
+    )
+    factsheet["performance"]["inverse_test_attack_success_rate"] = 1 - value_attack_success_rate
+
+
+def populate_image_robustness_metrics(factsheet, model, test_loader):
+    # Populate image-only continuous-input robustness metrics.
+    lr = factsheet["configuration"]["learning_rate"]
+    num_classes = model.get_num_classes()
+    test_sample = next(iter(test_loader))
+
     value_clever = get_clever_score(model, test_sample, num_classes, lr)
-    factsheet["performance"]["clipped_test_clever"] = cap_score(value_clever)
+    factsheet["performance"]["test_clever_score"] = value_clever
 
     value_loss_sensitivity = get_loss_sensitivity_score(model, test_sample, num_classes, lr)
     factsheet["performance"]["inverse_test_loss_sensitivity"] = inverse_score(value_loss_sensitivity)
 
-    # Loader-based adversarial accuracy.
-    value_adv_accuracy = compute_adversarial_accuracy_art(model, test_loader, num_classes, lr)
-    factsheet["performance"]["clipped_test_adv_accuracy"] = cap_score(value_adv_accuracy)
-
     value_empirical_robustness = get_empirical_robustness_score(
         model,
         test_sample,
         num_classes,
         lr,
     )
-    factsheet["performance"]["clipped_test_empirical_robustness"] = cap_score(value_empirical_robustness)
-
-    # Attack success is inverted so higher remains better in the factsheet.
-    value_attack_success_rate = attack_success_rate(
-        model,
-        test_sample,
-    )
-    factsheet["performance"]["inverse_test_attack_success_rate"] = 1 - value_attack_success_rate
+    factsheet["performance"]["test_empirical_robustness_score"] = value_empirical_robustness
+
+
+def remove_image_only_robustness_metrics(factsheet):
+    # Discard the image-only robustness fields from the factsheet's "performance" section, if present.
+    # A factsheet without that section is left untouched.
+    image_only_fields = ("test_clever_score", "inverse_test_loss_sensitivity", "test_empirical_robustness_score")
+
+    performance_section = factsheet.get("performance", {})
+    for field_name in image_only_fields:
+        # pop() with a default never raises for fields that were never written.
+        performance_section.pop(field_name, None)
 
 
 PROFILE_POPULATORS = {
diff --git a/nebula/addons/trustworthiness/helpers/robustness.py b/nebula/addons/trustworthiness/helpers/robustness.py
index 13611842b..a64fc5001 100644
--- a/nebula/addons/trustworthiness/helpers/robustness.py
+++ b/nebula/addons/trustworthiness/helpers/robustness.py
@@ -6,13 +6,23 @@
 import torch.nn.functional as F
 from art.estimators.classification import PyTorchClassifier
 from art.metrics import clever_u, empirical_robustness, loss_sensitivity
+from nebula.core.datasets.image_metadata import get_image_normalization
 from torch import nn, optim
 
 logger = logging.getLogger(__name__)
 
 R_L2 = 2
+ROBUSTNESS_EPSILON = 0.03
+# ART CLEVER is an L2 lower-bound estimate; the attack radius maps to a full trust score.
+CLEVER_REFERENCE = R_L2
+# ART empirical robustness is a relative perturbation distance; this maps 0.2 to a full trust score.
+EMPIRICAL_ROBUSTNESS_REFERENCE = 0.2
+TABULAR_ATTACK_STEPS = 3
+ADVERSARIAL_LOG_SAMPLES = 2
+ADVERSARIAL_LOG_FEATURES = 12
 
 def _build_art_classifier(model, input_shape, nb_classes, learning_rate):
+    # Wrap the PyTorch model with the ART classifier interface used by ART metrics.
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(model.parameters(), learning_rate)
 
@@ -26,6 +36,7 @@ def _build_art_classifier(model, input_shape, nb_classes, learning_rate):
 
 
 def _validate_test_sample_tensors(test_sample):
+    # Shared guard for sample-based metrics that expect a non-empty (x, y) batch.
     if not (isinstance(test_sample, (tuple, list)) and len(test_sample) >= 2):
         raise ValueError("`test_sample` must contain samples and labels.")
 
@@ -37,26 +48,207 @@ def _validate_test_sample_tensors(test_sample):
 
 
 def _coerce_max_samples(max_samples, default=8):
+    # Keep metric calls bounded even if configuration values are missing or invalid.
     try:
         return max(1, int(max_samples))
     except Exception:
         return default
 
 
+def _coerce_tabular_metadata(metadata):
+    # Normalize metadata to a TabularAdversarialMetadata instance; None passes through unchanged.
+    if metadata is None:
+        return None
+
+    # Imported lazily so workflows without tabular metadata never load the tabular module.
+    from nebula.core.datasets.tabular_metadata import TabularAdversarialMetadata
+
+    # Typed objects are returned as-is; anything else is handed to from_dict.
+    already_typed = isinstance(metadata, TabularAdversarialMetadata)
+    return metadata if already_typed else TabularAdversarialMetadata.from_dict(metadata)
+
+
+def _get_tabular_metadata_from_dataset(dataset):
+    # Search a dataset and its nested `.dataset` wrappers for a `tabular_metadata` attribute.
+    if dataset is None:
+        # End of the wrapper chain: no metadata was found.
+        return None
+
+    found = getattr(dataset, "tabular_metadata", None)
+    if found is None:
+        return _get_tabular_metadata_from_dataset(getattr(dataset, "dataset", None))
+    return _coerce_tabular_metadata(found)
+
+
+def _get_tabular_metadata_from_loader(data_loader):
+    # Look up tabular metadata on the loader's `.dataset`; None (no metadata found) keeps callers on FGSM.
+    return _get_tabular_metadata_from_dataset(getattr(data_loader, "dataset", None))
+
+
+def _get_dataset_name_from_dataset(dataset):
+    # Resolve the dataset name from a dataset or any nested `.dataset` wrapper; None when not found.
+    if dataset is None:
+        return None
+
+    # An explicit `dataset_name` attribute wins over anything stored in the config.
+    dataset_name = getattr(dataset, "dataset_name", None)
+    if dataset_name is not None:
+        return dataset_name
+
+    # `config` may be a dict or an object exposing `participant`; read the participant section either way.
+    config = getattr(dataset, "config", None)
+    participant = config.get("participant") if isinstance(config, dict) else getattr(config, "participant", None)
+    # Tolerate a missing or non-dict `data_args` section instead of raising while probing for the name.
+    data_args = participant.get("data_args") if isinstance(participant, dict) else None
+    if isinstance(data_args, dict) and data_args.get("dataset") is not None:
+        return data_args["dataset"]
+
+    return _get_dataset_name_from_dataset(getattr(dataset, "dataset", None))
+
+
+def _get_image_normalization_from_loader(data_loader):
+    # Look up the mean/std pair registered for the loader's dataset name; None when nothing is registered.
+    name = _get_dataset_name_from_dataset(getattr(data_loader, "dataset", None))
+    mean_std = get_image_normalization(name)
+    if mean_std is not None:
+        logger.info("[Robustness] Image normalization loaded | dataset=%s | mean/std=%s", name, mean_std)
+    return mean_std
+
+
+def _build_fixed_epsilon_tabular_generator(epsilon, tabular_metadata):
+    # Build a tabular constrained-PGD generator whose epsilon is pinned to `epsilon` (imports are kept local).
+    from nebula.addons.defenses.adversarial_training.config import AdversarialTrainingConfig
+    from nebula.addons.defenses.adversarial_training.tabular import TabularConstrainedPGDGenerator
+
+    class FixedEpsilonTabularConstrainedPGDGenerator(TabularConstrainedPGDGenerator):
+        def _sample_epsilon(self, device):
+            # Always return the configured epsilon (the parent presumably samples it; confirm); `device` is unused.
+            self.last_epsilon = float(self.config.epsilon)
+            return self.last_epsilon
+
+    config = AdversarialTrainingConfig(
+        domain="tabular",
+        attack="constrained_pgd",
+        epsilon=float(epsilon),
+        steps=TABULAR_ATTACK_STEPS,
+        candidate_selection="none",
+    )
+    return FixedEpsilonTabularConstrainedPGDGenerator(config, tabular_metadata)
+
+
+def _build_tabular_generator(epsilon, tabular_metadata):
+    # Build the fixed-epsilon tabular generator, or return None so callers take the FGSM path.
+    coerced_metadata = _coerce_tabular_metadata(tabular_metadata)
+    if coerced_metadata is not None:
+        return _build_fixed_epsilon_tabular_generator(epsilon, coerced_metadata)
+
+    return None
+
+
+def _attack_name(tabular_generator):
+    # Name the active adversarial path for log messages: FGSM unless a tabular generator is supplied.
+    return "fgsm" if tabular_generator is None else "tabular_constrained_pgd"
+
+
+def _tensor_range(tensor):
+    # Summarize a tensor as "min/max/mean" text for log lines; empty tensors yield "empty".
+    if tensor.numel() == 0:
+        return "empty"
+
+    # Statistics are computed on a detached float32 CPU copy.
+    values = tensor.detach().float().cpu()
+    lowest = values.min().item()
+    highest = values.max().item()
+    average = values.mean().item()
+    return "min={:.6f}, max={:.6f}, mean={:.6f}".format(lowest, highest, average)
+
+
+def _format_preview_vector(vector, feature_names=None, max_features=ADVERSARIAL_LOG_FEATURES):
+    # Render the first `max_features` values of a flattened tensor as "[...]" text for log lines.
+    flat = vector.detach().flatten()
+    # Slice before converting so only the previewed prefix is copied to the CPU and turned into floats.
+    preview_values = flat[:max_features].float().cpu().tolist()
+
+    if feature_names:
+        # Label each value; zip stops at the shorter of the names and the previewed values.
+        names = list(feature_names)[:max_features]
+        items = ["{}={:.6f}".format(name, value) for name, value in zip(names, preview_values, strict=False)]
+    else:
+        items = ["{:.6f}".format(value) for value in preview_values]
+
+    # Signal truncation whenever the vector holds more values than were previewed.
+    suffix = ", ..." if flat.numel() > max_features else ""
+    return "[" + ", ".join(items) + suffix + "]"
+
+
+def _log_adversarial_generation(metric_name, samples, labels, x_adv, epsilon, tabular_generator, batch_idx):
+    # Log a summary and a few samples of the first batch only; skip all tensor work when INFO is disabled.
+    if batch_idx != 0 or not logger.isEnabledFor(logging.INFO):
+        return
+
+    attack = _attack_name(tabular_generator)
+    clean = samples.detach().cpu()
+    adv = x_adv.detach().cpu()
+    delta = adv - clean
+    flat_delta = delta.reshape(delta.shape[0], -1).float()
+    feature_names = getattr(getattr(tabular_generator, "metadata", None), "feature_names", None)
+
+    logger.info(
+        "[Robustness] %s adversarial generation | attack=%s | epsilon=%.6f | "
+        "clean_shape=%s | adv_shape=%s | clean=%s | adv=%s | "
+        "delta_linf=%.6f | delta_l2_mean=%.6f",
+        metric_name,
+        attack,
+        float(epsilon),
+        tuple(clean.shape),
+        tuple(adv.shape),
+        _tensor_range(clean),
+        _tensor_range(adv),
+        flat_delta.abs().max().item() if flat_delta.numel() else 0.0,
+        flat_delta.norm(p=2, dim=1).mean().item() if flat_delta.numel() else 0.0,
+    )
+
+    n_preview = min(int(clean.shape[0]), ADVERSARIAL_LOG_SAMPLES)
+    labels_cpu = labels.detach().cpu() if torch.is_tensor(labels) else labels
+    for sample_idx in range(n_preview):
+        label = labels_cpu[sample_idx].item() if torch.is_tensor(labels_cpu) else None
+        logger.info(
+            "[Robustness] %s adversarial sample %s | attack=%s | label=%s | "
+            "clean=%s | adversarial=%s | delta=%s",
+            metric_name,
+            sample_idx,
+            attack,
+            label,
+            _format_preview_vector(clean[sample_idx], feature_names),
+            _format_preview_vector(adv[sample_idx], feature_names),
+            _format_preview_vector(delta[sample_idx]),
+        )
+
+
+def _generate_adversarial_samples(
+    model,
+    samples,
+    labels,
+    epsilon=ROBUSTNESS_EPSILON,
+    tabular_generator=None,
+    image_normalization=None,
+):
+    # Use the tabular generator when given, else FGSM; `epsilon`/`image_normalization` only reach FGSM.
+    if tabular_generator is not None:
+        return tabular_generator.generate(model, samples, labels, nn.CrossEntropyLoss())
+
+    return fgsm_attack(
+        model,
+        samples,
+        labels,
+        epsilon=epsilon,
+        image_normalization=image_normalization,
+    )
+
+
 def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
-    """
-    Calculates the CLEVER score as the mean score over multiple samples.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader.
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        max_samples (int): Maximum number of samples from the batch to evaluate.
-
-    Returns:
-        float: Mean CLEVER score across the selected samples.
-    """
+    # Calculates and scales ART CLEVER into a trust score.
+
     samples, _ = _validate_test_sample_tensors(test_sample)
 
     input_shape = tuple(samples.shape[1:]) if samples.dim() >= 2 else tuple(samples.shape)
@@ -69,6 +261,7 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=
 
     clever_scores = []
     for idx in range(n_samples):
+        # ART CLEVER evaluates one input at a time without the batch dimension.
         background = samples[idx].detach().cpu()
         sample_np = background.numpy()
 
@@ -92,23 +285,19 @@ def get_clever_score(model, test_sample, nb_classes, learning_rate, max_samples=
     if not clever_scores:
         return 0.0
 
-    return float(np.mean(clever_scores))
+    raw_score = float(np.mean(clever_scores))
+    score = min(max(raw_score / CLEVER_REFERENCE, 0.0), 1.0)
+    logger.info(
+        "[Robustness] CLEVER | raw_l2=%.6f | reference=%.6f | score=%.6f",
+        raw_score,
+        CLEVER_REFERENCE,
+        score,
+    )
+    return score
 
 def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, max_samples=8):
+    # Calculates the loss sensitivity score as the mean score over multiple samples.
 
-    """
-    Calculates the loss sensitivity score as the mean score over multiple samples.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader.
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        max_samples (int): Maximum number of samples from the batch to evaluate.
-
-    Returns:
-        float: Mean loss sensitivity score across the selected samples.
-    """
     samples, labels = _validate_test_sample_tensors(test_sample)
 
     max_samples = _coerce_max_samples(max_samples)
@@ -119,6 +308,7 @@ def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, ma
 
     sensitivity_scores = []
     for idx in range(n_samples):
+        # ART loss_sensitivity expects a batch and one-hot labels.
         sample = samples[idx].detach().cpu().unsqueeze(0)
         label = labels[idx].detach().cpu().unsqueeze(0)
         label = F.one_hot(label, num_classes=nb_classes).float()
@@ -141,39 +331,54 @@ def get_loss_sensitivity_score(model, test_sample, nb_classes, learning_rate, ma
     return float(np.mean(sensitivity_scores))
 
 
-def compute_adversarial_accuracy_art(
+def get_adversarial_accuracy(
     model,
     test_loader,
     nb_classes,
     learning_rate,
-    epsilon=0.03
+    epsilon=ROBUSTNESS_EPSILON
 ):
-    """
-    Computes adversarial accuracy using FGSM attack.
-
-    Args:
-        model (object): The model.
-        test_loader (DataLoader): DataLoader providing test samples.
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        epsilon (float): Maximum perturbation magnitude for the attacks.
-
-    Returns:
-        float: The adversarial accuracy score.
-    """
+    # Computes adversarial accuracy on generated adversarial samples.
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
     model.to(device)
+    # If metadata exists, adversarial examples preserve tabular feature constraints.
+    tabular_generator = _build_tabular_generator(
+        epsilon,
+        _get_tabular_metadata_from_loader(test_loader),
+    )
+    image_normalization = None if tabular_generator is not None else _get_image_normalization_from_loader(test_loader)
+    logger.info(
+        "[Robustness] adversarial accuracy | attack=%s | epsilon=%.6f",
+        _attack_name(tabular_generator),
+        float(epsilon),
+    )
 
     correct = 0
     total = 0
 
-    for samples, labels in test_loader:
+    for batch_idx, (samples, labels) in enumerate(test_loader):
         samples = samples.to(device)
         labels = labels.to(device)
 
-        x_adv = fgsm_attack(model, samples, labels, epsilon=epsilon)
+        x_adv = _generate_adversarial_samples(
+            model,
+            samples,
+            labels,
+            epsilon=epsilon,
+            tabular_generator=tabular_generator,
+            image_normalization=image_normalization,
+        )
+        _log_adversarial_generation(
+            "adversarial_accuracy",
+            samples,
+            labels,
+            x_adv,
+            epsilon,
+            tabular_generator,
+            batch_idx,
+        )
 
         with torch.no_grad():
             outputs = model(x_adv)
@@ -195,21 +400,8 @@ def get_empirical_robustness_score(
     attack_params = None,
     max_samples = 128,
 ):
-    """
-    Calculates the Empirical Robustness score using Adversarial Robustness Toolbox (ART).
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader (samples, labels).
-        nb_classes (int): The nb_classes of the model.
-        learning_rate (float): The learning rate of the model.
-        attack_name (str): Attack key supported by ART empirical_robustness.
-        attack_params (dict | None): Optional attack parameters.
-        max_samples (int): Max number of samples from the batch to use.
-
-    Returns:
-        float: Empirical robustness score (>= 0.0). If it cannot be computed, returns 0.0.
-    """
+    # Calculates and scales ART empirical robustness into a trust score.
+
     try:
         samples, _ = _validate_test_sample_tensors(test_sample)
 
@@ -219,20 +411,27 @@ def get_empirical_robustness_score(
 
         classifier = _build_art_classifier(model, samples.shape[1:], nb_classes, learning_rate)
 
-        score = empirical_robustness(
+        raw_score = empirical_robustness(
             classifier=classifier,
             x=x,
             attack_name=attack_name,
             attack_params=attack_params,
         )
 
-        if isinstance(score, np.ndarray):
-            score = float(np.mean(score))
+        if isinstance(raw_score, np.ndarray):
+            raw_score = float(np.mean(raw_score))
 
-        if score is None or (isinstance(score, float) and math.isnan(score)):
+        if raw_score is None or (isinstance(raw_score, float) and math.isnan(raw_score)):
             return 0.0
 
-        return float(score)
+        score = min(max(float(raw_score) / EMPIRICAL_ROBUSTNESS_REFERENCE, 0.0), 1.0)
+        logger.info(
+            "[Robustness] empirical robustness | raw_distance=%.6f | reference=%.6f | score=%.6f",
+            float(raw_score),
+            EMPIRICAL_ROBUSTNESS_REFERENCE,
+            score,
+        )
+        return score
 
     except Exception as exc:
         logger.warning("Could not compute empirical robustness (ART). Returning 0.0")
@@ -240,25 +439,27 @@ def get_empirical_robustness_score(
         return 0.0
 
 
-def _get_image_normalization_for_samples(samples):
-    if not isinstance(samples, torch.Tensor) or samples.ndim < 4:
-        return None
+def _get_image_normalization_for_samples(samples, image_normalization=None):
+    # Image normalization must come from dataset metadata; do not infer it by channel count.
+    if image_normalization is not None:
+        return image_normalization
 
-    channels = int(samples.shape[1])
-    if channels == 1:
-        return (0.5,), (0.5,)
-    if channels == 3:
-        return (0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)
+    if isinstance(samples, torch.Tensor) and samples.ndim >= 4:
+        logger.warning(
+            "[Robustness] Image normalization missing; FGSM will perturb without normalized-space clamping."
+        )
     return None
 
 
 def _channel_tensor(values, samples):
+    # Broadcast channel statistics over the batch and spatial dimensions.
     shape = [1, len(values)] + [1] * max(samples.dim() - 2, 0)
     return torch.tensor(values, dtype=samples.dtype, device=samples.device).view(*shape)
 
 
-def _fgsm_step_and_clamp(samples, grad, epsilon):
-    normalization = _get_image_normalization_for_samples(samples)
+def _fgsm_step_and_clamp(samples, grad, epsilon, image_normalization=None):
+    # Clamp image attacks in normalized space; leave non-image tensors unclamped here.
+    normalization = _get_image_normalization_for_samples(samples, image_normalization=image_normalization)
     if normalization is None:
         return samples + epsilon * grad.sign()
 
@@ -275,19 +476,9 @@ def _fgsm_step_and_clamp(samples, grad, epsilon):
     return torch.max(torch.min(x_adv, upper), lower)
 
 
-def fgsm_attack(model, samples, labels, epsilon=0.03):
-    """
-        Performs an FGSM (Fast Gradient Sign Method) adversarial attack on a batch of samples.
+def fgsm_attack(model, samples, labels, epsilon=ROBUSTNESS_EPSILON, image_normalization=None):
+    # Performs an FGSM (Fast Gradient Sign Method) adversarial attack on a batch of samples.
 
-        Args:
-            model (torch.nn.Module): The PyTorch model to attack.
-            samples (torch.Tensor): Input samples to perturb, shape (B, ...).
-            labels (torch.Tensor): True labels corresponding to the samples.
-            epsilon (float, optional): Maximum perturbation magnitude for the attack. Defaults to 0.03.
-
-        Returns:
-            torch.Tensor: Adversarially perturbed samples with the same shape as `samples`.
-    """
     try:
         device = next(model.parameters()).device
     except Exception:
@@ -295,13 +486,21 @@ def fgsm_attack(model, samples, labels, epsilon=0.03):
 
     samples = samples.clone().detach().to(device)
     labels = labels.to(device)
+    # Gradients are needed only with respect to the input batch.
     samples.requires_grad = True
 
     outputs = model(samples)
     logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
     loss = nn.CrossEntropyLoss()(logits, labels)
     grad = torch.autograd.grad(loss, samples, only_inputs=True)[0]
-    x_adv = _fgsm_step_and_clamp(samples, grad, epsilon)
+    x_adv = _fgsm_step_and_clamp(samples, grad, epsilon, image_normalization=image_normalization)
+    logger.debug(
+        "[Robustness] FGSM batch generated | epsilon=%.6f | samples_shape=%s | grad=%s | adv=%s",
+        float(epsilon),
+        tuple(samples.shape),
+        _tensor_range(grad),
+        _tensor_range(x_adv),
+    )
 
     return x_adv.detach()
 
@@ -312,18 +511,8 @@ def get_confidence_score(
     max_samples = 128,
     use_true_label = True,
 ):
-    """
-    Calculates the confidence score.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader (samples, labels).
-        max_samples (int): Max number of samples from the batch to use.
-        use_true_label (bool): Whether to compute confidence with respect to the true labels. Defaults to True.
-
-    Returns:
-        float: Confidence score.
-    """
+    # Calculates the confidence score.
+
     try:
         if not isinstance(model, torch.nn.Module):
             logger.warning("Model is not a torch.nn.Module")
@@ -350,6 +539,7 @@ def get_confidence_score(
             probs = torch.softmax(logits, dim=1)
 
             if use_true_label and isinstance(y, torch.Tensor):
+                # True-label confidence is used when labels are available.
                 if y.ndim > 1:
                     y_idx = torch.argmax(y, dim=1)
                 else:
@@ -368,46 +558,65 @@ def get_confidence_score(
         return 0.0
 
 
-def attack_success_rate(model, test_sample,epsilon=0.03):
-    """
-    Calculates the ASR.
-
-    Args:
-        model (object): The model.
-        test_sample (object): A batch from the test dataloader (samples, labels).
-        epsilon (float): Maximum perturbation magnitude for the attacks.
-
-    Returns:
-        float: The ASR.
-    """
+def attack_success_rate(model, test_loader, epsilon=ROBUSTNESS_EPSILON):
+    # Computes ASR over originally correct predictions only.
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.eval()
     model.to(device)
+    # Tabular datasets use constrained PGD; image datasets fall back to FGSM.
+    tabular_generator = _build_tabular_generator(
+        epsilon,
+        _get_tabular_metadata_from_loader(test_loader),
+    )
+    image_normalization = None if tabular_generator is not None else _get_image_normalization_from_loader(test_loader)
+    logger.info(
+        "[Robustness] attack success rate | attack=%s | epsilon=%.6f",
+        _attack_name(tabular_generator),
+        float(epsilon),
+    )
 
-    images, labels = test_sample
-    images = images.to(device)
-    labels = labels.to(device)
-
-    with torch.no_grad():
-        outputs = model(images)
-        logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
-        preds = logits.argmax(dim=1)
-
-    correct_mask = preds.eq(labels)
-    num_correct = correct_mask.sum().item()
+    successful_attacks = 0
+    num_correct = 0
 
-    if num_correct == 0:
-        return 0.0
+    for batch_idx, (samples, labels) in enumerate(test_loader):
+        samples = samples.to(device)
+        labels = labels.to(device)
 
-    x_adv = fgsm_attack(model, images, labels, epsilon=epsilon)
+        with torch.no_grad():
+            outputs = model(samples)
+            logits = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
+            preds = logits.argmax(dim=1)
 
-    with torch.no_grad():
-        outputs_adv = model(x_adv)
-        logits_adv = outputs_adv[0] if isinstance(outputs_adv, (tuple, list)) else outputs_adv
-        preds_adv = logits_adv.argmax(dim=1)
+        correct_mask = preds.eq(labels)
+        batch_correct = correct_mask.sum().item()
+        if batch_correct == 0:
+            # ASR is defined over clean-correct samples, so this batch contributes nothing.
+            continue
+
+        x_adv = _generate_adversarial_samples(
+            model,
+            samples,
+            labels,
+            epsilon=epsilon,
+            tabular_generator=tabular_generator,
+            image_normalization=image_normalization,
+        )
+        _log_adversarial_generation(
+            "attack_success_rate",
+            samples,
+            labels,
+            x_adv,
+            epsilon,
+            tabular_generator,
+            batch_idx,
+        )
 
-    successful_attacks = (correct_mask & preds_adv.ne(labels)).sum().item()
+        with torch.no_grad():
+            outputs_adv = model(x_adv)
+            logits_adv = outputs_adv[0] if isinstance(outputs_adv, (tuple, list)) else outputs_adv
+            preds_adv = logits_adv.argmax(dim=1)
 
-    asr = successful_attacks / num_correct
+        successful_attacks += (correct_mask & preds_adv.ne(labels)).sum().item()
+        num_correct += batch_correct
 
-    return asr
+    return successful_attacks / num_correct if num_correct > 0 else 0.0
diff --git a/nebula/core/datasets/image_metadata.py b/nebula/core/datasets/image_metadata.py
new file mode 100644
index 000000000..0b206fbf8
--- /dev/null
+++ b/nebula/core/datasets/image_metadata.py
@@ -0,0 +1,14 @@
+IMAGE_DATASET_NORMALIZATION = {  # dataset name -> (mean, std) tuples
+    "MNIST": ((0.5,), (0.5,)),
+    "FashionMNIST": ((0.5,), (0.5,)),
+    "EMNIST": ((0.5,), (0.5,)),
+    "CIFAR10": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),
+    "CIFAR100": ((0.4914, 0.4822, 0.4465), (0.2471, 0.2435, 0.2616)),  # NOTE(review): identical to the CIFAR10 entry; confirm it matches the CIFAR100 loader's transform
+}
+
+
+def get_image_normalization(dataset_name):
+    # Shared source of image mean/std values used by attacks in normalized model space; returns the (mean, std) entry for dataset_name, or None.
+    if dataset_name is None:  # no dataset given, so there is nothing to look up
+        return None
+    return IMAGE_DATASET_NORMALIZATION.get(str(dataset_name))  # key is coerced with str(); names missing from the table yield None
diff --git a/nebula/core/datasets/kddcup99/kddcup99.py b/nebula/core/datasets/kddcup99/kddcup99.py
index 72c3db0f4..db6ae8488 100644
--- a/nebula/core/datasets/kddcup99/kddcup99.py
+++ b/nebula/core/datasets/kddcup99/kddcup99.py
@@ -233,8 +233,8 @@ def __init__(
         seed: int = 42,
         config_dir: str | None = None,
         test_size: float = 0.2,
-        train_limit: int | None = 12000,
-        test_limit: int | None = 2000,
+        train_limit: int | None = 20000,
+        test_limit: int | None = 4000,
         subset: str | None = None,
         percent10: bool = True,
     ):
diff --git a/nebula/core/models/adultcensus/mlp.py b/nebula/core/models/adultcensus/mlp.py
index d3fff9e94..3a7c2595c 100644
--- a/nebula/core/models/adultcensus/mlp.py
+++ b/nebula/core/models/adultcensus/mlp.py
@@ -26,6 +26,7 @@ def __init__(
     ):
         # NebulaModel expects something like input_channels first; for tabular we pass input_dim there.
         super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
 
diff --git a/nebula/core/models/breast_cancer/mlp.py b/nebula/core/models/breast_cancer/mlp.py
index 11a6ec833..577d16290 100644
--- a/nebula/core/models/breast_cancer/mlp.py
+++ b/nebula/core/models/breast_cancer/mlp.py
@@ -20,6 +20,7 @@ def __init__(
         # pero en la práctica se usa ese primer argumento como "input shape info".
         # Para tabular, pasamos input_dim en input_channels para mantener la firma.
         super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         # Mantengo el mismo patrón que tu MLP de FashionMNIST.
         self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
diff --git a/nebula/core/models/cifar10/cnn.py b/nebula/core/models/cifar10/cnn.py
index db9df51e6..cdd70ddcf 100755
--- a/nebula/core/models/cifar10/cnn.py
+++ b/nebula/core/models/cifar10/cnn.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/cifar10/cnnV2.py b/nebula/core/models/cifar10/cnnV2.py
index 0b23eef34..f5bcb5c6f 100755
--- a/nebula/core/models/cifar10/cnnV2.py
+++ b/nebula/core/models/cifar10/cnnV2.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/cifar10/cnnV3.py b/nebula/core/models/cifar10/cnnV3.py
index 8b4585208..2aff83dd0 100755
--- a/nebula/core/models/cifar10/cnnV3.py
+++ b/nebula/core/models/cifar10/cnnV3.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/cifar10/fastermobilenet.py b/nebula/core/models/cifar10/fastermobilenet.py
index 7be70c64d..20ec7704a 100755
--- a/nebula/core/models/cifar10/fastermobilenet.py
+++ b/nebula/core/models/cifar10/fastermobilenet.py
@@ -16,6 +16,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/cifar10/resnet.py b/nebula/core/models/cifar10/resnet.py
index d2da13f3b..255191511 100755
--- a/nebula/core/models/cifar10/resnet.py
+++ b/nebula/core/models/cifar10/resnet.py
@@ -42,6 +42,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__()
+        self.data_type = data_type
         if metrics is None:
             metrics = MetricCollection([
                 MulticlassAccuracy(num_classes=num_classes),
diff --git a/nebula/core/models/cifar10/simplemobilenet.py b/nebula/core/models/cifar10/simplemobilenet.py
index 9791f5735..b394a101d 100755
--- a/nebula/core/models/cifar10/simplemobilenet.py
+++ b/nebula/core/models/cifar10/simplemobilenet.py
@@ -21,6 +21,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/cifar100/cnn.py b/nebula/core/models/cifar100/cnn.py
index 0a005973f..6c2de6b41 100755
--- a/nebula/core/models/cifar100/cnn.py
+++ b/nebula/core/models/cifar100/cnn.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {
             "lr": 8.0505e-05,
diff --git a/nebula/core/models/covtype/mlp.py b/nebula/core/models/covtype/mlp.py
index 0f684dd06..6eb55a348 100644
--- a/nebula/core/models/covtype/mlp.py
+++ b/nebula/core/models/covtype/mlp.py
@@ -20,6 +20,7 @@ def __init__(
         # pero en la práctica se usa ese primer argumento como "input shape info".
         # Para tabular, pasamos input_dim en input_channels para mantener la firma.
         super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         # Mantengo el mismo patrón que tu MLP de FashionMNIST.
         self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
diff --git a/nebula/core/models/emnist/cnn.py b/nebula/core/models/emnist/cnn.py
index 17c7f6040..22bd80a2e 100755
--- a/nebula/core/models/emnist/cnn.py
+++ b/nebula/core/models/emnist/cnn.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/emnist/mlp.py b/nebula/core/models/emnist/mlp.py
index 6d5e420e6..4887165fc 100755
--- a/nebula/core/models/emnist/mlp.py
+++ b/nebula/core/models/emnist/mlp.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/fashionmnist/cnn.py b/nebula/core/models/fashionmnist/cnn.py
index 72837d204..bef3d1eca 100755
--- a/nebula/core/models/fashionmnist/cnn.py
+++ b/nebula/core/models/fashionmnist/cnn.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/fashionmnist/mlp.py b/nebula/core/models/fashionmnist/mlp.py
index 4704674e0..ac289c7d5 100755
--- a/nebula/core/models/fashionmnist/mlp.py
+++ b/nebula/core/models/fashionmnist/mlp.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/kddcup99/mlp.py b/nebula/core/models/kddcup99/mlp.py
index 539f3ad98..2de38af46 100644
--- a/nebula/core/models/kddcup99/mlp.py
+++ b/nebula/core/models/kddcup99/mlp.py
@@ -16,6 +16,7 @@ def __init__(
         data_type="Tabular",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.input_size = input_size
         self.example_input_array = torch.zeros(1, self.input_size)
diff --git a/nebula/core/models/mnist/cnn.py b/nebula/core/models/mnist/cnn.py
index 78e520b4e..94bdcbdc5 100755
--- a/nebula/core/models/mnist/cnn.py
+++ b/nebula/core/models/mnist/cnn.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 
diff --git a/nebula/core/models/mnist/mlp.py b/nebula/core/models/mnist/mlp.py
index 9fdc48bb7..f316dc110 100755
--- a/nebula/core/models/mnist/mlp.py
+++ b/nebula/core/models/mnist/mlp.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Images",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.example_input_array = torch.zeros(1, 1, 28, 28)
         self.learning_rate = learning_rate
diff --git a/nebula/core/models/sentiment140/cnn.py b/nebula/core/models/sentiment140/cnn.py
index ab305fda7..f5c2d9d46 100755
--- a/nebula/core/models/sentiment140/cnn.py
+++ b/nebula/core/models/sentiment140/cnn.py
@@ -17,6 +17,7 @@ def __init__(
         data_type="Tabular",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
         self.example_input_array = torch.zeros(1, 1, 28, 28)
diff --git a/nebula/core/models/sentiment140/rnn.py b/nebula/core/models/sentiment140/rnn.py
index aa1915d53..d02b1e76e 100755
--- a/nebula/core/models/sentiment140/rnn.py
+++ b/nebula/core/models/sentiment140/rnn.py
@@ -15,6 +15,7 @@ def __init__(
         data_type="Tabular",
     ):
         super().__init__(input_channels, num_classes, learning_rate, metrics, confusion_matrix, seed)
+        self.data_type = data_type
 
         self.config = {"beta1": 0.851436, "beta2": 0.999689, "amsgrad": True}
 

From 8e0a7a1657d1728339a7397801606aee4a8e56c3 Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 11 Jun 2026 13:08:01 +0200
Subject: [PATCH 64/66] Citations

---
 README.md                                     | 29 +++++++++++++++++++
 .../trustworthiness/helpers/model_quality.py  |  5 ++++
 .../core/datasets/adultcensus/adultcensus.py  |  2 ++
 .../datasets/breast_cancer/breast_cancer.py   |  3 ++
 nebula/core/datasets/covtype/covtype.py       |  2 ++
 nebula/core/datasets/kddcup99/kddcup99.py     |  3 ++
 nebula/core/training/dp.py                    |  3 ++
 7 files changed, 47 insertions(+)

diff --git a/README.md b/README.md
index 5dfc69a53..725d78280 100755
--- a/README.md
+++ b/README.md
@@ -159,4 +159,33 @@ We would like to thank the following projects for their contributions which have
 - [FastAPI](https://github.com/tiangolo/fastapi) for the RESTful API
 - [Fedstellar](https://github.com/CyberDataLab/fedstellar) platform and [p2pfl](https://github.com/pguijas/p2pfl/) library
 - [Adversarial Robustness Toolbox (ART)](https://github.com/Trusted-AI/adversarial-robustness-toolbox) for the implementation of adversarial attacks
+- [Opacus](https://github.com/meta-pytorch/opacus) for differential privacy training support
+- [AI Fairness 360 (AIF360)](https://github.com/Trusted-AI/AIF360) for fairness metric definitions
+- [HolisticAI](https://github.com/holistic-ai/holisticai) for trustworthiness and fairness metric definitions
 - [D3.js](https://github.com/d3/d3-force) for the network visualizations
+
+## Third-party Differential Privacy
+
+NEBULA uses Opacus for differential privacy training:
+
+- Yousefpour, A., Shilov, I., Sablayrolles, A., Testuggine, D., Prasad, K., Malek, M., Nguyen, J., Ghosh, S., Bharadwaj, A., Zhao, J., Cormode, G., & Mironov, I. (2021). Opacus: User-Friendly Differential Privacy Library in PyTorch. arXiv:2109.12298. Licensed under Apache License 2.0: https://github.com/meta-pytorch/opacus/blob/main/LICENSE
+
+## Third-party Trustworthiness Metrics
+
+NEBULA implements some trustworthiness and fairness metrics following definitions documented in external toolkits:
+
+- AI Fairness 360 (AIF360). AI Fairness 360 [Software]. https://github.com/Trusted-AI/AIF360. Licensed under Apache License 2.0: https://github.com/Trusted-AI/AIF360/blob/main/LICENSE
+
+- Holistic AI. HolisticAI [Software]. https://github.com/holistic-ai/holisticai. Licensed under Apache License 2.0: https://github.com/holistic-ai/holisticai/blob/main/LICENSE
+
+## Third-party Tabular Datasets
+
+NEBULA preprocesses these datasets for experiments, including splitting, scaling, encoding, label mapping, filtering, and/or sample limiting depending on the dataset.
+
+- Becker, B. & Kohavi, R. (1996). Adult [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C5XW20. Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
+
+- Blackard, J. (1998). Covertype [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C50K5N. Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
+
+- Wolberg, W., Mangasarian, O., Street, N., & Street, W. (1993). Breast Cancer Wisconsin (Diagnostic) [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C5DW2B. Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
+
+- Stolfo, S., Fan, W., Lee, W., Prodromidis, A., & Chan, P. (1999). KDD Cup 1999 Data [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C51C7N. Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
diff --git a/nebula/addons/trustworthiness/helpers/model_quality.py b/nebula/addons/trustworthiness/helpers/model_quality.py
index 4887f5719..979583607 100644
--- a/nebula/addons/trustworthiness/helpers/model_quality.py
+++ b/nebula/addons/trustworthiness/helpers/model_quality.py
@@ -4,6 +4,11 @@
 import numpy as np
 import torch
 
+# AIF360: AI Fairness 360 [Software]. https://github.com/Trusted-AI/AIF360
+# Licensed under Apache License 2.0: https://github.com/Trusted-AI/AIF360/blob/main/LICENSE
+# HolisticAI: open-source library to assess and improve AI trustworthiness.
+# Licensed under Apache License 2.0: https://github.com/holistic-ai/holisticai/blob/main/LICENSE
+
 logger = logging.getLogger(__name__)
 
 def _extract_model_logits(model_output):
diff --git a/nebula/core/datasets/adultcensus/adultcensus.py b/nebula/core/datasets/adultcensus/adultcensus.py
index 062ffc380..6618ccad9 100644
--- a/nebula/core/datasets/adultcensus/adultcensus.py
+++ b/nebula/core/datasets/adultcensus/adultcensus.py
@@ -1,4 +1,6 @@
 # nebula/core/datasets/adultcensus/adultcensus.py
+# Becker, B. & Kohavi, R. (1996). Adult [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C5XW20.
+# Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
 
 import logging
 import os
diff --git a/nebula/core/datasets/breast_cancer/breast_cancer.py b/nebula/core/datasets/breast_cancer/breast_cancer.py
index f5e53ed7e..04fbcf9ae 100644
--- a/nebula/core/datasets/breast_cancer/breast_cancer.py
+++ b/nebula/core/datasets/breast_cancer/breast_cancer.py
@@ -1,3 +1,6 @@
+# Wolberg, W., Mangasarian, O., Street, N., & Street, W. (1993). Breast Cancer Wisconsin (Diagnostic) [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C5DW2B.
+# Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
+
 import logging
 import os
 from typing import Any
diff --git a/nebula/core/datasets/covtype/covtype.py b/nebula/core/datasets/covtype/covtype.py
index ec3ef65d9..2ef0a360c 100644
--- a/nebula/core/datasets/covtype/covtype.py
+++ b/nebula/core/datasets/covtype/covtype.py
@@ -1,4 +1,6 @@
 # nebula/core/datasets/covtype/covtype.py
+# Blackard, J. (1998). Covertype [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C50K5N.
+# Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
 
 import logging
 import os
diff --git a/nebula/core/datasets/kddcup99/kddcup99.py b/nebula/core/datasets/kddcup99/kddcup99.py
index db6ae8488..494265bbe 100644
--- a/nebula/core/datasets/kddcup99/kddcup99.py
+++ b/nebula/core/datasets/kddcup99/kddcup99.py
@@ -1,3 +1,6 @@
+# Stolfo, S., Fan, W., Lee, W., Prodromidis, A., & Chan, P. (1999). KDD Cup 1999 Data [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C51C7N.
+# Licensed under CC BY 4.0: https://creativecommons.org/licenses/by/4.0/
+
 import logging
 import os
 from typing import Any
diff --git a/nebula/core/training/dp.py b/nebula/core/training/dp.py
index 15b094c88..446511409 100644
--- a/nebula/core/training/dp.py
+++ b/nebula/core/training/dp.py
@@ -1,3 +1,6 @@
+# Opacus: User-Friendly Differential Privacy Library in PyTorch. Yousefpour et al. (2021). arXiv:2109.12298.
+# Licensed under Apache License 2.0: https://github.com/meta-pytorch/opacus/blob/main/LICENSE
+
 class SimpleDPState:
     # Minimal mutable state used to pass Opacus-wrapped objects between hooks.
     def __init__(self):

From 51e6bc3833185f86e4c68b6e7f81dce930ad122a Mon Sep 17 00:00:00 2001
From: "Juan J." <juanjetm1@gmail.com>
Date: Thu, 11 Jun 2026 16:26:42 +0200
Subject: [PATCH 65/66] DP error fixed

---
 nebula/core/models/breast_cancer/mlp.py | 6 ------
 nebula/core/models/covtype/mlp.py       | 6 ------
 nebula/core/models/nebulamodel.py       | 1 -
 nebula/core/training/lightning_dp.py    | 9 +++++++--
 4 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/nebula/core/models/breast_cancer/mlp.py b/nebula/core/models/breast_cancer/mlp.py
index 577d16290..27c6a51ba 100644
--- a/nebula/core/models/breast_cancer/mlp.py
+++ b/nebula/core/models/breast_cancer/mlp.py
@@ -16,13 +16,9 @@ def __init__(
         seed=None,
         data_type="Tabular",
     ):
-        # OJO: NebulaModel está pensado para imágenes (input_channels),
-        # pero en la práctica se usa ese primer argumento como "input shape info".
-        # Para tabular, pasamos input_dim en input_channels para mantener la firma.
         super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
         self.data_type = data_type
 
-        # Mantengo el mismo patrón que tu MLP de FashionMNIST.
         self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
 
         self.example_input_array = torch.rand(1, input_dim)
@@ -34,8 +30,6 @@ def __init__(
         self.l3 = torch.nn.Linear(128, num_classes)
 
     def forward(self, x):
-        # En tabular, x debe ser (batch, input_dim).
-        # A veces puede venir con dimensión extra (batch, 1, input_dim) por loaders.
         if x.dim() == 3 and x.size(1) == 1:
             x = x.squeeze(1)
 
diff --git a/nebula/core/models/covtype/mlp.py b/nebula/core/models/covtype/mlp.py
index 6eb55a348..bb93fbc97 100644
--- a/nebula/core/models/covtype/mlp.py
+++ b/nebula/core/models/covtype/mlp.py
@@ -16,13 +16,9 @@ def __init__(
         seed=None,
         data_type="Tabular",
     ):
-        # OJO: NebulaModel está pensado para imágenes (input_channels),
-        # pero en la práctica se usa ese primer argumento como "input shape info".
-        # Para tabular, pasamos input_dim en input_channels para mantener la firma.
         super().__init__(input_dim, num_classes, learning_rate, metrics, confusion_matrix, seed)
         self.data_type = data_type
 
-        # Mantengo el mismo patrón que tu MLP de FashionMNIST.
         self.config = {"beta1": 0.9, "beta2": 0.999, "amsgrad": True}
 
         self.example_input_array = torch.rand(1, input_dim)
@@ -34,8 +30,6 @@ def __init__(
         self.l3 = torch.nn.Linear(128, num_classes)
 
     def forward(self, x):
-        # En tabular, x debe ser (batch, input_dim).
-        # A veces puede venir con dimensión extra (batch, 1, input_dim) por loaders.
         if x.dim() == 3 and x.size(1) == 1:
             x = x.squeeze(1)
 
diff --git a/nebula/core/models/nebulamodel.py b/nebula/core/models/nebulamodel.py
index a6557fed6..5973f1518 100755
--- a/nebula/core/models/nebulamodel.py
+++ b/nebula/core/models/nebulamodel.py
@@ -160,7 +160,6 @@ def generate_confusion_matrix(self, phase, print_cm=False, plot_cm=False):
 
             del cm_numpy, classes, fig, ax
 
-        # Restablecer la matriz de confusión
         if phase == "Test (Local)":
             self.cm.reset()
         else:
diff --git a/nebula/core/training/lightning_dp.py b/nebula/core/training/lightning_dp.py
index bed08a973..6fde329f4 100644
--- a/nebula/core/training/lightning_dp.py
+++ b/nebula/core/training/lightning_dp.py
@@ -42,7 +42,7 @@ def create_dp_plugin(self):
         )
 
     def _train_sync(self):
-        # Keep the public Lightning trainer contract: train once and return loss/accuracy.
+        # Keep the public Lightning trainer contract: train once and return validation and training metrics.
         try:
             self._fit_with_dp()
 
@@ -57,7 +57,12 @@ def _train_sync(self):
                 loss = raw_loss.item() if hasattr(raw_loss, "item") else raw_loss
 
             accuracy = validation_metrics.get("Validation/Accuracy")
-            return loss, accuracy
+            train_accuracy = None
+            get_train_accuracy = getattr(self.model, "get_latest_train_accuracy", None)
+            if callable(get_train_accuracy):
+                train_accuracy = get_train_accuracy()
+
+            return loss, accuracy, train_accuracy
 
         except Exception as e:
             logging_training.error(f"Error in _train_sync with Differential Privacy: {e}")

From ce3f59680a01452da335ee2d85b62750b9f74065 Mon Sep 17 00:00:00 2001
From: enriquetomasmb <enriquetomasmb@gmail.com>
Date: Fri, 12 Jun 2026 10:59:47 +0200
Subject: [PATCH 66/66] Refactor code for improved readability and consistency

---
 .github/PULL_REQUEST_TEMPLATE.md              |  4 +-
 CLA.md                                        |  4 +-
 COMMERCIAL_INFO.md                            |  4 +-
 CONTRIBUTING.md                               |  4 +-
 app/deployer.py                               | 30 ++++----
 app/windows/install.ps1                       |  2 +-
 docs/_prebuilt/commercial-faq.md              | 10 +--
 .../attacks/communications/floodingattack.py  |  4 +-
 .../attacks/model/gllneuroninversion.py       |  2 +-
 .../addons/attacks/model/swappingweights.py   |  2 +-
 .../networksimulation/networksimulator.py     |  2 +-
 nebula/config/config.py                       |  2 +-
 nebula/controller/controller.py               | 72 +++++++++----------
 nebula/controller/http_helpers.py             | 18 ++---
 nebula/core/datasets/datamodule.py            |  2 +-
 nebula/core/role.py                           |  2 +-
 .../staticarbitrationpolicy.py                |  8 +--
 .../distanceneighborpolicy.py                 |  4 +-
 .../neighborpolicies/fcneighborpolicy.py      |  6 +-
 .../neighborpolicies/idleneighborpolicy.py    |  8 +--
 .../neighborpolicies/ringneighborpolicy.py    |  8 +--
 .../awareness/satraining/satraining.py        | 13 ++--
 .../trainingpolicy/bpstrainingpolicy.py       | 16 ++---
 .../satraining/trainingpolicy/fastreboot.py   |  4 +-
 .../trainingpolicy/htstrainingpolicy.py       | 30 ++++----
 .../trainingpolicy/qdstrainingpolicy.py       | 60 ++++++++--------
 .../trainingpolicy/trainingpolicy.py          | 14 ++--
 .../distcandidateselector.py                  | 10 +--
 .../candidateselection/fccandidateselector.py |  2 +-
 .../ringcandidateselector.py                  |  2 +-
 .../stdcandidateselector.py                   |  6 +-
 .../modelhandlers/defaultmodelhandler.py      |  6 +-
 .../modelhandlers/stdmodelhandler.py          |  6 +-
 nebula/frontend/static/css/deployment.css     |  2 +-
 .../frontend/static/js/deployment/topology.js | 26 +++----
 .../static/js/deployment/ui-controls.js       | 38 +++++-----
 nebula/physical/api.py                        |  6 +-
 nebula/physical/node.sh                       |  4 +-
 nebula/utils.py                               |  2 +-
 39 files changed, 222 insertions(+), 223 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index f61055dbb..ace900221 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -17,5 +17,5 @@ Please fill out the following template to help us review your pull request.
 <!-- List any related issues, e.g. "Closes #123" -->
 
 ## Signed-off-by
-Signed-off-by: *Your Name (email)*  
-Date: *YYYY-MM-DD*
\ No newline at end of file
+Signed-off-by: *Your Name (email)*
+Date: *YYYY-MM-DD*
diff --git a/CLA.md b/CLA.md
index 6a87eb8c4..d321de5ea 100644
--- a/CLA.md
+++ b/CLA.md
@@ -18,5 +18,5 @@ By submitting a pull request, patch or code snippet, you agree that:
    your contribution and that Authors may use, sell or license the
    software containing your contribution at its sole discretion.
 
-Signed-off-by: *Enrique Tomás Martínez Beltrán (enriquetomas@um.es)*  
-Date: *2025-06-25*
\ No newline at end of file
+Signed-off-by: *Enrique Tomás Martínez Beltrán (enriquetomas@um.es)*
+Date: *2025-06-25*
diff --git a/COMMERCIAL_INFO.md b/COMMERCIAL_INFO.md
index c22826a6e..81bcec4a7 100644
--- a/COMMERCIAL_INFO.md
+++ b/COMMERCIAL_INFO.md
@@ -1,6 +1,6 @@
 # NEBULA Enterprise License
 
-This repository is published under **GNU AGPL v3.0**.  
+This repository is published under **GNU AGPL v3.0**.
 If you wish to embed NEBULA in closed-source products, offer it as a hosted service, or obtain an SLA, please e-mail **enriquetomas@um.es** and **alberto.huertas@um.es**.
 
-A bespoke commercial agreement (OEM / subscription / SaaS) will be provided on request.
\ No newline at end of file
+A bespoke commercial agreement (OEM / subscription / SaaS) will be provided on request.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a123f7cc5..6a2779a85 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,7 +6,7 @@ Follow conventional-commit style.
 ## 2 • Sign the CLA
 When you open your first Pull Request, **CLA-assistant** will block the
 merge until you tick the box confirming you accept the
-[ICLA](CLA.md).  
+[ICLA](CLA.md).
 Add a Developer-Certificate-of-Origin line in every commit:
 
 ```
@@ -23,4 +23,4 @@ The pull request will be reviewed by the maintainers.
 The maintainers will provide feedback on the pull request.
 
 ## 6 • Merge the Pull Request
-The pull request will be merged by the maintainers.
\ No newline at end of file
+The pull request will be merged by the maintainers.
diff --git a/app/deployer.py b/app/deployer.py
index 968ef62da..fc21f23ef 100644
--- a/app/deployer.py
+++ b/app/deployer.py
@@ -289,17 +289,17 @@ def run_script(self, script):
     def kill_script_processes(self, pids_file):
         """
         Forcefully terminates processes listed in a given PID file, including their child processes.
-    
+
         Args:
             pids_file (str): Path to the file containing PIDs, one per line.
-    
+
         Behavior:
             - Reads the PIDs from the file.
             - For each PID, checks if the process exists.
             - If it exists, kills all child processes recursively before killing the main process.
             - Handles and logs exceptions such as missing processes or invalid PID entries.
             - Logs warnings and errors appropriately.
-    
+
         Typical use case:
             Used to clean up running processes related to a scenario or script that has been deleted or stopped.
         """
@@ -344,7 +344,7 @@ def run_observer():
     """
     Starts a watchdog observer to monitor the configuration directory for changes.
 
-    This function is typically used to execute additional scripts or trigger events 
+    This function is typically used to execute additional scripts or trigger events
     during the execution of a federated learning session by monitoring file system changes.
 
     Main functionalities:
@@ -357,7 +357,7 @@ def run_observer():
         - Trigger specific actions during a federation lifecycle.
 
     Note:
-        The observer runs in a blocking mode and will keep the process alive 
+        The observer runs in a blocking mode and will keep the process alive
         until manually stopped or interrupted.
     """
     # Watchdog for running additional scripts in the host machine (i.e. during the execution of a federation)
@@ -373,7 +373,7 @@ class Deployer:
     """
     Handles the configuration and initialization of deployment parameters for the NEBULA system.
 
-    This class reads and stores various deployment-related settings such as port assignments, 
+    This class reads and stores various deployment-related settings such as port assignments,
     environment paths, logging configuration, and system mode (production, development, or simulation).
 
     Main functionalities:
@@ -438,7 +438,7 @@ def configure_logger(self):
         """
         Configures the logging system for the deployment controller.
 
-        This method sets up both console and file logging with a consistent format and appropriate log levels. 
+        This method sets up both console and file logging with a consistent format and appropriate log levels.
         It also ensures that Uvicorn loggers are properly configured to avoid duplicate log outputs.
 
         Main functionalities:
@@ -452,7 +452,7 @@ def configure_logger(self):
             - Ensures clean and consistent logging output during deployment.
 
         Note:
-            This method does not set up file logging directly, but prepares the base configuration 
+            This method does not set up file logging directly, but prepares the base configuration
             and Uvicorn logger behavior for further logging use.
         """
         log_console_format = "[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s"
@@ -475,7 +475,7 @@ def ensure_directory_access(self, directory_path: str) -> str:
         """
         Ensures that the specified directory exists and is writable.
 
-        This method attempts to create the directory if it does not exist and verifies 
+        This method attempts to create the directory if it does not exist and verifies
         write access by writing and deleting a temporary metadata file.
 
         Args:
@@ -521,8 +521,8 @@ def start(self):
         """
         Starts the NEBULA deployment process and all associated services.
 
-        This method initializes the NEBULA platform by setting up the environment, 
-        checking port availability, starting key services (controller, frontend, WAF), 
+        This method initializes the NEBULA platform by setting up the environment,
+        checking port availability, starting key services (controller, frontend, WAF),
         and launching a filesystem observer to react to configuration changes.
 
         Main functionalities:
@@ -539,7 +539,7 @@ def start(self):
             - Central entry point for managing NEBULA components during deployment.
 
         Note:
-            The method blocks indefinitely until manually interrupted, 
+            The method blocks indefinitely until manually interrupted,
             and ensures graceful shutdown upon receiving SIGINT or SIGTERM.
         """
         banner = """
@@ -616,8 +616,8 @@ def signal_handler(self, sig, frame):
         """
         Handles system termination signals to ensure a clean shutdown.
 
-        This method is triggered when the application receives SIGTERM or SIGINT signals 
-        (e.g., via Ctrl+C or `kill`). It logs the event, performs cleanup actions, and 
+        This method is triggered when the application receives SIGTERM or SIGINT signals
+        (e.g., via Ctrl+C or `kill`). It logs the event, performs cleanup actions, and
         terminates the process gracefully.
 
         Args:
@@ -749,7 +749,7 @@ def run_controller(self):
                 )
 
         network_name = f"{os.environ['USER']}_nebula-net-base"
-        
+
         try:
             subprocess.check_call(["nvidia-smi"])
             self.gpu_available = True
diff --git a/app/windows/install.ps1 b/app/windows/install.ps1
index 88aa7f1ac..6d9dc7004 100644
--- a/app/windows/install.ps1
+++ b/app/windows/install.ps1
@@ -1,2 +1,2 @@
 # Run make install
-make install
\ No newline at end of file
+make install
diff --git a/docs/_prebuilt/commercial-faq.md b/docs/_prebuilt/commercial-faq.md
index 197ef81a9..274c0fcbd 100644
--- a/docs/_prebuilt/commercial-faq.md
+++ b/docs/_prebuilt/commercial-faq.md
@@ -1,13 +1,13 @@
 # Commercial FAQ — NEBULA Enterprise
 
-**Q 1. What does the commercial license cover?**  
+**Q 1. What does the commercial license cover?**<br>
 To be determined.
 
-**Q 2. Does the commercial edition include extra features?**  
+**Q 2. Does the commercial edition include extra features?**<br>
 To be determined.
 
-**Q 3. Pricing model?**  
+**Q 3. Pricing model?**<br>
 To be determined.
 
-**Q 4. Can we contribute back fixes?**  
-Absolutely; your patches remain under AGPL in the community edition, and you can keep proprietary extensions private under the commercial agreement.
\ No newline at end of file
+**Q 4. Can we contribute back fixes?**<br>
+Absolutely; your patches remain under AGPL in the community edition, and you can keep proprietary extensions private under the commercial agreement.
diff --git a/nebula/addons/attacks/communications/floodingattack.py b/nebula/addons/attacks/communications/floodingattack.py
index 146854fa3..73dc0394c 100644
--- a/nebula/addons/attacks/communications/floodingattack.py
+++ b/nebula/addons/attacks/communications/floodingattack.py
@@ -69,9 +69,9 @@ async def wrapper(*args, **kwargs):
                                 )
                             _, *new_args = args  # Exclude self argument
                             await func(*new_args, **kwargs)
-                _, *new_args = args 
+                _, *new_args = args
                 return await func(*new_args)
-            
+
             return wrapper
 
         return decorator
diff --git a/nebula/addons/attacks/model/gllneuroninversion.py b/nebula/addons/attacks/model/gllneuroninversion.py
index 64cb3d215..e52a5a930 100644
--- a/nebula/addons/attacks/model/gllneuroninversion.py
+++ b/nebula/addons/attacks/model/gllneuroninversion.py
@@ -66,4 +66,4 @@ def model_attack(self, received_weights):
         # Inject random noise of the same shape and type
         received_weights[target_key] = torch.empty_like(target_weights).uniform_(0, noise_scale)
 
-        return received_weights
\ No newline at end of file
+        return received_weights
diff --git a/nebula/addons/attacks/model/swappingweights.py b/nebula/addons/attacks/model/swappingweights.py
index 95aa89208..36eb1e7a0 100644
--- a/nebula/addons/attacks/model/swappingweights.py
+++ b/nebula/addons/attacks/model/swappingweights.py
@@ -109,4 +109,4 @@ def model_attack(self, received_weights):
         if self.layer_idx + 2 < len(layer_keys):
             received_weights[layer_keys[self.layer_idx + 2]] = received_weights[layer_keys[self.layer_idx + 2]][:, perm]
 
-        return received_weights
\ No newline at end of file
+        return received_weights
diff --git a/nebula/addons/networksimulation/networksimulator.py b/nebula/addons/networksimulation/networksimulator.py
index e296a1527..9dfd4853e 100644
--- a/nebula/addons/networksimulation/networksimulator.py
+++ b/nebula/addons/networksimulation/networksimulator.py
@@ -6,7 +6,7 @@ class NetworkSimulator(ABC):
     Abstract base class representing a network simulator interface.
 
     This interface defines the required methods for controlling and simulating network conditions between nodes.
-    A concrete implementation is expected to manage artificial delays, bandwidth restrictions, packet loss, 
+    A concrete implementation is expected to manage artificial delays, bandwidth restrictions, packet loss,
     or other configurable conditions typically used in network emulation or testing.
 
     Required asynchronous methods:
diff --git a/nebula/config/config.py b/nebula/config/config.py
index 5ef336e3a..cae3cf7f8 100755
--- a/nebula/config/config.py
+++ b/nebula/config/config.py
@@ -55,7 +55,7 @@ def reset_logging_configuration(self):
 
         self.__set_default_logging(mode="a")
         self.__set_training_logging(mode="a")
-        
+
     def shutdown_logging(self):
         """
         Properly shuts down all loggers and their handlers in the system.
diff --git a/nebula/controller/controller.py b/nebula/controller/controller.py
index a00d142d1..0d7142dbe 100755
--- a/nebula/controller/controller.py
+++ b/nebula/controller/controller.py
@@ -264,24 +264,24 @@ async def get_available_gpu():
 
 def validate_physical_fields(data: dict):
     if data.get("deployment") != "physical":
-        return                                   
- 
+        return
+
     ips = data.get("physical_ips")
     if not ips:
         raise HTTPException(
             status_code=400,
             detail="physical deployment requires 'physical_ips'"
         )
- 
+
     if len(ips) != data.get("n_nodes"):
         raise HTTPException(
             status_code=400,
             detail="'physical_ips' must have the same length as 'n_nodes'"
         )
- 
+
     try:
         for ip in ips:
-            ipaddress.ip_address(ip)            
+            ipaddress.ip_address(ip)
             print(ip)
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
@@ -347,21 +347,21 @@ async def stop_scenario(
 ):
     """
     Stops the execution of a federated learning scenario and performs cleanup operations.
-    
+
     This endpoint:
         - Stops all participant containers associated with the specified scenario.
         - Removes Docker containers and network resources tied to the scenario and user.
         - Sets the scenario's status to "finished" in the database.
         - Optionally finalizes all active scenarios if the 'all' flag is set.
-    
+
     Args:
         scenario_name (str): Name of the scenario to stop.
         username (str): User who initiated the stop operation.
         all (bool): Whether to stop all running scenarios instead of just one (default: False).
-    
+
     Raises:
         HTTPException: Returns a 500 status code if any step fails.
-    
+
     Note:
         This function does not currently trigger statistics generation.
     """
@@ -847,27 +847,27 @@ async def discover_vpn():
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE,
         )
- 
+
         # 2) Wait for it to finish and capture stdout/stderr
         out, err = await proc.communicate()
         if proc.returncode != 0:
             # If the CLI returned an error, raise to be caught below
             raise RuntimeError(err.decode())
- 
+
         # 3) Parse the JSON output
         data = json.loads(out.decode())
- 
+
         # 4) Collect only the IPv4 addresses from each peer
         ips = []
         for peer in data.get("Peer", {}).values():
             for ip in peer.get("TailscaleIPs", []):
-                if ":" not in ip:  
+                if ":" not in ip:
                     # Skip IPv6 entries (they contain colons)
                     ips.append(ip)
- 
+
         # 5) Return the list of IPv4s
         return {"ips": ips}
- 
+
     except Exception as e:
         # 6) Log any failure and respond with HTTP 500
         logging.error(f"Error discovering VPN devices: {e}")
@@ -877,14 +877,14 @@ async def discover_vpn():
 @app.get("/physical/run/{ip}", tags=["physical"])
 async def physical_run(ip: str):
     status, data = await remote_get(ip, "/run/")
- 
+
     if status == 200:
         return data
     if status is None:
         raise HTTPException(status_code=502, detail=f"Node unreachable: {data}")
     raise HTTPException(status_code=status, detail=data)
- 
- 
+
+
 @app.get("/physical/stop/{ip}", tags=["physical"])
 async def physical_stop(ip: str):
     status, data = await remote_get(ip, "/stop/")
@@ -893,8 +893,8 @@ async def physical_stop(ip: str):
     if status is None:
         raise HTTPException(status_code=502, detail=f"Node unreachable: {data}")
     raise HTTPException(status_code=status, detail=data)
- 
- 
+
+
 @app.put("/physical/setup/{ip}", tags=["physical"],
          status_code=status.HTTP_201_CREATED)
 async def physical_setup(
@@ -903,7 +903,7 @@ async def physical_setup(
     global_test: UploadFile = File(..., description="Global Dataset*.h5*"),
     train_set:   UploadFile = File(..., description="Training dataset*.h5*"),
 ):
- 
+
     form = aiohttp.FormData()
     await config.seek(0)
     form.add_field("config", config.file,
@@ -914,17 +914,17 @@ async def physical_setup(
     await train_set.seek(0)
     form.add_field("train_set", train_set.file,
                    filename=train_set.filename, content_type="application/octet-stream")
- 
+
     status_code, data = await remote_post_form(
         ip, "/setup/", form, method="PUT"
     )
- 
+
     if status_code == 201:
         return data
     if status_code is None:
         raise HTTPException(status_code=502, detail=f"Node unreachable: {data}")
     raise HTTPException(status_code=status_code, detail=data)
- 
+
 # ──────────────────────────────────────────────────────────────
 # Physical · single-node state
 # ──────────────────────────────────────────────────────────────
@@ -932,22 +932,22 @@ async def physical_setup(
 async def get_physical_node_state(ip: str):
     """
     Query a single Raspberry Pi (or other node) for its training state.
- 
+
     Parameters
     ----------
     ip : str
         IP address or hostname of the node.
- 
+
     Returns
     -------
     dict
-        • running (bool) – True if a training process is active.  
+        • running (bool) – True if a training process is active.
         • error   (str)  – Optional error message when the node is unreachable
                             or returns a non-200 HTTP status.
     """
     # Short global timeout so a dead node doesn't block the whole request
     timeout = aiohttp.ClientTimeout(total=3)            # seconds
- 
+
     try:
         async with aiohttp.ClientSession(timeout=timeout) as session:
             async with session.get(f"http://{ip}/state/") as resp:
@@ -960,8 +960,8 @@ async def get_physical_node_state(ip: str):
     except Exception as exc:
         # Network errors, timeouts, DNS failures, …
         return {"running": False, "error": str(exc)}
- 
- 
+
+
 # ──────────────────────────────────────────────────────────────
 # Physical · aggregate state for an entire scenario
 # ──────────────────────────────────────────────────────────────
@@ -969,12 +969,12 @@ async def get_physical_node_state(ip: str):
 async def get_physical_scenario_state(scenario_name: str):
     """
     Check the training state of *every* physical node assigned to a scenario.
- 
+
     Parameters
     ----------
     scenario_name : str
         Scenario identifier.
- 
+
     Returns
     -------
     dict
@@ -989,16 +989,16 @@ async def get_physical_scenario_state(scenario_name: str):
     scenario = await get_scenario_by_name(scenario_name)
     if not scenario:
         raise HTTPException(status_code=404, detail="Scenario not found")
- 
+
     nodes = await list_nodes_by_scenario_name(scenario_name)
     if not nodes:
         raise HTTPException(status_code=404, detail="No nodes found for scenario")
- 
+
     # 2) Probe all nodes concurrently
     ips   = [n["ip"] for n in nodes]
     tasks = [get_physical_node_state(ip) for ip in ips]
     states = await asyncio.gather(*tasks)               # parallel HTTP calls
- 
+
     # 3) Aggregate results
     nodes_state  = dict(zip(ips, states))
     any_running  = any(s.get("running") for s in states)
@@ -1007,7 +1007,7 @@ async def get_physical_scenario_state(scenario_name: str):
     all_available = all(
         (not s.get("running")) and (not s.get("error")) for s in states
     )
- 
+
     return {
         "running": any_running,
         "nodes_state": nodes_state,
diff --git a/nebula/controller/http_helpers.py b/nebula/controller/http_helpers.py
index ed60f44e5..886cc57e7 100644
--- a/nebula/controller/http_helpers.py
+++ b/nebula/controller/http_helpers.py
@@ -1,13 +1,13 @@
 from __future__ import annotations
- 
+
 import logging
 from typing import Optional, Union
- 
+
 import aiohttp
 from aiohttp import FormData
- 
+
 _TIMEOUT = aiohttp.ClientTimeout(total=15)
- 
+
 async def _request_json(
     method: str,
     host: str,
@@ -27,12 +27,12 @@ async def _request_json(
     except Exception as exc:
         logging.error("[%s] %s%s – %s", method.upper(), host, endpoint, exc)
         return None, str(exc)
- 
- 
+
+
 async def remote_get(host: str, endpoint: str):
     return await _request_json("GET", host, endpoint)
- 
- 
+
+
 async def remote_post_form(
     host: str,
     endpoint: str,
@@ -40,4 +40,4 @@ async def remote_post_form(
     *,
     method: str = "POST",
 ):
-    return await _request_json(method, host, endpoint, data=form)
\ No newline at end of file
+    return await _request_json(method, host, endpoint, data=form)
diff --git a/nebula/core/datasets/datamodule.py b/nebula/core/datasets/datamodule.py
index 04413f35a..aae9bf820 100755
--- a/nebula/core/datasets/datamodule.py
+++ b/nebula/core/datasets/datamodule.py
@@ -46,7 +46,7 @@ def __init__(
         self.data_val = None
         self.global_te_subset = None
         self.local_te_subset = None
-        
+
     def get_samples_per_label(self):
         return self._samples_per_label
 
diff --git a/nebula/core/role.py b/nebula/core/role.py
index 6bc4343f8..dc5281983 100755
--- a/nebula/core/role.py
+++ b/nebula/core/role.py
@@ -10,7 +10,7 @@ class Role(Enum):
     PROXY = "proxy"
     IDLE = "idle"
     SERVER = "server"
-    
+
 def factory_node_role(role: str) -> Role:
     if role == "trainer":
         return Role.TRAINER
diff --git a/nebula/core/situationalawareness/awareness/arbitrationpolicies/staticarbitrationpolicy.py b/nebula/core/situationalawareness/awareness/arbitrationpolicies/staticarbitrationpolicy.py
index e0dedab5c..dd17b5c35 100644
--- a/nebula/core/situationalawareness/awareness/arbitrationpolicies/staticarbitrationpolicy.py
+++ b/nebula/core/situationalawareness/awareness/arbitrationpolicies/staticarbitrationpolicy.py
@@ -8,11 +8,11 @@ class SAP(ArbitrationPolicy):  # Static Arbitatrion Policy
     """
     Static Arbitration Policy for the Reasoner module.
 
-    This class implements a fixed priority arbitration mechanism for 
-    SA (Situational Awareness) components. Each SA component category 
+    This class implements a fixed priority arbitration mechanism for
+    SA (Situational Awareness) components. Each SA component category
     is assigned a static weight representing its priority level.
 
-    In case of conflicting SA commands, the policy selects the command 
+    In case of conflicting SA commands, the policy selects the command
     whose originating component has the highest priority weight.
 
     Attributes:
@@ -21,7 +21,7 @@ class SAP(ArbitrationPolicy):  # Static Arbitatrion Policy
 
     Methods:
         init(config): Placeholder for initialization with external configuration.
-        tie_break(sac1, sac2): Resolves conflicts between two SA commands by 
+        tie_break(sac1, sac2): Resolves conflicts between two SA commands by
             comparing their category weights, returning True if sac1 wins.
     """
     def __init__(self, verbose):
diff --git a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/distanceneighborpolicy.py b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/distanceneighborpolicy.py
index a1d29c675..421cb8e37 100644
--- a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/distanceneighborpolicy.py
+++ b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/distanceneighborpolicy.py
@@ -16,7 +16,7 @@ class DistanceNeighborPolicy(NeighborPolicy):
     - When to discard or replace existing neighbors.
     - Keeping track of current neighbors and known nodes with their distances.
 
-    The policy operates under the assumption that physical proximity 
+    The policy operates under the assumption that physical proximity
     can be beneficial for performance and robustness in the network.
 
     Attributes:
@@ -26,7 +26,7 @@ class DistanceNeighborPolicy(NeighborPolicy):
         addr (str | None): The address of this node (used for self-identification).
         neighbors_lock (Locker): Async lock for safe access to `neighbors`.
         nodes_known_lock (Locker): Async lock for safe access to `nodes_known`.
-        nodes_distances (dict[str, tuple[float, tuple[float, float]]] | None): 
+        nodes_distances (dict[str, tuple[float, tuple[float, float]]] | None):
             Mapping from node IDs to a tuple containing (distance, (latitude, longitude)).
         nodes_distances_lock (Locker): Async lock for safe access to `nodes_distances`.
         _verbose (bool): Whether to enable verbose logging for debugging purposes.
diff --git a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/fcneighborpolicy.py b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/fcneighborpolicy.py
index e395a199a..443282f65 100644
--- a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/fcneighborpolicy.py
+++ b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/fcneighborpolicy.py
@@ -8,8 +8,8 @@ class FCNeighborPolicy(NeighborPolicy):
     """
     Neighbor policy for fully-connected (FC) structured topologies.
 
-    This policy assumes a fully-connected topology where every node should attempt 
-    to connect to all known nodes. It always accepts incoming neighbor connections 
+    This policy assumes a fully-connected topology where every node should attempt
+    to connect to all known nodes. It always accepts incoming neighbor connections
     and considers the neighbor list incomplete if there are known nodes that are not yet connected.
 
     The goal is to maintain full connectivity across all known nodes in the federation.
@@ -23,7 +23,7 @@ class FCNeighborPolicy(NeighborPolicy):
         nodes_known_lock (Locker): Async lock for safe access to `nodes_known`.
         _verbose (bool): Whether to enable verbose logging for debugging purposes.
     """
-    
+
     def __init__(self):
         self.max_neighbors = None
         self.nodes_known = set()
diff --git a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/idleneighborpolicy.py b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/idleneighborpolicy.py
index 648c8605e..d1d7d5025 100644
--- a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/idleneighborpolicy.py
+++ b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/idleneighborpolicy.py
@@ -8,11 +8,11 @@ class IDLENeighborPolicy(NeighborPolicy):
     """
     Neighbor policy for minimal connectivity scenarios.
 
-    This policy only attempts to discover or establish new neighbor connections 
-    if the node is currently isolated (i.e., has no neighbors). All incoming 
+    This policy only attempts to discover or establish new neighbor connections
+    if the node is currently isolated (i.e., has no neighbors). All incoming
     connection requests are accepted regardless of the current neighbor state.
 
-    This policy is suitable for scenarios where minimal intervention is preferred, 
+    This policy is suitable for scenarios where minimal intervention is preferred,
     and connections are formed opportunistically rather than proactively.
 
     Attributes:
@@ -24,7 +24,7 @@ class IDLENeighborPolicy(NeighborPolicy):
         nodes_known_lock (Locker): Async lock for thread-safe access to `nodes_known`.
         _verbose (bool): Enables verbose logging for debugging and traceability.
     """
-    
+
     def __init__(self):
         self.max_neighbors = None
         self.nodes_known = set()
diff --git a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/ringneighborpolicy.py b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/ringneighborpolicy.py
index afd9b1d59..e6933b5b7 100644
--- a/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/ringneighborpolicy.py
+++ b/nebula/core/situationalawareness/awareness/sanetwork/neighborpolicies/ringneighborpolicy.py
@@ -10,9 +10,9 @@ class RINGNeighborPolicy(NeighborPolicy):
     """
     Neighbor policy for ring topologies.
 
-    This policy maintains a strict limit on the number of neighbors per node, 
-    enforcing a ring-like structure. Each node connects to a fixed number of 
-    neighbors (by default 2), and excess connections are detected and marked 
+    This policy maintains a strict limit on the number of neighbors per node,
+    enforcing a ring-like structure. Each node connects to a fixed number of
+    neighbors (by default 2), and excess connections are detected and marked
     for removal.
 
     The policy ensures:
@@ -34,7 +34,7 @@ class RINGNeighborPolicy(NeighborPolicy):
         _excess_neighbors_removed_lock (Locker): Lock for accessing the removal tracking set.
         _verbose (bool): Enables verbose logging.
     """
-    
+
     RECENTLY_REMOVED_BAN_TIME = 20
 
     def __init__(self):
diff --git a/nebula/core/situationalawareness/awareness/satraining/satraining.py b/nebula/core/situationalawareness/awareness/satraining/satraining.py
index 94c18f40c..813cc11fe 100644
--- a/nebula/core/situationalawareness/awareness/satraining/satraining.py
+++ b/nebula/core/situationalawareness/awareness/satraining/satraining.py
@@ -6,9 +6,9 @@
 from nebula.addons.functions import print_msg_box
 from nebula.core.situationalawareness.awareness.sareasoner import SAReasoner, SAMComponent
 from nebula.core.eventmanager import EventManager
-    
-RESTRUCTURE_COOLDOWN = 5    
-    
+
+RESTRUCTURE_COOLDOWN = 5
+
 class SATraining(SAMComponent):
     """
     SATraining is a Situational Awareness (SA) component responsible for enhancing
@@ -24,7 +24,7 @@ class SATraining(SAMComponent):
         _sar (SAReasoner): Reference to the shared situational reasoner.
         _trainning_policy: Instantiated training policy strategy.
     """
-    
+
     def __init__(self, config):
         """
         Initialize the SATraining component with a given configuration.
@@ -61,7 +61,7 @@ def tp(self):
         """
         Returns the currently active training policy instance.
         """
-        return self._trainning_policy    
+        return self._trainning_policy
 
     async def init(self):
         """
@@ -69,7 +69,7 @@ async def init(self):
         This setup enables the policy to make informed decisions based on local topology.
         """
         config = {}
-        config["nodes"] = set(await self.sar.get_nodes_known(neighbors_only=True)) 
+        config["nodes"] = set(await self.sar.get_nodes_known(neighbors_only=True))
         await self.tp.init(config)
 
     async def sa_component_actions(self):
@@ -79,4 +79,3 @@ async def sa_component_actions(self):
         """
         logging.info("SA Trainng evaluating current scenario")
         asyncio.create_task(self.tp.get_evaluation_results())
-
diff --git a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/bpstrainingpolicy.py b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/bpstrainingpolicy.py
index 0353a8020..32874c6ae 100644
--- a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/bpstrainingpolicy.py
+++ b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/bpstrainingpolicy.py
@@ -4,18 +4,18 @@
 from nebula.core.nebulaevents import RoundEndEvent
 
 class BPSTrainingPolicy(TrainingPolicy):
-    
+
     def __init__(self, config=None):
         pass
-    
+
     async def init(self, config):
-        await self.register_sa_agent()    
+        await self.register_sa_agent()
 
     async def get_evaluation_results(self):
         sac = factory_sa_command(
             "connectivity",
             SACommandAction.MAINTAIN_CONNECTIONS,
-            self, 
+            self,
             "",
             SACommandPRIO.LOW,
             False,
@@ -24,15 +24,15 @@ async def get_evaluation_results(self):
         )
         await self.suggest_action(sac)
         await self.notify_all_suggestions_done(RoundEndEvent)
-    
+
     async def get_agent(self) -> str:
         return "SATraining_BPSTP"
 
     async def register_sa_agent(self):
         await SuggestionBuffer.get_instance().register_event_agents(RoundEndEvent, self)
-    
+
     async def suggest_action(self, sac : SACommand):
         await SuggestionBuffer.get_instance().register_suggestion(RoundEndEvent, self, sac)
-    
+
     async def notify_all_suggestions_done(self, event_type):
-        await SuggestionBuffer.get_instance().notify_all_suggestions_done_for_agent(self, event_type)
\ No newline at end of file
+        await SuggestionBuffer.get_instance().notify_all_suggestions_done_for_agent(self, event_type)
diff --git a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/fastreboot.py b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/fastreboot.py
index dd8fc438d..39a0791c4 100644
--- a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/fastreboot.py
+++ b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/fastreboot.py
@@ -24,13 +24,13 @@ def __init__(
         self._upgrade_lr = FR_LEARNING_RATE                     # Increased value for learning rate
         self._current_lr = VANILLA_LEARNING_RATE
         self._verbose = config["verbose"]
-        
+
         self._learning_rate_lock = Locker(name="learning_rate_lock", async_lock=True)
         self._weight_modifier = {}
         self._weight_modifier_lock = Locker(name="weight_modifier_lock", async_lock=True)
 
         self._fr_in_progress = False
-        
+
     async def init(self, config):
         #await EventManager.get_instance().subscribe_node_event(UpdateNeighborEvent)
         #await EventManager.get_instance().subscribe_node_event(AggregationEvent)
diff --git a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/htstrainingpolicy.py b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/htstrainingpolicy.py
index e37209ece..29b0524ba 100644
--- a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/htstrainingpolicy.py
+++ b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/htstrainingpolicy.py
@@ -6,19 +6,19 @@
 # "Hybrid Training Strategy"    (HTS)
 class HTSTrainingPolicy(TrainingPolicy):
     """
-    Implements a Hybrid Training Strategy (HTS) that combines multiple training policies 
-    (e.g., QDS, FRTS) to collaboratively decide on the evaluation and potential pruning 
+    Implements a Hybrid Training Strategy (HTS) that combines multiple training policies
+    (e.g., QDS, FRTS) to collaboratively decide on the evaluation and potential pruning
     of neighbors in a decentralized federated learning scenario.
-    
+
     Attributes:
         TRAINING_POLICY (set): Names of training policy classes to instantiate and manage.
     """
-    
+
     TRAINING_POLICY = {
         "Quality-Driven Selection",
         "Fast Reboot Training Strategy",
     }
-    
+
     def __init__(self, config):
         """
         Initializes the HTS policy with the node's address and verbosity level.
@@ -33,34 +33,34 @@ def __init__(self, config):
         self._verbose = config["verbose"]
         self._training_policies : set[TrainingPolicy] = set()
         self._training_policies.add([factory_training_policy(x, config) for x in self.TRAINING_POLICY])
-        
+
     def __str__(self):
-        return "HTS"    
-        
+        return "HTS"
+
     @property
     def tps(self):
-        return self._training_policies  
+        return self._training_policies
 
     async def init(self, config):
         for tp in self.tps:
-            await tp.init(config)    
+            await tp.init(config)
 
     async def update_neighbors(self, node, remove=False):
         pass
-    
+
     async def get_evaluation_results(self):
         """
         Asynchronously calls the `get_evaluation_results` of each policy,
         and logs the nodes each policy would remove.
-        
+
         Returns:
             None (future version may merge all evaluations).
         """
         nodes_to_remove = dict()
         for tp in self.tps:
             nodes_to_remove[tp] = await tp.get_evaluation_results()
-        
+
         for tp, nodes in nodes_to_remove.items():
             logging.info(f"Training Policy: {tp}, nodes to remove: {nodes}")
-            
-        return None
\ No newline at end of file
+
+        return None
diff --git a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/qdstrainingpolicy.py b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/qdstrainingpolicy.py
index f067f7e84..535097b45 100644
--- a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/qdstrainingpolicy.py
+++ b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/qdstrainingpolicy.py
@@ -15,13 +15,13 @@
 class QDSTrainingPolicy(TrainingPolicy):
     """
     Implements a Quality-Driven Selection (QDS) strategy for training in DFL.
-    
+
     This policy tracks the cosine similarity of neighbor model updates over time,
     and detects nodes that are inactive or provide redundant updates.
     Based on these evaluations, the policy suggests disconnecting such nodes
     to promote better model convergence and network efficiency.
     """
-    
+
     MAX_HISTORIC_SIZE = 10
     SIMILARITY_THRESHOLD = 0.73
     INACTIVE_THRESHOLD = 3
@@ -31,7 +31,7 @@ class QDSTrainingPolicy(TrainingPolicy):
     def __init__(self, config : dict):
         """
         Initializes the QDS training policy.
-        
+
         Args:
             config (dict): Configuration dictionary with keys:
                 - "addr": Local node address.
@@ -46,7 +46,7 @@ def __init__(self, config : dict):
         self._last_check = 0
         self._check_done = False
         self._evaluation_results = set()
-        
+
     def __str__(self):
         return "QDS"
 
@@ -94,21 +94,21 @@ async def _process_aggregation_event(self, agg_ev : AggregationEvent):
             for addr, updt in updates.items():
                 if addr == self._addr: continue
                 if not addr in self._nodes.keys(): continue
-                
+
                 deque_history, missed_count = self._nodes[addr]
                 if addr in missing_nodes:
                     if self._verbose: logging.info(f"Node inactivity counter increased for: {addr}")
                     self._nodes[addr] = (deque_history, missed_count + 1)   # Inactive rounds counter +1
                 else:
                     self._nodes[addr] = (deque_history, 0)                  # Reset inactive counter
-                    
-                #TODO Do it for the ones not using the last update received cause they are missing this round                      
+
+                #TODO Do it for the ones not using the last update received cause they are missing this round
                 (model,_) = updt
-                (self_model, _) = self_updt 
+                (self_model, _) = self_updt
                 cos_sim = cosine_metric(self_model, model, similarity=True)
                 self._nodes[addr][0].append(cos_sim)
         self._evaluation_results = await self.evaluate()
-        
+
     async def _get_nodes(self):
         """
         Safely returns a copy of the current node tracking dictionary.
@@ -118,8 +118,8 @@ async def _get_nodes(self):
         """
         async with self._nodes_lock:
             nodes = self._nodes.copy()
-        return nodes    
-    
+        return nodes
+
     async def evaluate(self):
         """
         Evaluates the current neighbor set to determine inactive or redundant nodes.
@@ -131,10 +131,10 @@ async def evaluate(self):
             self._grace_rounds -= 1
             if self._verbose: logging.info("Grace time hasnt finished...")
             return None
-        
+
         if self._verbose: logging.info("Evaluation in process")
-    
-        result = set()     
+
+        result = set()
         if self._last_check == 0:
             self._check_done = True
             nodes = await self._get_nodes()
@@ -149,18 +149,18 @@ async def evaluate(self):
                         if self._verbose: logging.info(f"Node: {node} hadn't participated in any of the last {self.INACTIVE_THRESHOLD} rounds")
                     else:
                         if self._verbose: logging.info(f"Node: {node} inactivity counter: {inactivity_counter}")
-                        
+
                     if node not in self._round_missing_nodes:
                         if last_sim < self.SIMILARITY_THRESHOLD:
                             if self._verbose: logging.info(f"Node: {node} got a similarity value of: {last_sim} under threshold: {self.SIMILARITY_THRESHOLD}")
                         else:
                             if self._verbose: logging.info(f"Node: {node} got a redundant model, cossine simmilarity: {last_sim} over threshold: {self.SIMILARITY_THRESHOLD}")
                             redundant_nodes.add((node, last_sim))
-                        
+
             if self._verbose: logging.info(f"Inactive nodes on aggregations: {inactive_nodes}")
             if self._verbose: logging.info(f"Redundant nodes on aggregations: {redundant_nodes}")
             if inactive_nodes:
-                result = result.union(inactive_nodes)    
+                result = result.union(inactive_nodes)
             if len(redundant_nodes):
                 sorted_redundant_nodes = sorted(redundant_nodes, key=lambda x: x[1])
                 n_discarded = math.ceil((len(redundant_nodes)/2))
@@ -171,11 +171,11 @@ async def evaluate(self):
         else:
             if self._verbose: logging.info(f"Evaluation is on cooldown... | {self.CHECK_COOLDOWN - self._last_check} rounds remaining")
             self._check_done = False
-            
+
         self._last_check = (self._last_check + 1)  % self.CHECK_COOLDOWN
-                             
+
         return result
-    
+
     async def get_evaluation_results(self):
         """
         Triggers suggested actions based on last evaluation results.
@@ -186,14 +186,14 @@ async def get_evaluation_results(self):
             for node_discarded in self._evaluation_results:
                 args = (node_discarded, False, True)
                 sac = factory_sa_command(
-                    "connectivity",                        
+                    "connectivity",
                     SACommandAction.DISCONNECT,
-                    self,           
-                    node_discarded,                       
-                    SACommandPRIO.MEDIUM,                 
-                    False,                                
-                    CommunicationsManager.get_instance().disconnect,  
-                    *args                                  
+                    self,
+                    node_discarded,
+                    SACommandPRIO.MEDIUM,
+                    False,
+                    CommunicationsManager.get_instance().disconnect,
+                    *args
                 )
                 await self.suggest_action(sac)
             await self.notify_all_suggestions_done(RoundEndEvent)
@@ -203,9 +203,9 @@ async def get_agent(self) -> str:
 
     async def register_sa_agent(self):
         await SuggestionBuffer.get_instance().register_event_agents(RoundEndEvent, self)
-    
+
     async def suggest_action(self, sac : SACommand):
         await SuggestionBuffer.get_instance().register_suggestion(RoundEndEvent, self, sac)
-    
+
     async def notify_all_suggestions_done(self, event_type):
-        await SuggestionBuffer.get_instance().notify_all_suggestions_done_for_agent(self, event_type)
\ No newline at end of file
+        await SuggestionBuffer.get_instance().notify_all_suggestions_done_for_agent(self, event_type)
diff --git a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/trainingpolicy.py b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/trainingpolicy.py
index cd9dae7c1..74d1b426f 100644
--- a/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/trainingpolicy.py
+++ b/nebula/core/situationalawareness/awareness/satraining/trainingpolicy/trainingpolicy.py
@@ -2,7 +2,7 @@
 from nebula.core.situationalawareness.awareness.sautils.samoduleagent import SAModuleAgent
 
 class TrainingPolicy(SAModuleAgent):
-    
+
     @abstractmethod
     async def init(self, config):
         pass
@@ -10,20 +10,20 @@ async def init(self, config):
     @abstractmethod
     async def get_evaluation_results(self):
         pass
-    
-    
+
+
 def factory_training_policy(training_policy, config) -> TrainingPolicy:
     from nebula.core.situationalawareness.awareness.satraining.trainingpolicy.bpstrainingpolicy import BPSTrainingPolicy
     from nebula.core.situationalawareness.awareness.satraining.trainingpolicy.qdstrainingpolicy import QDSTrainingPolicy
     from nebula.core.situationalawareness.awareness.satraining.trainingpolicy.htstrainingpolicy import HTSTrainingPolicy
     from nebula.core.situationalawareness.awareness.satraining.trainingpolicy.fastreboot import FastReboot
-    
+
     options = {
         "Broad-Propagation Strategy": BPSTrainingPolicy,   # "Broad-Propagation Strategy"  (BPS) -- default value
         "Quality-Driven Selection": QDSTrainingPolicy,   # "Quality-Driven Selection"    (QDS)
         "Hybrid Training Strategy": HTSTrainingPolicy,   # "Hybrid Training Strategy"    (HTS)
         "Fast Reboot Training Strategy": FastReboot,         # "Fast Reboot Training Strategy" (FRTS)
-    } 
-    
+    }
+
     cs = options.get(training_policy, BPSTrainingPolicy)
-    return cs(config)
\ No newline at end of file
+    return cs(config)
diff --git a/nebula/core/situationalawareness/discovery/candidateselection/distcandidateselector.py b/nebula/core/situationalawareness/discovery/candidateselection/distcandidateselector.py
index d389f8bbb..fec0c1b09 100644
--- a/nebula/core/situationalawareness/discovery/candidateselection/distcandidateselector.py
+++ b/nebula/core/situationalawareness/discovery/candidateselection/distcandidateselector.py
@@ -10,17 +10,17 @@ class DistanceCandidateSelector(CandidateSelector):
     """
     Selects candidate nodes based on their physical proximity.
 
-    This selector uses geolocation data to filter candidates within a 
-    maximum distance threshold. It listens for GPS updates and maintains 
+    This selector uses geolocation data to filter candidates within a
+    maximum distance threshold. It listens for GPS updates and maintains
     a mapping of node identifiers to their distances and coordinates.
 
     Attributes:
-        MAX_DISTANCE_THRESHOLD (int): Maximum distance (in meters) allowed 
+        MAX_DISTANCE_THRESHOLD (int): Maximum distance (in meters) allowed
             for a node to be considered a valid candidate.
         candidates (list): List of candidate nodes to be evaluated.
-        candidates_lock (Locker): Async lock for managing concurrent access 
+        candidates_lock (Locker): Async lock for managing concurrent access
             to the candidate list.
-        nodes_distances (dict): Maps node IDs to a tuple containing the 
+        nodes_distances (dict): Maps node IDs to a tuple containing the
             distance and GPS coordinates.
         nodes_distances_lock (Locker): Async lock for the distance mapping.
         _verbose (bool): Flag to enable verbose logging for debugging.
diff --git a/nebula/core/situationalawareness/discovery/candidateselection/fccandidateselector.py b/nebula/core/situationalawareness/discovery/candidateselection/fccandidateselector.py
index 5e82db6b8..b0840b804 100644
--- a/nebula/core/situationalawareness/discovery/candidateselection/fccandidateselector.py
+++ b/nebula/core/situationalawareness/discovery/candidateselection/fccandidateselector.py
@@ -24,7 +24,7 @@ class FCCandidateSelector(CandidateSelector):
     Inherits from:
         CandidateSelector: Base class interface for candidate selection logic.
     """
-    
+
     def __init__(self):
         self.candidates = []
         self.candidates_lock = Locker(name="candidates_lock")
diff --git a/nebula/core/situationalawareness/discovery/candidateselection/ringcandidateselector.py b/nebula/core/situationalawareness/discovery/candidateselection/ringcandidateselector.py
index 5a90df6c5..d1b3bb33c 100644
--- a/nebula/core/situationalawareness/discovery/candidateselection/ringcandidateselector.py
+++ b/nebula/core/situationalawareness/discovery/candidateselection/ringcandidateselector.py
@@ -27,7 +27,7 @@ class RINGCandidateSelector(CandidateSelector):
     Inherits from:
         CandidateSelector: Base interface for candidate selection strategies.
     """
-    
+
     def __init__(self):
         self._candidates = []
         self._rejected_candidates = []
diff --git a/nebula/core/situationalawareness/discovery/candidateselection/stdcandidateselector.py b/nebula/core/situationalawareness/discovery/candidateselection/stdcandidateselector.py
index fb20b59a7..bbb5fd7db 100644
--- a/nebula/core/situationalawareness/discovery/candidateselection/stdcandidateselector.py
+++ b/nebula/core/situationalawareness/discovery/candidateselection/stdcandidateselector.py
@@ -9,8 +9,8 @@ class STDandidateSelector(CandidateSelector):
     Candidate selector for scenarios without a predefined structural topology.
 
     In cases where the federation topology is not explicitly structured,
-    this selector chooses candidates based on the average number of neighbors 
-    indicated in their offers. It selects approximately as many candidates as the 
+    this selector chooses candidates based on the average number of neighbors
+    indicated in their offers. It selects approximately as many candidates as the
     average neighbor count, aiming to balance connectivity dynamically.
 
     Attributes:
@@ -27,7 +27,7 @@ class STDandidateSelector(CandidateSelector):
     Inherits from:
         CandidateSelector: Base interface for candidate selection strategies.
     """
-    
+
     def __init__(self):
         self.candidates = []
         self.candidates_lock = Locker(name="candidates_lock")
diff --git a/nebula/core/situationalawareness/discovery/modelhandlers/defaultmodelhandler.py b/nebula/core/situationalawareness/discovery/modelhandlers/defaultmodelhandler.py
index fa8aec8d4..bd16bae8b 100644
--- a/nebula/core/situationalawareness/discovery/modelhandlers/defaultmodelhandler.py
+++ b/nebula/core/situationalawareness/discovery/modelhandlers/defaultmodelhandler.py
@@ -7,14 +7,14 @@ class DefaultModelHandler(ModelHandler):
     """
     Provides the initial default model.
 
-    This handler returns the baseline model with default weights, 
-    typically used at the start of the federation or when no suitable 
+    This handler returns the baseline model with default weights,
+    typically used at the start of the federation or when no suitable
     model offers have been received from peers.
 
     Inherits from:
         ModelHandler: Provides the base interface for model operations.
     """
-    
+
     def __init__(self):
         self.model = None
         self.rounds = 0
diff --git a/nebula/core/situationalawareness/discovery/modelhandlers/stdmodelhandler.py b/nebula/core/situationalawareness/discovery/modelhandlers/stdmodelhandler.py
index 15975dee1..83506249e 100644
--- a/nebula/core/situationalawareness/discovery/modelhandlers/stdmodelhandler.py
+++ b/nebula/core/situationalawareness/discovery/modelhandlers/stdmodelhandler.py
@@ -4,7 +4,7 @@
 
 class STDModelHandler(ModelHandler):
     """
-    Handles the selection and acquisition of the most up-to-date model 
+    Handles the selection and acquisition of the most up-to-date model
     during the discovery phase of the federation process.
 
     This handler choose the first model received.
@@ -13,10 +13,10 @@ class STDModelHandler(ModelHandler):
         ModelHandler: Provides the base interface for model operations.
 
     Intended Use:
-        Used during the initial, when a node discovers others and must 
+        Used during the initial, when a node discovers others and must
         align itself with the most recent global model state.
     """
-    
+
     def __init__(self):
         self.model = None
         self.rounds = 0
diff --git a/nebula/frontend/static/css/deployment.css b/nebula/frontend/static/css/deployment.css
index 03fa0ab30..2fecffe9a 100644
--- a/nebula/frontend/static/css/deployment.css
+++ b/nebula/frontend/static/css/deployment.css
@@ -234,4 +234,4 @@ button[title]:hover::after {
 #predefined-topology-nodes:disabled{
     background:#e9ecef;
     cursor:not-allowed;
-}
\ No newline at end of file
+}
diff --git a/nebula/frontend/static/js/deployment/topology.js b/nebula/frontend/static/js/deployment/topology.js
index 28d5ec0ad..5b6a99ce9 100644
--- a/nebula/frontend/static/js/deployment/topology.js
+++ b/nebula/frontend/static/js/deployment/topology.js
@@ -533,7 +533,7 @@ const TopologyManager = (function() {
 
     function updateIPsAndPorts() {
         const isPhysical = document.getElementById("physical-devices-radio").checked;
-    
+
         /*  ⬅︎  if physical deployment get default IPs        */
         if (isPhysical) {
             gData.nodes.forEach((node, idx) => {
@@ -541,11 +541,11 @@ const TopologyManager = (function() {
             });
             return;
         }
-    
+
         /*  Docker or Process → generate sintetic IPs                       */
         const isProcess = document.getElementById("process-radio").checked;
         const baseIP = "192.168.50";
-    
+
         gData.nodes.forEach((node, idx) => {
             node.ip   = isProcess ? "127.0.0.1" : `${baseIP}.${idx + 2}`;
             node.port = (45001 + idx).toString();
@@ -572,23 +572,23 @@ const TopologyManager = (function() {
 
     function setPhysicalIPs(ipList = []) {
         if (!ipList.length) return;
- 
+
         /*  1. Update input for the user                 */
         const nodesInput = document.getElementById('predefined-topology-nodes');
         if (nodesInput) {
             nodesInput.value = ipList.length;
-            nodesInput.disabled = true;                 
-            nodesInput.classList.add('disabled');       
+            nodesInput.disabled = true;
+            nodesInput.classList.add('disabled');
         }
- 
+
         /*  2. Regenerate topology         */
         generatePredefinedTopology();           // ← create Nodes and Links
- 
+
         /*  3. Assign IPs                                             */
         gData.nodes.forEach((n, idx) => {
             n.ip = ipList[idx] || n.ip;         // if more nodes than IPs
         });
- 
+
         updateGraph();                          // redraw
     }
 
@@ -660,7 +660,7 @@ const TopologyManager = (function() {
                 generatePredefinedTopology();
                 return;
             }
- 
+
             // Ensure each node has the required properties
             data.nodes = data.nodes.map(node => ({
                 id: node.id,
@@ -670,13 +670,13 @@ const TopologyManager = (function() {
                 neighbors: node.neighbors || [],
                 links: node.links || []
             }));
- 
+
             // Ensure each link has the required properties
             data.links = data.links.map(link => ({
                 source: link.source,
                 target: link.target
             }));
- 
+
             gData = data;
             updateGraph();
         },
@@ -690,7 +690,7 @@ const TopologyManager = (function() {
                 nodes: [],
                 links: []
             };
-            // Update graph 
+            // Update graph
             if (Graph) {
                 Graph.graphData(gData);
             }
diff --git a/nebula/frontend/static/js/deployment/ui-controls.js b/nebula/frontend/static/js/deployment/ui-controls.js
index ed02efa76..97a76dce9 100644
--- a/nebula/frontend/static/js/deployment/ui-controls.js
+++ b/nebula/frontend/static/js/deployment/ui-controls.js
@@ -11,11 +11,11 @@ const UIControls = (function() {
         /* === control Physical + Predefined => block input === */
         document.querySelectorAll('input[name="deploymentRadioOptions"]')
         .forEach(r => r.addEventListener('change', togglePredefinedNodesInput));
- 
+
         ['custom-topology-btn', 'predefined-topology-btn']
         .forEach(id => document.getElementById(id)
                 .addEventListener('change', togglePredefinedNodesInput));
- 
+
         togglePredefinedNodesInput();
         setupVpnDiscover();
         setupParticipantDisplay();
@@ -650,33 +650,33 @@ const UIControls = (function() {
         const radios      = document.querySelectorAll('input[name="deploymentRadioOptions"]');
         const discoverBtn = document.getElementById('discoverDevicesBtn');
         if (!discoverBtn || !radios.length) return;
- 
+
         const toggle = () => {
             const sel = document.querySelector('input[name="deploymentRadioOptions"]:checked');
             discoverBtn.disabled = sel.value !== 'physical';
         };
- 
+
         radios.forEach(r => r.addEventListener('change', toggle));
         toggle();
     }
- 
+
     function setupVpnDiscover() {
         const discoverBtn = document.getElementById('discoverDevicesBtn');
         if (!discoverBtn) return;
-      
+
         discoverBtn.addEventListener('click', async () => {
             try {
                 const res = await fetch('/platform/api/discover-vpn');
                 if (!res.ok) throw new Error(res.statusText);
-          
+
                 const { ips } = await res.json();
-          
+
                 const form = document.getElementById('vpn-form');
                 form.innerHTML = '';
-                
+
                 const currentScenario = window.ScenarioManager.getScenariosList()[window.ScenarioManager.getActualScenario()];
                 const selectedIPs = currentScenario?.physical_ips || [];
- 
+
                 ips.forEach(ip => {
                     const wrapper = document.createElement('div');
                     wrapper.classList.add('form-check');
@@ -687,18 +687,18 @@ const UIControls = (function() {
                     `;
                     form.appendChild(wrapper);
                 });
-          
+
                 const modal = new bootstrap.Modal(document.getElementById('vpnModal'));
                 modal.show();
- 
+
                 document.getElementById('vpn-accept-btn').onclick = () => {
                     const selected = Array.from(form.querySelectorAll('input:checked'))
                                         .map(i => i.value);
-                
+
                     window.ScenarioManager.setPhysicalIPs(selected);
-                    
+
                     window.TopologyManager.setPhysicalIPs(selected);
-                    
+
                     modal.hide();
                 };
             } catch (err) {
@@ -707,19 +707,19 @@ const UIControls = (function() {
             }
         });
     }
- 
+
     function togglePredefinedNodesInput() {
     const deployment   = document.querySelector('input[name="deploymentRadioOptions"]:checked')?.value;
     const isPredefined = document.getElementById('predefined-topology-btn').checked;
     const nodesInput   = document.getElementById('predefined-topology-nodes');
-    
+
     if (!nodesInput) return;
-    
+
     const disable = deployment === 'physical' && isPredefined;
     nodesInput.disabled = disable;
     nodesInput.classList.toggle('disabled', disable);
     }
- 
+
     function setupDeploymentRadios() {
         const radios = document.querySelectorAll('input[name="deploymentRadioOptions"]');
         radios.forEach(radio => {
diff --git a/nebula/physical/api.py b/nebula/physical/api.py
index 6d44428e7..ac6b17ffe 100644
--- a/nebula/physical/api.py
+++ b/nebula/physical/api.py
@@ -379,8 +379,8 @@ def setup_new_run():
 
     Expected multipart-form fields
     -------------------------------
-    * **config**     – JSON with scenario, network and security arguments  
-    * **global_test** – shared evaluation dataset (`*.h5`)  
+    * **config**     – JSON with scenario, network and security arguments
+    * **global_test** – shared evaluation dataset (`*.h5`)
     * **train_set**   – participant-specific training dataset (`*.h5`)
 
     The function rewrites paths inside *config*, validates neighbour IPs
@@ -489,4 +489,4 @@ def setup_new_run():
 # -----------------------------------------------------------------------------
 if __name__ == "__main__":
     # Local testing:  python main.py
-    app.run(host="0.0.0.0", port=8000, debug=False)
\ No newline at end of file
+    app.run(host="0.0.0.0", port=8000, debug=False)
diff --git a/nebula/physical/node.sh b/nebula/physical/node.sh
index 6fe24f688..f7f2469ba 100644
--- a/nebula/physical/node.sh
+++ b/nebula/physical/node.sh
@@ -1,7 +1,7 @@
 ###############################################################################
 # RUN NEBULA PHYSICAL NODE ─────────────────────────────────────
 ###############################################################################
-VENV_DIR=".venv" 
+VENV_DIR=".venv"
 APP_PORT=8000
 
 source "${VENV_DIR}/bin/activate"
@@ -13,4 +13,4 @@ fi
 
 echo "· Launching Gunicorn (Flask) on port ${APP_PORT} …"
 export FLASK_APP=api.py
-exec gunicorn -w 1 -b "0.0.0.0:${APP_PORT}" "api:app"
\ No newline at end of file
+exec gunicorn -w 1 -b "0.0.0.0:${APP_PORT}" "api:app"
diff --git a/nebula/utils.py b/nebula/utils.py
index cfc7a558b..9d7557968 100644
--- a/nebula/utils.py
+++ b/nebula/utils.py
@@ -174,7 +174,7 @@ def check_docker_by_prefix(cls, prefix):
             for container in containers:
                 if container.name.startswith(prefix):
                     return True
-                
+
             return False
 
         except docker.errors.APIError: