Source code for decent_bench.benchmark._metric_result

from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import Literal

import pandas as pd

from decent_bench.algorithms import Algorithm
from decent_bench.benchmark._compute.compute_tables import aggregate_table_metrics
from decent_bench.metrics import Metric
from decent_bench.metrics._metrics_view import NetworkMetricsView
from decent_bench.networks import Network


@dataclass
class MetricResult:
    """
    Result of metric computation, containing raw data and statistics across agents and trials.

    This class is used to store the computed metrics from a benchmark execution. It is returned by the
    :func:`~decent_bench.benchmark.compute_metrics` function and contains all the information about the
    computed metrics, including agent-level metrics, table statistics, and plot data for visualization.

    * `network_views`: contains the raw network-level metrics for each algorithm, organized by algorithm
      where each algorithm maps to a sequence of trials, with each trial containing a
      :class:`~decent_bench.metrics.NetworkMetricsView`.
    * `raw_table_results`: contains raw metric evaluations in a dictionary mapping Metric to
      pandas.DataFrame. Each DataFrame has columns (algorithm, trial, agent, value). Table metrics are
      evaluated only at the *most recent* iteration reached during benchmarking.
    * `raw_plot_results`: contains raw metric evaluations in a dictionary mapping Metric to
      pandas.DataFrame. Each DataFrame has columns (algorithm, trial, agent, iteration, value). Plot
      metrics are evaluated at *all* iterations reached during benchmarking.
    * `table_results`: contains the aggregated results in a pandas.DataFrame with columns
      (metric, algorithm, statistic, mean, std).
    * `plot_results`: contains the aggregated results in a pandas.DataFrame with columns
      (metric, algorithm, iteration, mean, min, max).

    `table_results` can be recomputed with a new set of statistics across agents by using
    :meth:`update_table_results`.

    Use the properties `algorithms`, `table_metrics` and `plot_metrics` to check for which algorithms and
    metrics the object stores data. Note that these properties assume that all attributes have the same
    set of metrics and algorithms, since the object is generated by the backend; no sanity check is
    performed, so altering any of the attributes might lead to unexpected results.

    """

    network_views: Mapping[Algorithm[Network], Sequence[NetworkMetricsView]] | None
    raw_table_results: Mapping[Metric, pd.DataFrame] | None
    raw_plot_results: Mapping[Metric, pd.DataFrame] | None
    table_results: pd.DataFrame | None
    plot_results: pd.DataFrame | None

    def update_table_results(self, statistics_across_agents: list[str] | None) -> pd.DataFrame | None:
        """
        Recompute aggregated table statistics from stored raw table results.

        The freshly aggregated table replaces ``table_results`` on this object.

        Args:
            statistics_across_agents: forwarded unchanged to ``aggregate_table_metrics`` together with
                ``raw_table_results``.

        Returns:
            The new ``table_results``, or ``None`` when ``raw_table_results`` is ``None`` or empty (in
            which case ``table_results`` is left untouched).

        """
        if not self.raw_table_results:
            return None
        self.table_results = aggregate_table_metrics(self.raw_table_results, statistics_across_agents)
        return self.table_results

    @property
    def algorithms(self) -> list[str]:
        """
        Return ``name`` of available algorithms.

        The names can be used for filtering in :func:`~decent_bench.benchmark.display_metrics`. They are
        read from ``network_views`` when it holds data; otherwise from the ``algorithm`` column of the
        first DataFrame of the first populated raw result mapping.
        """
        # Truthiness (not ``is not None``) so that an empty mapping falls back to the raw results.
        if self.network_views:
            return sorted({algorithm.name for algorithm in self.network_views})
        data = self._which_attribute()
        if not data:
            return []
        first_frame = next(iter(data.values()))
        return sorted(first_frame["algorithm"].unique())

    @property
    def table_metrics(self) -> list[str]:
        """
        Return ``description`` of available table metrics.

        The descriptions can be used for filtering in :func:`~decent_bench.benchmark.display_metrics`.
        """
        data = self._which_attribute("table")
        if not data:
            return []
        return sorted({metric.description for metric in data})

    @property
    def plot_metrics(self) -> list[str]:
        """
        Return ``description`` of available plot metrics.

        The descriptions can be used for filtering in :func:`~decent_bench.benchmark.display_metrics`.
        """
        data = self._which_attribute("plot")
        if not data:
            return []
        return sorted({metric.description for metric in data})

    @property
    def iterations(self) -> list[int]:
        """
        Return all the iterations that were reached in at least one trial by at least one algorithm.

        The values are read from the ``iteration`` column of the first DataFrame in ``raw_plot_results``;
        an empty list is returned when no raw plot results are stored.
        """
        if not self.raw_plot_results:
            return []
        first_frame = next(iter(self.raw_plot_results.values()))
        return sorted(first_frame["iteration"].unique())

    def _which_attribute(self, type_: Literal["all", "table", "plot"] = "all") -> Mapping[Metric, pd.DataFrame] | None:
        """
        Select the raw result mapping to inspect.

        Args:
            type_: ``"table"`` considers only ``raw_table_results``, ``"all"`` considers
                ``raw_table_results`` first and then ``raw_plot_results``, and any other value considers
                only ``raw_plot_results``.

        Returns:
            The first considered mapping that actually contains data, or ``None`` if there is none.

        """
        if type_ == "all":
            candidates = [self.raw_table_results, self.raw_plot_results]
        elif type_ == "table":
            candidates = [self.raw_table_results]
        else:
            candidates = [self.raw_plot_results]
        # Select on truthiness rather than ``is not None``: an empty mapping carries no data and must not
        # shadow a populated one that comes later in the candidate list.
        return next((candidate for candidate in candidates if candidate), None)