# Source code for decent_bench.benchmark._metric_result
from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from typing import Literal
import pandas as pd
from decent_bench.algorithms import Algorithm
from decent_bench.benchmark._compute.compute_tables import aggregate_table_metrics
from decent_bench.metrics import Metric
from decent_bench.metrics._metrics_view import NetworkMetricsView
from decent_bench.networks import Network
# [docs]  (Sphinx viewcode link marker from the rendered page; not part of the module)
@dataclass
class MetricResult:
    """
    Result of metric computation, containing raw data and statistics across agents and trials.

    This class is used to store the computed metrics from a benchmark execution.
    It is returned by the :func:`~decent_bench.benchmark.compute_metrics` function and contains
    all the information about the computed metrics, including agent-level metrics, table statistics,
    and plot data for visualization.

    * `network_views`: contains the raw network-level metrics for each algorithm, organized by algorithm where
      each algorithm maps to a sequence of trials, with each trial containing a
      :class:`~decent_bench.metrics.NetworkMetricsView`.
    * `raw_table_results`: contains raw metric evaluations in a dictionary mapping Metric to pandas.DataFrame. Each
      DataFrame has columns (algorithm, trial, agent, value). Table metrics are evaluated only at the *most recent*
      iteration reached during benchmarking.
    * `raw_plot_results`: contains raw metric evaluations in a dictionary mapping Metric to pandas.DataFrame. Each
      DataFrame has columns (algorithm, trial, agent, iteration, value). Plot metrics are evaluated at *all*
      iterations reached during benchmarking.
    * `table_results`: contains the aggregated results in a pandas.DataFrame with columns
      (metric, algorithm, statistic, mean, std).
    * `plot_results`: contains the aggregated results in a pandas.DataFrame with columns
      (metric, algorithm, iteration, mean, min, max).

    `table_results` can be recomputed with a new set of statistics across agents by using
    :meth:`update_table_results`.

    Use the properties `algorithms`, `table_metrics` and `plot_metrics` to check for which algorithms and metrics
    the object stores data. Note that these properties assume that all attributes have the same set of metrics and
    algorithms, since the object is generated by the backend; no sanity check is performed, so altering any of the
    attributes might lead to unexpected results.
    """

    network_views: Mapping[Algorithm[Network], Sequence[NetworkMetricsView]] | None
    raw_table_results: Mapping[Metric, pd.DataFrame] | None
    raw_plot_results: Mapping[Metric, pd.DataFrame] | None
    table_results: pd.DataFrame | None
    plot_results: pd.DataFrame | None

    def update_table_results(self, statistics_across_agents: list[str] | None) -> pd.DataFrame | None:
        """
        Recompute aggregated table statistics from stored raw table results.

        The new aggregate is stored in :attr:`table_results` and also returned.

        Args:
            statistics_across_agents: statistics to compute across agents; forwarded unchanged to
                ``aggregate_table_metrics``.

        Returns:
            The recomputed table, or ``None`` when `raw_table_results` is ``None`` or empty. In that case
            `table_results` is left untouched.

        """
        if not self.raw_table_results:
            return None
        self.table_results = aggregate_table_metrics(self.raw_table_results, statistics_across_agents)
        return self.table_results

    @property
    def algorithms(self) -> list[str]:
        """
        Return ``name`` of available algorithms.

        The names can be used for filtering in :func:`~decent_bench.benchmark.display_metrics`.
        They are read from `network_views` when it is set; otherwise from the ``algorithm`` column of the
        first DataFrame found in the raw table or raw plot results.
        """
        if self.network_views is not None:
            return sorted({algorithm.name for algorithm in self.network_views})
        data = self._which_attribute()
        if not data:
            return []
        # All frames are assumed to cover the same algorithms, so the first one is representative.
        first_frame = next(iter(data.values()))
        return sorted(first_frame["algorithm"].unique())

    @property
    def table_metrics(self) -> list[str]:
        """
        Return ``description`` of available table metrics.

        The descriptions can be used for filtering in :func:`~decent_bench.benchmark.display_metrics`.
        """
        data = self._which_attribute("table")
        if not data:
            return []
        return sorted({metric.description for metric in data})

    @property
    def plot_metrics(self) -> list[str]:
        """
        Return ``description`` of available plot metrics.

        The descriptions can be used for filtering in :func:`~decent_bench.benchmark.display_metrics`.
        """
        data = self._which_attribute("plot")
        if not data:
            return []
        return sorted({metric.description for metric in data})

    @property
    def iterations(self) -> list[int]:
        """
        Return all the iterations that were reached in at least one trial by at least one algorithm.

        The values are taken from the ``iteration`` column of the first DataFrame in `raw_plot_results`
        and returned sorted, as plain Python scalars rather than NumPy scalars.
        """
        if not self.raw_plot_results:
            return []
        first_frame = next(iter(self.raw_plot_results.values()))
        # Series.tolist() yields Python scalars, so the result matches the ``list[int]`` annotation.
        return sorted(first_frame["iteration"].drop_duplicates().tolist())

    def _which_attribute(self, type_: Literal["all", "table", "plot"] = "all") -> Mapping[Metric, pd.DataFrame] | None:
        """
        Select the raw-results mapping to inspect.

        Args:
            type_: ``"table"`` or ``"plot"`` restricts the lookup to the corresponding raw results;
                ``"all"`` tries the raw table results first and then the raw plot results.

        Returns:
            The first candidate mapping that holds at least one entry, or ``None`` if there is none.

        """
        if type_ == "all":
            candidates = [self.raw_table_results, self.raw_plot_results]
        elif type_ == "table":
            candidates = [self.raw_table_results]
        else:
            candidates = [self.raw_plot_results]
        # Skip empty mappings as well as None, so an empty table mapping cannot hide populated plot results.
        return next((candidate for candidate in candidates if candidate), None)