Redo Training Results Analysis

This commit is contained in:
Tobias Hölzer 2026-01-19 16:35:38 +01:00
parent 2664579a75
commit 7d874f7f92
16 changed files with 1455 additions and 2227 deletions

View file

@ -12,7 +12,6 @@ Pages:
import streamlit as st import streamlit as st
from entropice.dashboard.views.autogluon_analysis_page import render_autogluon_analysis_page
from entropice.dashboard.views.dataset_page import render_dataset_page from entropice.dashboard.views.dataset_page import render_dataset_page
from entropice.dashboard.views.inference_page import render_inference_page from entropice.dashboard.views.inference_page import render_inference_page
from entropice.dashboard.views.model_state_page import render_model_state_page from entropice.dashboard.views.model_state_page import render_model_state_page
@ -28,7 +27,6 @@ def main():
overview_page = st.Page(render_overview_page, title="Overview", icon="🏡", default=True) overview_page = st.Page(render_overview_page, title="Overview", icon="🏡", default=True)
data_page = st.Page(render_dataset_page, title="Dataset", icon="📊") data_page = st.Page(render_dataset_page, title="Dataset", icon="📊")
training_analysis_page = st.Page(render_training_analysis_page, title="Training Results Analysis", icon="🦾") training_analysis_page = st.Page(render_training_analysis_page, title="Training Results Analysis", icon="🦾")
autogluon_page = st.Page(render_autogluon_analysis_page, title="AutoGluon Analysis", icon="🤖")
model_state_page = st.Page(render_model_state_page, title="Model State", icon="🧮") model_state_page = st.Page(render_model_state_page, title="Model State", icon="🧮")
inference_page = st.Page(render_inference_page, title="Inference", icon="🗺️") inference_page = st.Page(render_inference_page, title="Inference", icon="🗺️")
@ -36,7 +34,7 @@ def main():
{ {
"Overview": [overview_page], "Overview": [overview_page],
"Data": [data_page], "Data": [data_page],
"Experiments": [training_analysis_page, autogluon_page, model_state_page], "Experiments": [training_analysis_page, model_state_page],
"Inference": [inference_page], "Inference": [inference_page],
} }
) )

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,417 @@
"""Hyperparameter space plotting functions."""
import matplotlib.colors as mcolors
import pandas as pd
import plotly.graph_objects as go
from entropice.dashboard.utils.colors import get_cmap, get_palette
def plot_performance_summary(results: pd.DataFrame, refit_metric: str) -> tuple[pd.DataFrame, pd.DataFrame, dict]:
    """Compute performance summary statistics.

    Args:
        results: DataFrame with CV results (sklearn ``cv_results_`` style).
        refit_metric: The metric used for refit (e.g., 'f1', 'f1_weighted').

    Returns:
        Tuple of (best_scores_df, score_stats_df, best_params_dict).
    """
    score_cols = [c for c in results.columns if c.startswith("mean_test_")]
    if not score_cols:
        # Nothing to summarize without test-score columns.
        return pd.DataFrame(), pd.DataFrame(), {}

    def _pretty(col: str) -> str:
        # "mean_test_f1_weighted" -> "F1 Weighted"
        return col.replace("mean_test_", "").replace("_", " ").title()

    # Best score per metric, formatted for display.
    best_scores = pd.DataFrame(
        [{"Metric": _pretty(c), "Best Score": f"{results[c].max():.4f}"} for c in score_cols]
    )

    # Mean ± standard deviation per metric across all trials.
    score_stats = pd.DataFrame(
        [
            {"Metric": _pretty(c), "Mean ± Std": f"{results[c].mean():.4f} ± {results[c].std():.4f}"}
            for c in score_cols
        ]
    )

    # Best parameter combination according to the refit metric
    # (fall back to the first score column if the refit column is missing).
    refit_col = f"mean_test_{refit_metric}"
    if refit_col not in results.columns:
        refit_col = score_cols[0]
    best_row = results.loc[results[refit_col].idxmax()]
    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    best_params = {c.replace("param_", ""): best_row[c] for c in param_cols}

    return best_scores, score_stats, best_params
def plot_parameter_distributions(results: pd.DataFrame, param_grid: dict | None = None) -> dict[str, go.Figure]:
    """Create histogram charts for parameter distributions.

    Args:
        results: DataFrame with CV results.
        param_grid: Optional parameter grid with distribution information
            (currently unused; kept for API symmetry with the other plotters).

    Returns:
        Dictionary mapping parameter names to Plotly figures.
    """
    # Get parameter columns
    param_cols = [col for col in results.columns if col.startswith("param_") and col != "params"]
    if not param_cols:
        return {}

    cmap = get_cmap("parameter_distribution")
    bar_color = mcolors.rgb2hex(cmap(0.5))

    charts: dict[str, go.Figure] = {}
    for param_col in param_cols:
        param_name = param_col.replace("param_", "")
        param_values = results[param_col].dropna()
        if len(param_values) == 0:
            continue

        fig = go.Figure()
        if pd.api.types.is_numeric_dtype(param_values):
            # Numeric parameters: histogram of sampled values.
            fig.add_trace(
                go.Histogram(
                    x=param_values,
                    nbinsx=30,
                    marker_color=bar_color,
                    name=param_name,
                )
            )
        else:
            # Categorical parameters: bar chart of value counts.
            value_counts = param_values.value_counts().reset_index()
            value_counts.columns = [param_name, "count"]
            fig.add_trace(
                go.Bar(
                    x=value_counts[param_name],
                    y=value_counts["count"],
                    marker_color=bar_color,
                    name=param_name,
                )
            )
        # The layout is identical for both chart types, so configure it once
        # (previously duplicated in both branches).
        fig.update_layout(
            title=f"Distribution of {param_name}",
            xaxis_title=param_name,
            yaxis_title="Count",
            height=400,
            showlegend=False,
        )
        charts[param_name] = fig
    return charts
def plot_score_vs_parameters(
    results: pd.DataFrame, metric: str, param_grid: dict | None = None
) -> dict[str, go.Figure]:
    """Create scatter plots of score vs each parameter.

    Args:
        results: DataFrame with CV results.
        metric: The metric to plot (e.g., 'f1', 'accuracy').
        param_grid: Optional parameter grid with distribution information.

    Returns:
        Dictionary mapping parameter names to Plotly figures.
    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return {}
    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    if not param_cols:
        return {}

    # Shared styling: colorscale from the project palette and a display label.
    hex_colors = get_palette(metric, n_colors=256)
    colorscale = [[idx / 255, color] for idx, color in enumerate(hex_colors)]
    metric_label = metric.replace("_", " ").title()

    def _is_loguniform(name: str) -> bool:
        # A log x-axis is used when the search space declared a loguniform prior.
        config = (param_grid or {}).get(name)
        return isinstance(config, dict) and config.get("distribution") == "loguniform"

    charts: dict[str, go.Figure] = {}
    for param_col in param_cols:
        param_name = param_col.replace("param_", "")
        if results[param_col].dropna().empty:
            continue

        hover_text = [
            f"{param_name}: {val}<br>Score: {score:.4f}"
            for val, score in zip(results[param_col], results[score_col])
        ]
        fig = go.Figure(
            go.Scatter(
                x=results[param_col],
                y=results[score_col],
                mode="markers",
                marker={
                    "size": 8,
                    "color": results[score_col],
                    "colorscale": colorscale,
                    "showscale": False,
                    "opacity": 0.6,
                },
                text=hover_text,
                hovertemplate="%{text}<extra></extra>",
            )
        )
        fig.update_layout(
            title=f"{metric_label} vs {param_name}",
            xaxis_title=param_name,
            xaxis_type="log" if _is_loguniform(param_name) else "linear",
            yaxis_title=metric_label,
            height=400,
            showlegend=False,
        )
        charts[param_name] = fig
    return charts
def plot_parameter_correlations(results: pd.DataFrame, metric: str) -> go.Figure | None:
    """Create correlation bar chart between parameters and score.

    Args:
        results: DataFrame with CV results.
        metric: The metric to analyze (e.g., 'f1', 'accuracy').

    Returns:
        Plotly figure or None if no numeric parameters found.
    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return None

    # Correlation is only meaningful for numeric parameters.
    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    numeric_params = [c for c in param_cols if pd.api.types.is_numeric_dtype(results[c])]
    if not numeric_params:
        return None

    # Pearson correlation of each numeric parameter with the score.
    correlations = [
        {
            "Parameter": col.replace("param_", ""),
            "Correlation": results[[col, score_col]].corr().iloc[0, 1],
        }
        for col in numeric_params
    ]
    corr_df = pd.DataFrame(correlations).sort_values("Correlation", ascending=False)

    # Diverging colormap anchored at [-1, 1] so sign is visually meaningful.
    hex_colors = get_palette("correlation", n_colors=256)
    bar = go.Bar(
        x=corr_df["Correlation"],
        y=corr_df["Parameter"],
        orientation="h",
        marker={
            "color": corr_df["Correlation"],
            "colorscale": [[idx / 255, color] for idx, color in enumerate(hex_colors)],
            "cmin": -1,
            "cmax": 1,
            "showscale": False,
        },
        text=[f"{c:.3f}" for c in corr_df["Correlation"]],
        hovertemplate="%{y}<br>Correlation: %{x:.3f}<extra></extra>",
    )
    fig = go.Figure(bar)
    fig.update_layout(
        xaxis_title="Correlation with Score",
        yaxis_title="Parameter",
        # Grow the chart with the number of parameters, but keep a sane minimum.
        height=max(300, len(correlations) * 30),
        showlegend=False,
    )
    return fig
def plot_parameter_interactions(results: pd.DataFrame, metric: str, param_grid: dict | None = None) -> list[go.Figure]:
    """Create scatter plots showing parameter interactions.

    Args:
        results: DataFrame with CV results.
        metric: The metric to visualize (e.g., 'f1', 'accuracy').
        param_grid: Optional parameter grid with distribution information.

    Returns:
        List of Plotly figures showing parameter interactions.
    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return []

    # Pairwise plots only make sense with at least two numeric parameters.
    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    numeric_params = [c for c in param_cols if pd.api.types.is_numeric_dtype(results[c])]
    if len(numeric_params) < 2:
        return []

    hex_colors = get_palette(metric, n_colors=256)
    colorscale = [[idx / 255, color] for idx, color in enumerate(hex_colors)]
    metric_label = metric.replace("_", " ").title()

    def _axis_type(name: str) -> str:
        # Use a log axis when the search space declared a loguniform prior.
        config = (param_grid or {}).get(name)
        if isinstance(config, dict) and config.get("distribution") == "loguniform":
            return "log"
        return "linear"

    param_names = [c.replace("param_", "") for c in numeric_params]
    charts: list[go.Figure] = []
    # One figure per unordered parameter pair, colored by score.
    for first in range(len(param_names) - 1):
        for second in range(first + 1, len(param_names)):
            x_param, y_param = param_names[first], param_names[second]
            x_col, y_col = f"param_{x_param}", f"param_{y_param}"

            hover_text = [
                f"{x_param}: {x_val}<br>{y_param}: {y_val}<br>Score: {score:.4f}"
                for x_val, y_val, score in zip(results[x_col], results[y_col], results[score_col])
            ]
            fig = go.Figure(
                go.Scatter(
                    x=results[x_col],
                    y=results[y_col],
                    mode="markers",
                    marker={
                        "size": 8,
                        "color": results[score_col],
                        "colorscale": colorscale,
                        "showscale": True,
                        "colorbar": {"title": metric_label},
                        "opacity": 0.7,
                    },
                    text=hover_text,
                    hovertemplate="%{text}<extra></extra>",
                )
            )
            fig.update_layout(
                title=f"{metric_label} by {x_param} and {y_param}",
                xaxis_title=x_param,
                xaxis_type=_axis_type(x_param),
                yaxis_title=y_param,
                yaxis_type=_axis_type(y_param),
                height=500,
                width=500,
            )
            charts.append(fig)
    return charts
def plot_score_evolution(results: pd.DataFrame, metric: str) -> go.Figure | None:
    """Create line chart showing score evolution over iterations.

    Args:
        results: DataFrame with CV results.
        metric: The metric to visualize (e.g., 'f1', 'accuracy').

    Returns:
        Plotly figure or None if metric not found.
    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return None

    # Row order is treated as the search iteration order.
    iterations = list(range(len(results)))
    scores = results[score_col].to_numpy()
    best_so_far = results[score_col].cummax().to_numpy()

    # Two hues from the same project colormap: raw score vs. running best.
    cmap = get_cmap("score_evolution")
    metric_label = metric.replace("_", " ").title()

    raw_trace = go.Scatter(
        x=iterations,
        y=scores,
        mode="lines",
        name="Score",
        line={"color": mcolors.rgb2hex(cmap(0.3)), "width": 1},
        opacity=0.6,
        hovertemplate="Iteration: %{x}<br>Score: %{y:.4f}<extra></extra>",
    )
    best_trace = go.Scatter(
        x=iterations,
        y=best_so_far,
        mode="lines",
        name="Best So Far",
        line={"color": mcolors.rgb2hex(cmap(0.7)), "width": 2},
        hovertemplate="Iteration: %{x}<br>Best So Far: %{y:.4f}<extra></extra>",
    )

    fig = go.Figure(data=[raw_trace, best_trace])
    fig.update_layout(
        title=f"{metric_label} Evolution",
        xaxis_title="Iteration",
        yaxis_title=metric_label,
        height=300,
        hovermode="x unified",
    )
    return fig

View file

@ -0,0 +1,97 @@
"""Metrics visualization plots."""
import numpy as np
import plotly.graph_objects as go
import xarray as xr
def plot_confusion_matrix(cm_data: xr.DataArray, title: str = "Confusion Matrix", normalize: str = "none") -> go.Figure:
    """Plot an interactive confusion matrix heatmap.

    Args:
        cm_data: XArray DataArray with confusion matrix data (dimensions: true_label, predicted_label).
            Assumed square with identical true/predicted label sets (labels are
            taken from the ``true_label`` coordinate for both axes).
        title: Title for the plot.
        normalize: Normalization mode - "none", "true", or "pred".

    Returns:
        Plotly figure with the interactive confusion matrix heatmap.
    """
    cm_array = cm_data.values.astype(float)
    labels = cm_data.coords["true_label"].values.tolist()
    # Keep the raw counts for hover text and cell annotations.
    cm_counts = cm_data.values

    # Apply normalization. `out=` pre-fills zeros for cells whose divisor is 0;
    # with `where=` alone those cells would be left as uninitialized memory.
    if normalize == "true":
        # Normalize over true labels (rows) - each row sums to 1
        row_sums = cm_array.sum(axis=1, keepdims=True)
        cm_normalized = np.divide(cm_array, row_sums, out=np.zeros_like(cm_array), where=row_sums != 0)
        colorbar_title = "Proportion"
    elif normalize == "pred":
        # Normalize over predicted labels (columns) - each column sums to 1
        col_sums = cm_array.sum(axis=0, keepdims=True)
        cm_normalized = np.divide(cm_array, col_sums, out=np.zeros_like(cm_array), where=col_sums != 0)
        colorbar_title = "Proportion"
    else:
        # No normalization
        cm_normalized = cm_array
        colorbar_title = "Count"

    # Loop-invariant values for the annotation pass.
    total = cm_counts.sum()
    threshold = cm_normalized.max() / 2 if cm_normalized.max() > 0 else 0.5

    # One text annotation per cell.
    annotations = []
    for i, true_label in enumerate(labels):
        for j, pred_label in enumerate(labels):
            count = int(cm_counts[i, j])
            normalized_val = cm_normalized[i, j]
            if normalize == "none":
                # Show count and percentage of total
                pct = (count / total * 100) if total > 0 else 0
                text = f"{count}<br>({pct:.1f}%)"
            else:
                # Show percentage only for normalized versions
                text = f"{normalized_val:.1%}"
            # White text on dark (high-valued) cells for readability.
            text_color = "white" if normalized_val > threshold else "black"
            annotations.append(
                {
                    "x": pred_label,
                    "y": true_label,
                    "text": text,
                    "showarrow": False,
                    "font": {"size": 10, "color": text_color},
                }
            )

    # Color by the (possibly normalized) values; hover always shows raw counts.
    fig = go.Figure(
        data=go.Heatmap(
            z=cm_normalized,
            x=labels,
            y=labels,
            colorscale="Blues",
            colorbar={"title": colorbar_title},
            hoverongaps=False,
            hovertemplate="True: %{y}<br>Predicted: %{x}<br>Count: %{customdata}<extra></extra>",
            customdata=cm_counts,
        )
    )
    fig.update_layout(
        # The `title` argument was previously accepted but never applied.
        title=title,
        annotations=annotations,
        xaxis={"title": "Predicted Label", "side": "bottom"},
        yaxis={"title": "True Label", "autorange": "reversed"},
        width=600,
        height=550,
    )
    return fig

View file

@ -0,0 +1,180 @@
"""Regression analysis plotting functions."""
from typing import cast
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from entropice.dashboard.utils.colors import get_palette
def plot_regression_scatter(
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    title: str = "True vs Predicted",
) -> go.Figure:
    """Create scatter plot of true vs predicted values for regression.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        title: Title for the plot.

    Returns:
        Plotly figure with regression scatter plot.
    """
    # Local import keeps scipy an optional dependency for the rest of the module;
    # hoisted to the top of the function instead of mid-body.
    from scipy.stats import gaussian_kde

    # Convert to numpy arrays if needed
    y_true_np = cast(np.ndarray, y_true.to_numpy()) if isinstance(y_true, pd.Series) else y_true
    y_pred_np = cast(np.ndarray, y_pred.to_numpy()) if isinstance(y_pred, pd.Series) else y_pred

    # Metrics for the annotation box.
    mse = np.mean((y_true_np - y_pred_np) ** 2)
    mae = np.mean(np.abs(y_true_np - y_pred_np))
    ss_res = np.sum((y_true_np - y_pred_np) ** 2)
    ss_tot = np.sum((y_true_np - np.mean(y_true_np)) ** 2)
    # Guard zero variance in y_true: R² is undefined there (avoids a
    # divide-by-zero RuntimeWarning and an inf in the annotation).
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float("nan")

    # Get colormap
    hex_colors = get_palette("r2", n_colors=256)

    try:
        # Color points by local density so over-plotted regions stand out.
        xy = np.vstack([y_true_np, y_pred_np])
        kde = gaussian_kde(xy)
        density = kde(xy)
    except (np.linalg.LinAlgError, ValueError):
        # Fallback if KDE fails (e.g., all points identical)
        density = np.ones(len(y_true_np))

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=y_true_np,
            y=y_pred_np,
            mode="markers",
            marker={
                "size": 6,
                "color": density,
                "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                "showscale": False,
                "opacity": 0.6,
            },
            text=[f"True: {true:.3f}<br>Pred: {pred:.3f}" for true, pred in zip(y_true_np, y_pred_np)],
            hovertemplate="%{text}<extra></extra>",
            name="Data",
        )
    )

    # Add diagonal line (perfect prediction)
    min_val = min(y_true_np.min(), y_pred_np.min())
    max_val = max(y_true_np.max(), y_pred_np.max())
    fig.add_trace(
        go.Scatter(
            x=[min_val, max_val],
            y=[min_val, max_val],
            mode="lines",
            line={"color": "red", "dash": "dash", "width": 2},
            name="Perfect Prediction",
            hovertemplate="y = x<extra></extra>",
        )
    )

    # Add metrics as annotation
    metrics_text = f"R² = {r2:.4f}<br>MSE = {mse:.4f}<br>MAE = {mae:.4f}"
    fig.add_annotation(
        x=0.02,
        y=0.98,
        xref="paper",
        yref="paper",
        text=metrics_text,
        showarrow=False,
        bgcolor="white",
        bordercolor="black",
        borderwidth=1,
        xanchor="left",
        yanchor="top",
        font={"size": 12},
    )

    fig.update_layout(
        title=title,
        xaxis_title="True Values",
        yaxis_title="Predicted Values",
        height=500,
        showlegend=True,
        legend={"x": 0.98, "y": 0.02, "xanchor": "right", "yanchor": "bottom"},
    )
    # Make axes equal
    fig.update_xaxes(scaleanchor="y", scaleratio=1)
    return fig
def plot_residuals(
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    title: str = "Residual Plot",
) -> go.Figure:
    """Create residual plot for regression diagnostics.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        title: Title for the plot.

    Returns:
        Plotly figure with residual plot.
    """
    # Convert to numpy arrays if needed
    y_true_np = cast(np.ndarray, y_true.to_numpy()) if isinstance(y_true, pd.Series) else y_true
    y_pred_np = cast(np.ndarray, y_pred.to_numpy()) if isinstance(y_pred, pd.Series) else y_pred

    # Residual = true - predicted.
    residuals = y_true_np - y_pred_np

    # Get colormap
    hex_colors = get_palette("r2", n_colors=256)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            # Use the converted numpy view consistently (previously the raw
            # `y_pred` Series/array was mixed with the converted residuals).
            x=y_pred_np,
            y=residuals,
            mode="markers",
            marker={
                "size": 6,
                "color": np.abs(residuals),
                "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                "showscale": True,
                "colorbar": {"title": "Abs Residual"},
                "opacity": 0.6,
            },
            text=[f"Pred: {pred:.3f}<br>Residual: {res:.3f}" for pred, res in zip(y_pred_np, residuals)],
            hovertemplate="%{text}<extra></extra>",
        )
    )

    # Reference line at zero residual.
    fig.add_hline(y=0, line_dash="dash", line_color="red", line_width=2)

    fig.update_layout(
        title=title,
        xaxis_title="Predicted Values",
        yaxis_title="Residuals (True - Predicted)",
        height=400,
        showlegend=False,
    )
    return fig

View file

@ -0,0 +1,185 @@
"""Training Result Sections."""
import streamlit as st
from entropice.dashboard.plots.metrics import plot_confusion_matrix
from entropice.dashboard.utils.formatters import format_metric_name
from entropice.dashboard.utils.loaders import TrainingResult
from entropice.dashboard.utils.stats import CVMetricStatistics
from entropice.utils.types import GridConfig
def render_run_information(selected_result: TrainingResult, refit_metric: str):
    """Render training run configuration overview.

    Args:
        selected_result: The selected TrainingResult object.
        refit_metric: The refit metric used for model selection.
    """
    st.header("📋 Run Information")
    # Grid and level are concatenated into a single key for the config lookup.
    grid_config = GridConfig.from_grid_level(f"{selected_result.settings.grid}{selected_result.settings.level}")  # ty:ignore[invalid-argument-type]
    # One metric per column: Task | Target | Grid | Model | Trials.
    col1, col2, col3, col4, col5 = st.columns(5)
    with col1:
        st.metric("Task", selected_result.settings.task.capitalize())
    with col2:
        st.metric("Target", selected_result.settings.target.capitalize())
    with col3:
        st.metric("Grid", grid_config.display_name)
    with col4:
        st.metric("Model", selected_result.settings.model.upper())
    with col5:
        # Each row in `results` is one hyperparameter-search trial.
        st.metric("Trials", len(selected_result.results))
    st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}")
def _render_metrics(metrics: dict[str, float]):
    """Render metrics in a row of up to five Streamlit columns.

    Metrics beyond the fifth wrap into the same columns round-robin.

    Args:
        metrics: Dictionary of metric names and their values.
    """
    if not metrics:
        # st.columns(0) raises, so handle an empty metric set explicitly.
        st.caption("No metrics available.")
        return
    ncols = min(5, len(metrics))
    cols = st.columns(ncols)
    for idx, (metric_name, metric_value) in enumerate(metrics.items()):
        with cols[idx % ncols]:
            st.metric(format_metric_name(metric_name), f"{metric_value:.4f}")
def render_metrics_section(selected_result: TrainingResult):
    """Render test, training, and overall metrics for the best model.

    Args:
        selected_result: The selected TrainingResult object.
    """
    # Header / caption / metrics triple per data split, rendered identically.
    sections = (
        (
            "🎯 Test Set Performance",
            "Performance metrics on the held-out test set (best model from hyperparameter search)",
            selected_result.test_metrics,
        ),
        (
            "🏋️‍♂️ Training Set Performance",
            "Performance metrics on the training set (best model from hyperparameter search)",
            selected_result.train_metrics,
        ),
        (
            "🧮 Overall Performance",
            "Overall performance metrics combining training and test sets",
            selected_result.combined_metrics,
        ),
    )
    for header, caption, metrics in sections:
        st.header(header)
        st.caption(caption)
        _render_metrics(metrics)
@st.fragment
def render_confusion_matrices(selected_result: TrainingResult):
    """Render test/train/combined confusion matrices for classification tasks.

    Args:
        selected_result: The selected TrainingResult object.
    """
    st.header("🎭 Confusion Matrices")

    # Confusion matrices only exist for classification tasks.
    if selected_result.settings.task not in ["binary", "count_regimes", "density_regimes"]:
        st.info(
            "📊 Confusion matrices are only available for classification tasks "
            "(binary, count_regimes, density_regimes)."
        )
        st.caption("Coming soon for regression tasks: residual plots and error distributions.")
        return

    if selected_result.confusion_matrix is None:
        st.warning("⚠️ No confusion matrix data found for this training result.")
        return
    cm = selected_result.confusion_matrix

    # Let the user pick how the matrices are normalized.
    st.subheader("Display Options")
    normalize_option = st.radio(
        "Normalization",
        options=["No normalization", "Normalize over True Labels", "Normalize over Predicted Labels"],
        horizontal=True,
        help="Choose how to normalize the confusion matrix values",
    )
    normalize_mode = {
        "No normalization": "none",
        "Normalize over True Labels": "true",
        "Normalize over Predicted Labels": "pred",
    }[normalize_option]

    # One column per data split, all rendered the same way.
    panels = (
        ("test", "Test Set", "Held-out test set"),
        ("train", "Training Set", "Training set"),
        ("combined", "Combined", "Train + Test sets"),
    )
    for column, (split, subheader, caption) in zip(st.columns(3), panels):
        with column:
            st.subheader(subheader)
            st.caption(caption)
            fig = plot_confusion_matrix(cm[split], title=subheader, normalize=normalize_mode)
            st.plotly_chart(fig, width="stretch")
def render_cv_statistics_section(cv_stats: CVMetricStatistics, test_score: float):
    """Render cross-validation statistics for selected metric.

    Args:
        cv_stats: CVMetricStatistics object containing cross-validation statistics.
        test_score: The test set score for the selected metric.
    """
    st.header("📈 Cross-Validation Statistics")
    st.caption("Performance during hyperparameter search (averaged across CV folds)")

    # Summary row: one metric per column.
    summary = (
        ("Best Score", cv_stats.best_score),
        ("Mean Score", cv_stats.mean_score),
        ("Std Dev", cv_stats.std_score),
        ("Worst Score", cv_stats.worst_score),
        ("Median Score", cv_stats.median_score),
    )
    for column, (label, value) in zip(st.columns(5), summary):
        with column:
            st.metric(label, f"{value:.4f}")

    if cv_stats.mean_cv_std is not None:
        st.info(f"**Mean CV Std:** {cv_stats.mean_cv_std:.4f} - Average standard deviation across CV folds")

    # Compare best CV score against the held-out test score.
    st.subheader("CV vs Test Performance")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Best CV Score", f"{cv_stats.best_score:.4f}")
    with col2:
        st.metric("Test Score", f"{test_score:.4f}")
    with col3:
        delta = test_score - cv_stats.best_score
        delta_pct = (delta / cv_stats.best_score * 100) if cv_stats.best_score != 0 else 0
        st.metric("Difference", f"{delta:+.4f}", delta=f"{delta_pct:+.2f}%")

    # Flag a CV/test gap larger than one CV standard deviation.
    if abs(delta) > cv_stats.std_score:
        st.warning(
            "⚠️ Test performance differs significantly (larger than the CV standard deviation) from CV performance. "
            "This may indicate overfitting or data distribution mismatch between training and test sets."
        )

View file

@ -2,15 +2,16 @@
from datetime import datetime from datetime import datetime
import pandas as pd
import streamlit as st import streamlit as st
from entropice.dashboard.utils.loaders import TrainingResult from entropice.dashboard.utils.loaders import AutogluonTrainingResult, TrainingResult
from entropice.utils.types import ( from entropice.utils.types import (
GridConfig, GridConfig,
) )
def render_training_results_summary(training_results: list[TrainingResult]): def render_training_results_summary(training_results: list[TrainingResult | AutogluonTrainingResult]):
"""Render summary metrics for training results.""" """Render summary metrics for training results."""
st.header("📊 Training Results Summary") st.header("📊 Training Results Summary")
col1, col2, col3, col4 = st.columns(4) col1, col2, col3, col4 = st.columns(4)
@ -23,7 +24,7 @@ def render_training_results_summary(training_results: list[TrainingResult]):
st.metric("Total Runs", len(training_results)) st.metric("Total Runs", len(training_results))
with col3: with col3:
models = {tr.settings.model for tr in training_results} models = {tr.settings.model for tr in training_results if hasattr(tr.settings, "model")}
st.metric("Model Types", len(models)) st.metric("Model Types", len(models))
with col4: with col4:
@ -33,14 +34,14 @@ def render_training_results_summary(training_results: list[TrainingResult]):
@st.fragment @st.fragment
def render_experiment_results(training_results: list[TrainingResult]): # noqa: C901 def render_experiment_results(training_results: list[TrainingResult | AutogluonTrainingResult]): # noqa: C901
"""Render detailed experiment results table and expandable details.""" """Render detailed experiment results table and expandable details."""
st.header("🎯 Experiment Results") st.header("🎯 Experiment Results")
# Filters # Filters
experiments = sorted({tr.experiment for tr in training_results if tr.experiment}) experiments = sorted({tr.experiment for tr in training_results if tr.experiment})
tasks = sorted({tr.settings.task for tr in training_results}) tasks = sorted({tr.settings.task for tr in training_results})
models = sorted({tr.settings.model for tr in training_results}) models = sorted({tr.settings.model if isinstance(tr, TrainingResult) else "autogluon" for tr in training_results})
grids = sorted({f"{tr.settings.grid}-{tr.settings.level}" for tr in training_results}) grids = sorted({f"{tr.settings.grid}-{tr.settings.level}" for tr in training_results})
# Create filter columns # Create filter columns
@ -87,14 +88,26 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
filtered_results = [tr for tr in filtered_results if tr.experiment == selected_experiment] filtered_results = [tr for tr in filtered_results if tr.experiment == selected_experiment]
if selected_task != "All": if selected_task != "All":
filtered_results = [tr for tr in filtered_results if tr.settings.task == selected_task] filtered_results = [tr for tr in filtered_results if tr.settings.task == selected_task]
if selected_model != "All": if selected_model != "All" and selected_model != "autogluon":
filtered_results = [tr for tr in filtered_results if tr.settings.model == selected_model] filtered_results = [
tr for tr in filtered_results if isinstance(tr, TrainingResult) and tr.settings.model == selected_model
]
elif selected_model == "autogluon":
filtered_results = [tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
if selected_grid != "All": if selected_grid != "All":
filtered_results = [tr for tr in filtered_results if f"{tr.settings.grid}-{tr.settings.level}" == selected_grid] filtered_results = [tr for tr in filtered_results if f"{tr.settings.grid}-{tr.settings.level}" == selected_grid]
st.subheader("Results Table") st.subheader("Results Table")
summary_df = TrainingResult.to_dataframe(filtered_results) summary_df = TrainingResult.to_dataframe([tr for tr in filtered_results if isinstance(tr, TrainingResult)])
autogluon_df = AutogluonTrainingResult.to_dataframe(
[tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
)
if len(summary_df) == 0:
summary_df = autogluon_df
elif len(autogluon_df) > 0:
summary_df = pd.concat([summary_df, autogluon_df], ignore_index=True)
# Display with color coding for best scores # Display with color coding for best scores
st.dataframe( st.dataframe(
summary_df, summary_df,
@ -107,6 +120,8 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
for tr in filtered_results: for tr in filtered_results:
tr_info = tr.display_info tr_info = tr.display_info
display_name = tr_info.get_display_name("model_first") display_name = tr_info.get_display_name("model_first")
model = "autogluon" if isinstance(tr, AutogluonTrainingResult) else tr.settings.model
cv_splits = tr.settings.cv_splits if hasattr(tr.settings, "cv_splits") else "N/A"
with st.expander(display_name): with st.expander(display_name):
col1, col2 = st.columns([1, 2]) col1, col2 = st.columns([1, 2])
@ -117,12 +132,12 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
f"- **Experiment:** {tr.experiment}\n" f"- **Experiment:** {tr.experiment}\n"
f"- **Task:** {tr.settings.task}\n" f"- **Task:** {tr.settings.task}\n"
f"- **Target:** {tr.settings.target}\n" f"- **Target:** {tr.settings.target}\n"
f"- **Model:** {tr.settings.model}\n" f"- **Model:** {model}\n"
f"- **Grid:** {grid_config.display_name}\n" f"- **Grid:** {grid_config.display_name}\n"
f"- **Created At:** {tr_info.timestamp.strftime('%Y-%m-%d %H:%M')}\n" f"- **Created At:** {tr_info.timestamp.strftime('%Y-%m-%d %H:%M')}\n"
f"- **Temporal Mode:** {tr.settings.temporal_mode}\n" f"- **Temporal Mode:** {tr.settings.temporal_mode}\n"
f"- **Members:** {', '.join(tr.settings.members)}\n" f"- **Members:** {', '.join(tr.settings.members)}\n"
f"- **CV Splits:** {tr.settings.cv_splits}\n" f"- **CV Splits:** {cv_splits}\n"
f"- **Classes:** {tr.settings.classes}\n" f"- **Classes:** {tr.settings.classes}\n"
) )
@ -140,26 +155,29 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
file_str += f"- 📄 `{file.name}`\n" file_str += f"- 📄 `{file.name}`\n"
st.write(file_str) st.write(file_str)
with col2: with col2:
st.write("**CV Score Summary:**") if isinstance(tr, AutogluonTrainingResult):
st.write("**Leaderboard:**")
# Extract all test scores st.dataframe(tr.leaderboard, width="stretch", hide_index=True)
metric_df = tr.get_metric_dataframe()
if metric_df is not None:
st.dataframe(metric_df, width="stretch", hide_index=True)
else: else:
st.write("No test scores found in results.") st.write("**CV Score Summary:**")
# Extract all test scores
metric_df = tr.get_metric_dataframe()
if metric_df is not None:
st.dataframe(metric_df, width="stretch", hide_index=True)
else:
st.write("No test scores found in results.")
# Show parameter space explored # Show parameter space explored
if "initial_K" in tr.results.columns: # Common parameter if "initial_K" in tr.results.columns: # Common parameter
st.write("\n**Parameter Ranges Explored:**") st.write("\n**Parameter Ranges Explored:**")
for param in ["initial_K", "eps_cl", "eps_e"]: for param in ["initial_K", "eps_cl", "eps_e"]:
if param in tr.results.columns: if param in tr.results.columns:
min_val = tr.results[param].min() min_val = tr.results[param].min()
max_val = tr.results[param].max() max_val = tr.results[param].max()
unique_vals = tr.results[param].nunique() unique_vals = tr.results[param].nunique()
st.write(f"- **{param}:** {unique_vals} values ({min_val:.2e} to {max_val:.2e})") st.write(f"- **{param}:** {unique_vals} values ({min_val:.2e} to {max_val:.2e})")
st.write("**CV Results DataFrame:**") st.write("**CV Results DataFrame:**")
st.dataframe(tr.results, width="stretch", hide_index=True) st.dataframe(tr.results, width="stretch", hide_index=True)
st.write(f"\n**Path:** `{tr.path}`") st.write(f"\n**Path:** `{tr.path}`")

View file

@ -0,0 +1,172 @@
"""Hyperparameter Space Visualization Section."""
import streamlit as st
from entropice.dashboard.plots.hyperparameter_space import (
plot_parameter_correlations,
plot_parameter_distributions,
plot_parameter_interactions,
plot_score_evolution,
plot_score_vs_parameters,
)
from entropice.dashboard.utils.formatters import format_metric_name
from entropice.dashboard.utils.loaders import TrainingResult
def _render_performance_summary(results, refit_metric: str):
    """Render performance summary subsection.

    Shows the hyperparameters of the row with the highest
    ``mean_test_<refit_metric>`` score as a row of ``st.metric`` widgets.

    Args:
        results: CV results dataframe with sklearn ``cv_results_``-style
            columns (``mean_test_*``, ``param_*``).
        refit_metric: Metric name used to select the best row.
    """
    best_idx = results[f"mean_test_{refit_metric}"].idxmax()
    best_row = results.loc[best_idx]

    # Extract parameter columns. Use removeprefix (not str.replace) so a
    # parameter whose own name contains "param_" is not mangled.
    param_cols = [col for col in results.columns if col.startswith("param_") and col != "params"]
    best_params = {col.removeprefix("param_"): best_row[col] for col in param_cols}

    # Display best parameter combination
    if not best_params:
        return
    with st.container(border=True):
        st.subheader("🏆 Best Parameter Combination")
        st.caption(f"Parameters of the best model (selected by {format_metric_name(refit_metric)} score)")
        cols = st.columns(len(best_params))
        for idx, (param_name, param_value) in enumerate(best_params.items()):
            with cols[idx]:
                st.metric(param_name, _format_param_value(param_value))


def _format_param_value(value) -> str:
    """Format a hyperparameter value for display.

    Ints are shown without decimals, very small non-zero floats in
    scientific notation, other floats with four decimals, and everything
    else via ``str()``.
    """
    if isinstance(value, int):
        return f"{value:.0f}"
    if isinstance(value, float):
        # Use scientific notation for very small numbers
        if abs(value) < 0.001 and value != 0:
            return f"{value:.2e}"
        return f"{value:.4f}"
    return str(value)
def _render_parameter_distributions(results, param_grid: dict | None):
    """Render parameter distributions subsection.

    Shows one distribution chart per explored hyperparameter, laid out in
    rows of at most three columns.
    """
    st.subheader("Parameter Distributions")
    st.caption("Distribution of hyperparameter values explored during random search")
    param_charts = plot_parameter_distributions(results, param_grid)
    if not param_charts:
        st.info("No parameter distribution data available.")
        return

    names = list(param_charts)
    per_row = min(3, len(names))
    # Lay the charts out row by row; a short final row leaves empty columns.
    for start in range(0, len(names), per_row):
        for slot, name in zip(st.columns(per_row), names[start : start + per_row]):
            with slot:
                st.plotly_chart(param_charts[name], width="stretch")
def _render_score_evolution(results, selected_metric: str):
    """Render score evolution subsection.

    Plots how the selected metric changed over the search iterations, or a
    warning when no evolution chart can be built for that metric.
    """
    st.subheader("Score Evolution Over Iterations")
    st.caption(f"How {format_metric_name(selected_metric)} evolved during the random search")
    chart = plot_score_evolution(results, selected_metric)
    if not chart:
        st.warning(f"Score evolution not available for metric: {selected_metric}")
        return
    st.plotly_chart(chart, width="stretch")
def _render_score_vs_parameters(results, selected_metric: str, param_grid: dict | None):
    """Render score vs parameters subsection.

    One scatter per hyperparameter relating it to the selected metric,
    arranged in rows of at most two columns.
    """
    st.subheader("Score vs Individual Parameters")
    st.caption(f"Relationship between {format_metric_name(selected_metric)} and each hyperparameter")
    charts = plot_score_vs_parameters(results, selected_metric, param_grid)
    if not charts:
        st.info("No score vs parameter data available.")
        return

    names = list(charts)
    per_row = min(2, len(names))
    # Chunked layout: each chunk becomes one row of columns.
    for start in range(0, len(names), per_row):
        for slot, name in zip(st.columns(per_row), names[start : start + per_row]):
            with slot:
                st.plotly_chart(charts[name], width="stretch")
def _render_parameter_correlations(results, selected_metric: str):
    """Render parameter correlations subsection.

    Shows correlations between numeric hyperparameters and the selected
    metric, or an info message when no numeric parameters exist.
    """
    st.subheader("Parameter-Score Correlations")
    st.caption(f"Correlation between numeric parameters and {format_metric_name(selected_metric)}")
    chart = plot_parameter_correlations(results, selected_metric)
    if not chart:
        st.info("No numeric parameters found for correlation analysis.")
        return
    st.plotly_chart(chart, width="stretch")
def _render_parameter_interactions(results, selected_metric: str, param_grid: dict | None):
    """Render parameter interactions subsection.

    Shows pairwise parameter-interaction charts in rows of at most two
    columns, or an info message when too few numeric parameters exist.
    """
    st.subheader("Parameter Interactions")
    st.caption(f"Interaction between parameter pairs and their effect on {format_metric_name(selected_metric)}")
    charts = plot_parameter_interactions(results, selected_metric, param_grid)
    if not charts:
        st.info("Not enough numeric parameters for parameter interaction visualization.")
        return

    per_row = min(2, len(charts))
    # Walk the chart list chunk by chunk, one chunk per row of columns.
    for start in range(0, len(charts), per_row):
        for slot, chart in zip(st.columns(per_row), charts[start : start + per_row]):
            with slot:
                st.plotly_chart(chart, width="stretch")
def render_hparam_space_section(selected_result: TrainingResult, selected_metric: str):
    """Render the hyperparameter space visualization section.

    Args:
        selected_result: The selected TrainingResult object.
        selected_metric: The metric to focus analysis on.
    """
    st.header("🧩 Hyperparameter Space Exploration")

    cv_results = selected_result.results
    # NOTE(review): reaches into a private TrainingResult method — consider
    # exposing a public accessor for the refit metric name.
    refit_metric = selected_result._get_best_metric_name()
    grid = selected_result.settings.param_grid

    # Sub-sections, top to bottom: best combination, distributions,
    # evolution, per-parameter scores, correlations, interactions.
    _render_performance_summary(cv_results, refit_metric)
    _render_parameter_distributions(cv_results, grid)
    _render_score_evolution(cv_results, selected_metric)
    _render_score_vs_parameters(cv_results, selected_metric, grid)
    _render_parameter_correlations(cv_results, selected_metric)
    _render_parameter_interactions(cv_results, selected_metric, grid)

View file

@ -0,0 +1,122 @@
"""Regression Analysis Section."""
import streamlit as st
from entropice.dashboard.plots.regression import plot_regression_scatter, plot_residuals
from entropice.dashboard.utils.loaders import TrainingResult
from entropice.ml.dataset import DatasetEnsemble
def render_regression_analysis(selected_result: TrainingResult):
    """Render regression analysis with true vs predicted scatter plots.

    Rebuilds the training set from the stored settings to recover true
    target values and the train/test split, merges them with the saved
    predictions, and shows scatter and residual plots for the test,
    train, and combined subsets.

    Args:
        selected_result: The selected TrainingResult object.
    """
    st.header("📊 Regression Analysis")

    # Classification tasks have no continuous predictions to analyse.
    if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]:
        st.info("📈 Regression analysis is only available for regression tasks (count, density).")
        return

    # Load predictions
    predictions_df = selected_result.load_predictions()
    if predictions_df is None:
        st.warning("⚠️ No prediction data found for this training result.")
        return

    # Recreate the exact DatasetEnsemble/TrainingSet the run was trained on
    # so the true values and split assignment match the predictions.
    with st.spinner("Loading training data to get true values..."):
        ensemble = DatasetEnsemble(
            grid=selected_result.settings.grid,
            level=selected_result.settings.level,
            members=selected_result.settings.members,
            temporal_mode=selected_result.settings.temporal_mode,
            dimension_filters=selected_result.settings.dimension_filters,
            variable_filters=selected_result.settings.variable_filters,
            add_lonlat=selected_result.settings.add_lonlat,
        )
        training_set = ensemble.create_training_set(
            task=selected_result.settings.task,
            target=selected_result.settings.target,
            device="cpu",
            cache_mode="read",
        )

    # Get split information
    split_series = training_set.split

    # predictions_df has 'cell_id' and 'predicted' columns;
    # training_set.targets holds true values in 'y' indexed by cell_id.
    true_values = training_set.targets[["y"]].reset_index()
    merged = predictions_df.merge(true_values, on="cell_id", how="inner")
    merged["split"] = split_series.reindex(merged["cell_id"]).values

    train_data = merged[merged["split"] == "train"]
    test_data = merged[merged["split"] == "test"]
    if len(train_data) == 0 or len(test_data) == 0:
        st.error("❌ Could not properly split data into train and test sets.")
        return

    # One entry per column: (title, caption, data subset).
    subsets = [
        ("Test Set", "Held-out test set", test_data),
        ("Training Set", "Training set", train_data),
        ("Combined", "Train + Test sets", merged),
    ]

    # Display scatter plots
    st.subheader("True vs Predicted Values")
    st.caption("Scatter plots showing the relationship between true and predicted values")
    for col, (title, caption, data) in zip(st.columns(3), subsets):
        with col:
            st.markdown(f"#### {title}")
            st.caption(caption)
            fig = plot_regression_scatter(data["y"], data["predicted"], title=title)
            # width="stretch" keeps this page consistent with the rest of the
            # dashboard (use_container_width is the deprecated spelling).
            st.plotly_chart(fig, width="stretch")

    # Display residual plots
    st.subheader("Residual Analysis")
    st.caption("Residual plots to assess model fit and identify patterns in errors")
    for col, (title, _caption, data) in zip(st.columns(3), subsets):
        with col:
            fig_res = plot_residuals(data["y"], data["predicted"], title=f"{title} Residuals")
            st.plotly_chart(fig_res, width="stretch")

View file

@ -1,70 +0,0 @@
"""Utilities for ordering predicted classes consistently across visualizations.
This module leverages the canonical class labels defined in the ML dataset module
to ensure consistent ordering across all visualizations.
"""
import pandas as pd
from entropice.utils.types import Task
# Canonical orderings imported from the ML pipeline
# Binary labels are defined inline in dataset.py: {False: "No RTS", True: "RTS"}
# Count/Density labels are defined in the bin_values function
# Canonical class labels per task, mirroring the ML dataset module.
BINARY_LABELS = ["No RTS", "RTS"]
COUNT_LABELS = ["None", "Very Few", "Few", "Several", "Many", "Very Many"]
DENSITY_LABELS = ["Empty", "Very Sparse", "Sparse", "Moderate", "Dense", "Very Dense"]

# Lookup from task name to its canonical label ordering.
CLASS_ORDERINGS: dict[Task | str, list[str]] = {
    "binary": BINARY_LABELS,
    "count": COUNT_LABELS,
    "density": DENSITY_LABELS,
}


def get_ordered_classes(task: Task | str, available_classes: list[str] | None = None) -> list[str]:
    """Get properly ordered class labels for a given task.

    This uses the same canonical ordering as defined in the ML dataset module,
    ensuring consistency between training and inference visualizations.

    Args:
        task: Task type ('binary', 'count', 'density').
        available_classes: Optional list of available classes to filter and order.
            If None, returns all canonical classes for the task.

    Returns:
        List of class labels in proper order.

    Examples:
        >>> get_ordered_classes("binary")
        ['No RTS', 'RTS']
        >>> get_ordered_classes("count", ["None", "Few", "Several"])
        ['None', 'Few', 'Several']
    """
    canonical = CLASS_ORDERINGS[task]
    if available_classes is None:
        return canonical
    # Keep only the available classes, in canonical order (membership via a
    # set is equivalent here because order comes from the canonical list).
    wanted = set(available_classes)
    return [label for label in canonical if label in wanted]
def sort_class_series(series: pd.Series, task: Task | str) -> pd.Series:
    """Sort a pandas Series with class labels according to canonical ordering.

    Args:
        series: Pandas Series with class labels as index.
        task: Task type ('binary', 'count', 'density').

    Returns:
        Sorted Series with classes in canonical order.
    """
    # Order the labels present in the index canonically, then reindex.
    ordered = get_ordered_classes(task, series.index.tolist())
    return series.reindex(ordered)

View file

@ -59,7 +59,7 @@ task_display_infos: dict[Task, TaskDisplayInfo] = {
class TrainingResultDisplayInfo: class TrainingResultDisplayInfo:
task: Task task: Task
target: TargetDataset target: TargetDataset
model: Model model: Model | Literal["autogluon"]
grid: Grid grid: Grid
level: int level: int
timestamp: datetime timestamp: datetime

View file

@ -17,6 +17,7 @@ from shapely.geometry import shape
import entropice.spatial.grids import entropice.spatial.grids
import entropice.utils.paths import entropice.utils.paths
from entropice.dashboard.utils.formatters import TrainingResultDisplayInfo from entropice.dashboard.utils.formatters import TrainingResultDisplayInfo
from entropice.ml.autogluon_training import AutoGluonTrainingSettings
from entropice.ml.dataset import DatasetEnsemble, TrainingSet from entropice.ml.dataset import DatasetEnsemble, TrainingSet
from entropice.ml.training import TrainingSettings from entropice.ml.training import TrainingSettings
from entropice.utils.types import GridConfig, TargetDataset, Task, all_target_datasets, all_tasks from entropice.utils.types import GridConfig, TargetDataset, Task, all_target_datasets, all_tasks
@ -215,14 +216,18 @@ class TrainingResult:
return pd.DataFrame.from_records(records) return pd.DataFrame.from_records(records)
@st.cache_data @st.cache_data(ttl=300) # Cache for 5 minutes
def load_all_training_results() -> list[TrainingResult]: def load_all_training_results() -> list[TrainingResult]:
"""Load all training results from the results directory.""" """Load all training results from the results directory."""
results_dir = entropice.utils.paths.RESULTS_DIR results_dir = entropice.utils.paths.RESULTS_DIR
training_results: list[TrainingResult] = [] training_results: list[TrainingResult] = []
incomplete_results: list[tuple[Path, Exception]] = []
for result_path in results_dir.iterdir(): for result_path in results_dir.iterdir():
if not result_path.is_dir(): if not result_path.is_dir():
continue continue
# Skip AutoGluon results directory
if "autogluon" in result_path.name.lower():
continue
try: try:
training_result = TrainingResult.from_path(result_path) training_result = TrainingResult.from_path(result_path)
training_results.append(training_result) training_results.append(training_result)
@ -237,10 +242,159 @@ def load_all_training_results() -> list[TrainingResult]:
training_results.append(training_result) training_results.append(training_result)
is_experiment_dir = True is_experiment_dir = True
except FileNotFoundError as e2: except FileNotFoundError as e2:
st.warning(f"Skipping incomplete training result: {e2}") incomplete_results.append((experiment_path, e2))
if not is_experiment_dir: if not is_experiment_dir:
st.warning(f"Skipping incomplete training result: {e}") incomplete_results.append((result_path, e))
if len(incomplete_results) > 0:
st.warning(
f"Found {len(incomplete_results)} incomplete training results that were skipped:\n - "
+ "\n - ".join(f"{p}: {e}" for p, e in incomplete_results)
)
# Sort by creation time (most recent first)
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
return training_results
@dataclass
class AutogluonTrainingResult:
    """Wrapper for training result data and metadata.

    Loads the artifacts an AutoGluon run writes into a result directory
    (settings TOML, pickled test metrics, leaderboard/feature-importance
    parquet files) and exposes them for the dashboard.
    """

    path: Path  # result directory this object was loaded from
    experiment: str  # experiment name, or "N/A" when loaded from a flat result dir
    settings: AutoGluonTrainingSettings  # parsed from training_settings.toml
    test_metrics: dict[str, float | dict | pd.DataFrame]  # unpickled from test_metrics.pickle
    leaderboard: pd.DataFrame  # AutoGluon leaderboard read from leaderboard.parquet
    feature_importance: pd.DataFrame | None  # None when feature_importance.parquet is absent
    created_at: float  # st_ctime of the result dir (metadata-change time on Unix, not creation)
    files: list[Path]  # every path found directly inside the result directory

    @classmethod
    def from_path(cls, result_path: Path, experiment_name: str | None = None) -> "AutogluonTrainingResult":
        """Load an AutogluonTrainingResult from a given result directory path.

        Args:
            result_path: Directory containing the AutoGluon artifacts.
            experiment_name: Optional experiment name; "N/A" is stored when omitted.

        Raises:
            FileNotFoundError: If the settings, metrics, or leaderboard file is missing.
        """
        settings_file = result_path / "training_settings.toml"
        metrics_file = result_path / "test_metrics.pickle"
        leaderboard_file = result_path / "leaderboard.parquet"
        feature_importance_file = result_path / "feature_importance.parquet"
        all_files = list(result_path.iterdir())
        # Validate the three mandatory files up-front so callers get a
        # FileNotFoundError (used by the loader to detect experiment dirs).
        if not settings_file.exists():
            raise FileNotFoundError(f"Missing settings file in {result_path}")
        if not metrics_file.exists():
            raise FileNotFoundError(f"Missing metrics file in {result_path}")
        if not leaderboard_file.exists():
            raise FileNotFoundError(f"Missing leaderboard file in {result_path}")
        created_at = result_path.stat().st_ctime
        settings_dict = toml.load(settings_file)["settings"]
        settings = AutoGluonTrainingSettings(**settings_dict)
        # NOTE(review): pickle.load trusts the file contents — only load
        # results written by this project.
        with open(metrics_file, "rb") as f:
            metrics = pickle.load(f)
        leaderboard = pd.read_parquet(leaderboard_file)
        # Feature importance is optional; absent for runs that skipped it.
        if feature_importance_file.exists():
            feature_importance = pd.read_parquet(feature_importance_file)
        else:
            feature_importance = None
        return cls(
            path=result_path,
            experiment=experiment_name or "N/A",
            settings=settings,
            test_metrics=metrics,
            leaderboard=leaderboard,
            feature_importance=feature_importance,
            created_at=created_at,
            files=all_files,
        )

    @property
    def test_confusion_matrix(self) -> pd.DataFrame | None:
        """Get the test confusion matrix, or None if the metrics lack one."""
        if "confusion_matrix" not in self.test_metrics:
            return None
        assert isinstance(self.test_metrics["confusion_matrix"], pd.DataFrame)
        return self.test_metrics["confusion_matrix"]

    @property
    def display_info(self) -> TrainingResultDisplayInfo:
        """Get display information for the training result.

        The model is always reported as the literal "autogluon".
        """
        return TrainingResultDisplayInfo(
            task=self.settings.task,
            target=self.settings.target,
            model="autogluon",
            grid=self.settings.grid,
            level=self.settings.level,
            timestamp=datetime.fromtimestamp(self.created_at),
        )

    def _get_best_metric_name(self) -> str:
        """Get the primary metric name for a given task.

        f1 for binary, f1_weighted for regime classification, RMSE otherwise.
        """
        match self.settings.task:
            case "binary":
                return "f1"
            case "count_regimes" | "density_regimes":
                return "f1_weighted"
            case _:  # regression tasks
                return "root_mean_squared_error"

    @staticmethod
    def to_dataframe(training_results: list["AutogluonTrainingResult"]) -> pd.DataFrame:
        """Convert a list of AutogluonTrainingResult objects to a DataFrame for display.

        Args:
            training_results: Results to summarize, one row each.

        Returns:
            DataFrame with display columns (task, target, grid, score, path, ...).
        """
        records = []
        for tr in training_results:
            info = tr.display_info
            best_metric_name = tr._get_best_metric_name()
            record = {
                "Experiment": tr.experiment if tr.experiment else "N/A",
                "Task": info.task,
                "Target": info.target,
                "Model": info.model,
                "Grid": GridConfig.from_grid_level((info.grid, info.level)).display_name,
                "Created At": info.timestamp.strftime("%Y-%m-%d %H:%M"),
                "Score-Metric": best_metric_name.title(),
                "Best Models Score (Test-Set)": tr.test_metrics.get(best_metric_name),
                "Path": str(tr.path.name),
            }
            records.append(record)
        return pd.DataFrame.from_records(records)
@st.cache_data(ttl=300)  # Cache for 5 minutes
def load_all_autogluon_training_results() -> list[AutogluonTrainingResult]:
    """Load all training results from the results directory.

    Only directories whose name contains "autogluon" are considered. A
    directory that is not itself a result is retried as an experiment folder
    containing one result per subdirectory; everything that still fails is
    reported once via a single st.warning.
    """
    results_dir = entropice.utils.paths.RESULTS_DIR
    loaded: list[AutogluonTrainingResult] = []
    skipped: list[tuple[Path, Exception]] = []

    for candidate in results_dir.iterdir():
        # Only AutoGluon result directories belong to this loader.
        if not candidate.is_dir() or "autogluon" not in candidate.name.lower():
            continue
        try:
            loaded.append(AutogluonTrainingResult.from_path(candidate))
        except FileNotFoundError as outer_err:
            # Maybe an experiment folder: try each subdirectory as a result.
            found_experiment = False
            for sub in candidate.iterdir():
                if not sub.is_dir():
                    continue
                try:
                    loaded.append(AutogluonTrainingResult.from_path(sub, sub.parent.name))
                    found_experiment = True
                except FileNotFoundError as inner_err:
                    skipped.append((sub, inner_err))
            if not found_experiment:
                skipped.append((candidate, outer_err))

    if skipped:
        st.warning(
            f"Found {len(skipped)} incomplete autogluon training results that were skipped:\n - "
            + "\n - ".join(f"{p}: {e}" for p, e in skipped)
        )

    # Sort by creation time (most recent first)
    loaded.sort(key=lambda tr: tr.created_at, reverse=True)
    return loaded

View file

@ -369,6 +369,7 @@ def render_xgboost_model_state(model_state: xr.Dataset, selected_result: Trainin
options=["gain", "weight", "cover", "total_gain", "total_cover"], options=["gain", "weight", "cover", "total_gain", "total_cover"],
index=0, index=0,
help="Choose which importance metric to visualize", help="Choose which importance metric to visualize",
key="model_state_importance_type",
) )
# Top N slider # Top N slider

View file

@ -9,7 +9,7 @@ from entropice.dashboard.sections.experiment_results import (
render_training_results_summary, render_training_results_summary,
) )
from entropice.dashboard.sections.storage_statistics import render_storage_statistics from entropice.dashboard.sections.storage_statistics import render_storage_statistics
from entropice.dashboard.utils.loaders import load_all_training_results from entropice.dashboard.utils.loaders import load_all_autogluon_training_results, load_all_training_results
from entropice.dashboard.utils.stats import DatasetStatistics, load_all_default_dataset_statistics from entropice.dashboard.utils.stats import DatasetStatistics, load_all_default_dataset_statistics
@ -27,6 +27,9 @@ def render_overview_page():
) )
# Load training results # Load training results
training_results = load_all_training_results() training_results = load_all_training_results()
autogluon_results = load_all_autogluon_training_results()
if len(autogluon_results) > 0:
training_results.extend(autogluon_results)
if not training_results: if not training_results:
st.warning("No training results found. Please run some training experiments first.") st.warning("No training results found. Please run some training experiments first.")

View file

@ -2,150 +2,22 @@
from typing import cast from typing import cast
import geopandas as gpd
import streamlit as st import streamlit as st
import xarray as xr
from stopuhr import stopwatch
from entropice.dashboard.plots.hyperparameter_analysis import ( from entropice.dashboard.sections.cv_result import (
render_binned_parameter_space, render_confusion_matrices,
render_confusion_matrix_heatmap, render_cv_statistics_section,
render_confusion_matrix_map, render_metrics_section,
render_espa_binned_parameter_space, render_run_information,
render_multi_metric_comparison,
render_parameter_correlation,
render_parameter_distributions,
render_performance_summary,
render_top_configurations,
) )
from entropice.dashboard.sections.hparam_space import render_hparam_space_section
from entropice.dashboard.sections.regression_analysis import render_regression_analysis
from entropice.dashboard.utils.formatters import format_metric_name from entropice.dashboard.utils.formatters import format_metric_name
from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results
from entropice.dashboard.utils.stats import CVResultsStatistics from entropice.dashboard.utils.stats import CVMetricStatistics
from entropice.utils.types import GridConfig
def load_predictions_with_labels(selected_result: TrainingResult) -> gpd.GeoDataFrame | None: def render_analysis_settings_sidebar(training_results: list[TrainingResult]) -> tuple[TrainingResult, str, str]:
"""Load predictions and merge with training data to get true labels and split info.
Args:
selected_result: The selected TrainingResult object.
Returns:
GeoDataFrame with predictions, true labels, and split information, or None if unavailable.
"""
from sklearn.model_selection import train_test_split
from entropice.ml.dataset import DatasetEnsemble, bin_values, taskcol
# Load predictions
preds_gdf = selected_result.load_predictions()
if preds_gdf is None:
return None
# Create a minimal dataset ensemble to access target data
settings = selected_result.settings
dataset_ensemble = DatasetEnsemble(
grid=settings.grid,
level=settings.level,
target=settings.target,
members=[], # No feature data needed, just targets
)
# Load target dataset (just labels, no features)
with st.spinner("Loading target labels..."):
targets = dataset_ensemble._read_target()
# Get coverage and task columns
task_col = taskcol[settings.task][settings.target]
# Filter for valid labels (same as in _cat_and_split)
valid_labels = targets[task_col].notna()
filtered_targets = targets.loc[valid_labels].copy()
# Apply binning to get class labels (same logic as _cat_and_split)
if settings.task == "binary":
binned = filtered_targets[task_col].map({False: "No RTS", True: "RTS"}).astype("category")
elif settings.task == "count":
binned = bin_values(filtered_targets[task_col].astype(int), task=settings.task)
elif settings.task == "density":
binned = bin_values(filtered_targets[task_col], task=settings.task)
else:
raise ValueError(f"Invalid task: {settings.task}")
filtered_targets["true_class"] = binned.to_numpy()
# Recreate the train/test split deterministically (same random_state=42 as in _cat_and_split)
_train_idx, test_idx = train_test_split(
filtered_targets.index.to_numpy(), test_size=0.2, random_state=42, shuffle=True
)
filtered_targets["split"] = "train"
filtered_targets.loc[test_idx, "split"] = "test"
filtered_targets["split"] = filtered_targets["split"].astype("category")
# Ensure cell_id is available as a column for merging
# Check if cell_id already exists, otherwise use the index
if "cell_id" not in filtered_targets.columns:
filtered_targets = filtered_targets.reset_index().rename(columns={"index": "cell_id"})
# Merge predictions with labels (inner join to keep only cells with predictions)
merged = filtered_targets.merge(preds_gdf[["cell_id", "predicted_class"]], on="cell_id", how="inner")
merged_gdf = gpd.GeoDataFrame(merged, geometry="geometry", crs=targets.crs)
return merged_gdf
def compute_confusion_matrix_from_merged_data(
    merged_data: gpd.GeoDataFrame,
    split_type: str,
    label_names: list[str],
) -> xr.DataArray | None:
    """Compute confusion matrix from merged predictions and labels.

    Args:
        merged_data: GeoDataFrame with 'true_class', 'predicted_class', and 'split' columns.
        split_type: One of 'test', 'train', or 'all'.
        label_names: List of class label names in order.

    Returns:
        xarray.DataArray with confusion matrix or None if data unavailable.
    """
    from sklearn.metrics import confusion_matrix

    # Select the rows belonging to the requested split.
    if split_type == "all":
        subset = merged_data
    elif split_type in ("train", "test"):
        subset = merged_data[merged_data["split"] == split_type]
    else:
        raise ValueError(f"Invalid split_type: {split_type}")

    if len(subset) == 0:
        st.warning(f"No data available for {split_type} split.")
        return None

    cm = confusion_matrix(
        subset["true_class"].to_numpy(),
        subset["predicted_class"].to_numpy(),
        labels=label_names,
    )
    # Wrap in a labelled DataArray so downstream plots can index by class name.
    return xr.DataArray(
        cm,
        dims=["true_label", "predicted_label"],
        coords={"true_label": label_names, "predicted_label": label_names},
        name="confusion_matrix",
    )
def render_analysis_settings_sidebar(training_results: list[TrainingResult]) -> tuple[TrainingResult, str, str, int]:
"""Render sidebar for training run and analysis settings selection. """Render sidebar for training run and analysis settings selection.
Args: Args:
@ -155,351 +27,63 @@ def render_analysis_settings_sidebar(training_results: list[TrainingResult]) ->
Tuple of (selected_result, selected_metric, refit_metric, top_n). Tuple of (selected_result, selected_metric, refit_metric, top_n).
""" """
st.header("Select Training Run") with st.sidebar.form("training_analysis_settings_form"):
st.header("Select Training Run")
# Create selection options with task-first naming # Create selection options with task-first naming
training_options = {tr.display_info.get_display_name("task_first"): tr for tr in training_results} training_options = {tr.display_info.get_display_name("task_first"): tr for tr in training_results}
selected_name = st.selectbox( selected_name = st.selectbox(
"Training Run", "Training Run",
options=list(training_options.keys()), options=list(training_options.keys()),
index=0, index=0,
help="Select a training run to analyze", help="Select a training run to analyze",
key="training_run_select", key="training_run_select",
) )
selected_result = cast(TrainingResult, training_options[selected_name]) selected_result = cast(TrainingResult, training_options[selected_name])
st.divider()
# Metric selection for detailed analysis
st.subheader("Analysis Settings")
available_metrics = selected_result.available_metrics
# Try to get refit metric from settings
refit_metric = "f1" if selected_result.settings.task == "binary" else "f1_weighted"
if refit_metric in available_metrics:
default_metric_idx = available_metrics.index(refit_metric)
else:
default_metric_idx = 0
selected_metric = st.selectbox(
"Primary Metric for Analysis",
options=available_metrics,
index=default_metric_idx,
format_func=format_metric_name,
help="Select the metric to focus on for detailed analysis",
key="metric_select",
)
# Top N configurations
top_n = st.slider(
"Top N Configurations",
min_value=5,
max_value=50,
value=10,
step=5,
help="Number of top configurations to display",
key="top_n_slider",
)
return selected_result, selected_metric, refit_metric, top_n
def render_run_information(selected_result: TrainingResult, refit_metric):
    """Render training run configuration overview.

    Args:
        selected_result: The selected TrainingResult object.
        refit_metric: The refit metric used for model selection.
    """
    st.header("📋 Run Information")
    settings = selected_result.settings
    grid_config = GridConfig.from_grid_level(f"{settings.grid}{settings.level}")  # ty:ignore[invalid-argument-type]

    # Five headline facts about the run, one st.metric per column.
    facts = (
        ("Task", settings.task.capitalize()),
        ("Target", settings.target.capitalize()),
        ("Grid", grid_config.display_name),
        ("Model", settings.model.upper()),
        ("Trials", len(selected_result.results)),
    )
    for column, (label, value) in zip(st.columns(5), facts):
        with column:
            st.metric(label, value)

    st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}")
def render_test_metrics_section(selected_result: TrainingResult):
    """Render test metrics overview showing final model performance.

    The metric labels/keys shown depend on the task: binary classification
    gets a single 5-column row, every other task gets three 3-column rows
    of macro/weighted aggregates.

    Args:
        selected_result: The selected TrainingResult object.
    """
    st.header("🎯 Test Set Performance")
    st.caption("Performance metrics on the held-out test set (best model from hyperparameter search)")
    test_metrics = selected_result.metrics
    if not test_metrics:
        st.warning("No test metrics available for this training run.")
        return
    # Each inner list is one row of (display label, metrics dict key) pairs.
    if selected_result.settings.task == "binary":
        rows = [
            [
                ("Accuracy", "accuracy"),
                ("F1 Score", "f1"),
                ("Precision", "precision"),
                ("Recall", "recall"),
                ("Jaccard", "jaccard"),
            ],
        ]
    else:
        rows = [
            [
                ("Accuracy", "accuracy"),
                ("F1 (Macro)", "f1_macro"),
                ("F1 (Weighted)", "f1_weighted"),
            ],
            [
                ("Precision (Macro)", "precision_macro"),
                ("Precision (Weighted)", "precision_weighted"),
                ("Recall (Macro)", "recall_macro"),
            ],
            [
                ("Jaccard (Micro)", "jaccard_micro"),
                ("Jaccard (Macro)", "jaccard_macro"),
                ("Jaccard (Weighted)", "jaccard_weighted"),
            ],
        ]
    for row in rows:
        _render_metric_row(test_metrics, row)


def _render_metric_row(metrics, labeled_keys):
    """Render one row of st.metric widgets, one column per entry.

    Args:
        metrics: Mapping of metric key -> float score.
        labeled_keys: Sequence of (display label, metric key) pairs.
            Missing keys display as 0, matching the original fallback.
    """
    for column, (label, key) in zip(st.columns(len(labeled_keys)), labeled_keys):
        with column:
            st.metric(label, f"{metrics.get(key, 0):.4f}")
def render_cv_statistics_section(selected_result, selected_metric):
"""Render cross-validation statistics for selected metric.
Args:
selected_result: The selected TrainingResult object.
selected_metric: The metric to display statistics for.
"""
st.header("📈 Cross-Validation Statistics")
st.caption("Performance during hyperparameter search (averaged across CV folds)")
from entropice.dashboard.utils.stats import CVMetricStatistics
cv_stats = CVMetricStatistics.compute(selected_result, selected_metric)
col1, col2, col3, col4, col5 = st.columns(5)
with col1:
st.metric("Best Score", f"{cv_stats.best_score:.4f}")
with col2:
st.metric("Mean Score", f"{cv_stats.mean_score:.4f}")
with col3:
st.metric("Std Dev", f"{cv_stats.std_score:.4f}")
with col4:
st.metric("Worst Score", f"{cv_stats.worst_score:.4f}")
with col5:
st.metric("Median Score", f"{cv_stats.median_score:.4f}")
if cv_stats.mean_cv_std is not None:
st.info(f"**Mean CV Std:** {cv_stats.mean_cv_std:.4f} - Average standard deviation across CV folds")
# Compare with test metric if available
if selected_metric in selected_result.metrics:
test_score = selected_result.metrics[selected_metric]
st.divider() st.divider()
st.subheader("CV vs Test Performance")
col1, col2, col3 = st.columns(3) # Metric selection for detailed analysis
with col1: st.subheader("Analysis Settings")
st.metric("Best CV Score", f"{cv_stats.best_score:.4f}")
with col2:
st.metric("Test Score", f"{test_score:.4f}")
with col3:
delta = test_score - cv_stats.best_score
delta_pct = (delta / cv_stats.best_score * 100) if cv_stats.best_score != 0 else 0
st.metric("Difference", f"{delta:+.4f}", delta=f"{delta_pct:+.2f}%")
if abs(delta) > cv_stats.std_score: available_metrics = selected_result.available_metrics
st.warning(
"⚠️ Test performance differs significantly from CV performance. "
"This may indicate overfitting or data distribution mismatch."
)
# Try to get refit metric from settings
@st.fragment if selected_result.settings.task == "binary":
def render_confusion_matrix_section(selected_result: TrainingResult, merged_predictions: gpd.GeoDataFrame | None): refit_metric = "f1"
"""Render confusion matrix visualization and analysis. elif selected_result.settings.task in ["count_regimes", "density_regimes"]:
refit_metric = "f1_weighted"
Args:
selected_result: The selected TrainingResult object.
merged_predictions: GeoDataFrame with predictions merged with true labels and split info.
"""
st.header("🎲 Confusion Matrix")
st.caption("Detailed breakdown of predictions")
# Add selector for confusion matrix type
cm_type = st.selectbox(
"Select Data Split",
options=["test", "train", "all"],
format_func=lambda x: {"test": "Test Set", "train": "CV Set (Train Split)", "all": "All Available Data"}[x],
help="Choose which data split to display the confusion matrix for",
key="cm_split_select",
)
# Get label names from settings
label_names = selected_result.settings.classes
# Compute or load confusion matrix based on selection
if cm_type == "test":
if selected_result.confusion_matrix is None:
st.warning("No confusion matrix available for the test set.")
return
cm = selected_result.confusion_matrix
st.info("📊 Showing confusion matrix for the **Test Set** (held-out data, never used during training)")
else:
if merged_predictions is None:
st.warning("Predictions data not available. Cannot compute confusion matrix.")
return
with st.spinner(f"Computing confusion matrix for {cm_type} split..."):
cm = compute_confusion_matrix_from_merged_data(merged_predictions, cm_type, label_names)
if cm is None:
return
if cm_type == "train":
st.info(
"📊 Showing confusion matrix for the **CV Set (Train Split)** "
"(data used during hyperparameter search cross-validation)"
)
else: # all
st.info("📊 Showing confusion matrix for **All Available Data** (combined train and test splits)")
render_confusion_matrix_heatmap(cm, selected_result.settings.task)
def render_parameter_space_section(selected_result, selected_metric):
"""Render parameter space analysis section.
Args:
selected_result: The selected TrainingResult object.
selected_metric: The metric to analyze parameters against.
"""
st.header("🔍 Parameter Space Analysis")
# Compute CV results statistics
cv_results_stats = CVResultsStatistics.compute(selected_result)
# Show parameter space summary
with st.expander("📋 Parameter Space Summary", expanded=False):
param_summary_df = cv_results_stats.parameters_to_dataframe()
if not param_summary_df.empty:
st.dataframe(param_summary_df, hide_index=True, width="stretch")
else: else:
st.info("No parameter information available.") refit_metric = "r2"
results = selected_result.results if refit_metric in available_metrics:
settings = selected_result.settings default_metric_idx = available_metrics.index(refit_metric)
else:
default_metric_idx = 0
# Parameter distributions selected_metric = st.selectbox(
st.subheader("📈 Parameter Distributions") "Primary Metric for Analysis",
render_parameter_distributions(results, settings) options=available_metrics,
index=default_metric_idx,
format_func=format_metric_name,
help="Select the metric to focus on for detailed analysis",
key="metric_select",
)
# Binned parameter space plots # Form submit button
st.subheader("🎨 Binned Parameter Space") submitted = st.form_submit_button(
"Load Training Result",
type="primary",
use_container_width=True,
)
# Check if this is an ESPA model and show ESPA-specific plots if not submitted:
model_type = settings.model st.info("👆 Click 'Load Training Result' to apply changes.")
if model_type == "espa": st.stop()
# Show ESPA-specific binned plots (eps_cl vs eps_e binned by K)
render_espa_binned_parameter_space(results, selected_metric)
# Optionally show the generic binned plots in an expander return selected_result, selected_metric, refit_metric
with st.expander("📊 All Parameter Combinations", expanded=False):
st.caption("Generic parameter space exploration (all pairwise combinations)")
render_binned_parameter_space(results, selected_metric)
else:
# For non-ESPA models, show the generic binned plots
render_binned_parameter_space(results, selected_metric)
def render_data_export_section(results, selected_result):
    """Offer downloads of the CV results and run settings, plus a raw preview.

    Args:
        results: DataFrame with CV results.
        selected_result: The selected TrainingResult object.
    """
    with st.expander("💾 Export Data", expanded=False):
        import json

        st.subheader("Download Results")
        run_name = selected_result.path.name
        settings = selected_result.settings
        left, right = st.columns(2)
        with left:
            # Full hyperparameter-search results as CSV.
            st.download_button(
                label="📥 Download Full Results (CSV)",
                data=results.to_csv(index=False),
                file_name=f"{run_name}_results.csv",
                mime="text/csv",
            )
        with right:
            # Key run settings serialized as JSON.
            exported = {
                field: getattr(settings, field)
                for field in ("task", "grid", "level", "model", "cv_splits", "classes")
            }
            st.download_button(
                label="⚙️ Download Settings (JSON)",
                data=json.dumps(exported, indent=2),
                file_name=f"{run_name}_settings.json",
                mime="application/json",
            )
        # Show raw data preview
        st.subheader("Raw Data Preview")
        st.dataframe(results.head(100), width="stretch")
def render_training_analysis_page(): def render_training_analysis_page():
@ -513,91 +97,47 @@ def render_training_analysis_page():
""" """
) )
# Load all available training results # Load training results
training_results = load_all_training_results() training_results = load_all_training_results()
if not training_results: if not training_results:
st.warning("No training results found. Please run some training experiments first.") st.warning("No training results found. Please run some training experiments first.")
st.info("Run training using: `pixi run python -m entropice.ml.training`") st.stop()
return return
st.success(f"Found **{len(training_results)}** training result(s)") st.write(f"Found **{len(training_results)}** training result(s)")
st.divider() st.divider()
selected_result, selected_metric, refit_metric = render_analysis_settings_sidebar(training_results)
# Sidebar: Training run selection cv_statistics = CVMetricStatistics.compute(selected_result, selected_metric)
with st.sidebar:
selection_result = render_analysis_settings_sidebar(training_results)
if selection_result[0] is None:
return
selected_result, selected_metric, refit_metric, top_n = selection_result
# Load predictions with labels once (used by confusion matrix and map)
merged_predictions = load_predictions_with_labels(selected_result)
# Main content area
results = selected_result.results
settings = selected_result.settings
# Run Information
render_run_information(selected_result, refit_metric) render_run_information(selected_result, refit_metric)
st.divider() st.divider()
# Test Metrics Section render_metrics_section(selected_result)
render_test_metrics_section(selected_result)
st.divider() st.divider()
# Confusion Matrix Section # Render confusion matrices for classification, regression analysis for regression
render_confusion_matrix_section(selected_result, merged_predictions) if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]:
render_confusion_matrices(selected_result)
else:
render_regression_analysis(selected_result)
st.divider() st.divider()
# Performance Summary Section render_cv_statistics_section(cv_statistics, selected_result.test_metrics.get(selected_metric, float("nan")))
st.header("📊 CV Performance Overview")
st.caption("Summary of hyperparameter search results across all configurations")
render_performance_summary(results, refit_metric)
st.divider() st.divider()
# Prediction Analysis Map Section render_hparam_space_section(selected_result, selected_metric)
st.header("🗺️ Model Performance Map")
st.caption("Interactive 3D map showing prediction correctness across the training dataset")
render_confusion_matrix_map(selected_result.path, settings, merged_predictions)
st.divider() st.divider()
# Cross-Validation Statistics # List all results at the end
render_cv_statistics_section(selected_result, selected_metric) st.header("📄 All Training Results")
st.dataframe(selected_result.results)
st.divider()
# Parameter Space Analysis
render_parameter_space_section(selected_result, selected_metric)
st.divider()
# Parameter Correlation
st.header("🔗 Parameter Correlation")
render_parameter_correlation(results, selected_metric)
st.divider()
# Multi-Metric Comparison
if len(selected_result.available_metrics) >= 2:
st.header("📊 Multi-Metric Comparison")
render_multi_metric_comparison(results)
st.divider()
# Top Configurations
st.header("🏆 Top Performing Configurations")
render_top_configurations(results, selected_metric, top_n)
st.divider()
# Raw Data Export
render_data_export_section(results, selected_result)
st.balloons() st.balloons()
stopwatch.summary()

View file

@ -44,8 +44,8 @@ class AutoGluonSettings:
class AutoGluonTrainingSettings(DatasetEnsemble, AutoGluonSettings): class AutoGluonTrainingSettings(DatasetEnsemble, AutoGluonSettings):
"""Combined settings for AutoGluon training.""" """Combined settings for AutoGluon training."""
classes: list[str] | None classes: list[str] | None = None
problem_type: str problem_type: str = "binary"
def _determine_problem_type_and_metric(task: Task) -> tuple[str, str]: def _determine_problem_type_and_metric(task: Task) -> tuple[str, str]:
@ -177,6 +177,8 @@ def autogluon_train(
toml.dump({"settings": asdict(combined_settings)}, f) toml.dump({"settings": asdict(combined_settings)}, f)
# Save test metrics # Save test metrics
# We need to use pickle here, because the confusion matrix is stored as a dataframe
# This only matters for classification tasks
test_metrics_file = results_dir / "test_metrics.pickle" test_metrics_file = results_dir / "test_metrics.pickle"
print(f"💾 Saving test metrics to {test_metrics_file}") print(f"💾 Saving test metrics to {test_metrics_file}")
with open(test_metrics_file, "wb") as f: with open(test_metrics_file, "wb") as f: