Redo Training Results Analysis
This commit is contained in:
parent
2664579a75
commit
7d874f7f92
16 changed files with 1455 additions and 2227 deletions
|
|
@ -12,7 +12,6 @@ Pages:
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
from entropice.dashboard.views.autogluon_analysis_page import render_autogluon_analysis_page
|
|
||||||
from entropice.dashboard.views.dataset_page import render_dataset_page
|
from entropice.dashboard.views.dataset_page import render_dataset_page
|
||||||
from entropice.dashboard.views.inference_page import render_inference_page
|
from entropice.dashboard.views.inference_page import render_inference_page
|
||||||
from entropice.dashboard.views.model_state_page import render_model_state_page
|
from entropice.dashboard.views.model_state_page import render_model_state_page
|
||||||
|
|
@ -28,7 +27,6 @@ def main():
|
||||||
overview_page = st.Page(render_overview_page, title="Overview", icon="🏡", default=True)
|
overview_page = st.Page(render_overview_page, title="Overview", icon="🏡", default=True)
|
||||||
data_page = st.Page(render_dataset_page, title="Dataset", icon="📊")
|
data_page = st.Page(render_dataset_page, title="Dataset", icon="📊")
|
||||||
training_analysis_page = st.Page(render_training_analysis_page, title="Training Results Analysis", icon="🦾")
|
training_analysis_page = st.Page(render_training_analysis_page, title="Training Results Analysis", icon="🦾")
|
||||||
autogluon_page = st.Page(render_autogluon_analysis_page, title="AutoGluon Analysis", icon="🤖")
|
|
||||||
model_state_page = st.Page(render_model_state_page, title="Model State", icon="🧮")
|
model_state_page = st.Page(render_model_state_page, title="Model State", icon="🧮")
|
||||||
inference_page = st.Page(render_inference_page, title="Inference", icon="🗺️")
|
inference_page = st.Page(render_inference_page, title="Inference", icon="🗺️")
|
||||||
|
|
||||||
|
|
@ -36,7 +34,7 @@ def main():
|
||||||
{
|
{
|
||||||
"Overview": [overview_page],
|
"Overview": [overview_page],
|
||||||
"Data": [data_page],
|
"Data": [data_page],
|
||||||
"Experiments": [training_analysis_page, autogluon_page, model_state_page],
|
"Experiments": [training_analysis_page, model_state_page],
|
||||||
"Inference": [inference_page],
|
"Inference": [inference_page],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
417
src/entropice/dashboard/plots/hyperparameter_space.py
Normal file
417
src/entropice/dashboard/plots/hyperparameter_space.py
Normal file
|
|
@ -0,0 +1,417 @@
|
||||||
|
"""Hyperparameter space plotting functions."""
|
||||||
|
|
||||||
|
import matplotlib.colors as mcolors
|
||||||
|
import pandas as pd
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
from entropice.dashboard.utils.colors import get_cmap, get_palette
|
||||||
|
|
||||||
|
|
||||||
|
def plot_performance_summary(results: pd.DataFrame, refit_metric: str) -> tuple[pd.DataFrame, pd.DataFrame, dict]:
    """Compute performance summary statistics from CV search results.

    Args:
        results: DataFrame with CV results.
        refit_metric: The metric used for refit (e.g., 'f1', 'f1_weighted').

    Returns:
        Tuple of (best_scores_df, score_stats_df, best_params_dict).

    """
    # All aggregated test-score columns produced by the CV search.
    score_cols = [c for c in results.columns if c.startswith("mean_test_")]
    if not score_cols:
        return pd.DataFrame(), pd.DataFrame(), {}

    # Build the best-score and mean±std summaries in a single pass.
    best_rows = []
    stat_rows = []
    for col in score_cols:
        pretty = col.replace("mean_test_", "").replace("_", " ").title()
        best_rows.append({"Metric": pretty, "Best Score": f"{results[col].max():.4f}"})
        stat_rows.append(
            {
                "Metric": pretty,
                "Mean ± Std": f"{results[col].mean():.4f} ± {results[col].std():.4f}",
            }
        )

    # Row that maximises the refit metric; fall back to the first score
    # column when the requested refit metric is not present.
    refit_col = f"mean_test_{refit_metric}"
    if refit_col not in results.columns and score_cols:
        refit_col = score_cols[0]
    winner = results.loc[results[refit_col].idxmax()]

    # Hyperparameter columns are "param_<name>"; "params" holds the raw dict.
    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    best_params = {c.replace("param_", ""): winner[c] for c in param_cols}

    return pd.DataFrame(best_rows), pd.DataFrame(stat_rows), best_params
|
||||||
|
|
||||||
|
|
||||||
|
def plot_parameter_distributions(results: pd.DataFrame, param_grid: dict | None = None) -> dict[str, go.Figure]:
    """Create histogram charts for parameter distributions.

    Args:
        results: DataFrame with CV results.
        param_grid: Optional parameter grid with distribution information.

    Returns:
        Dictionary mapping parameter names to Plotly figures.

    """
    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    if not param_cols:
        return {}

    # Single mid-range colour drawn from the shared colormap.
    bar_color = mcolors.rgb2hex(get_cmap("parameter_distribution")(0.5))

    figures: dict[str, go.Figure] = {}
    for col in param_cols:
        name = col.replace("param_", "")
        values = results[col].dropna()
        if len(values) == 0:
            continue

        fig = go.Figure()
        if pd.api.types.is_numeric_dtype(values):
            # Numeric parameters: binned histogram.
            fig.add_trace(go.Histogram(x=values, nbinsx=30, marker_color=bar_color, name=name))
        else:
            # Categorical parameters: bar chart of value frequencies.
            counts = values.value_counts().reset_index()
            counts.columns = [name, "count"]
            fig.add_trace(go.Bar(x=counts[name], y=counts["count"], marker_color=bar_color, name=name))

        # Both chart flavours share the same layout.
        fig.update_layout(
            title=f"Distribution of {name}",
            xaxis_title=name,
            yaxis_title="Count",
            height=400,
            showlegend=False,
        )
        figures[name] = fig

    return figures
|
||||||
|
|
||||||
|
|
||||||
|
def plot_score_vs_parameters(
    results: pd.DataFrame, metric: str, param_grid: dict | None = None
) -> dict[str, go.Figure]:
    """Create scatter plots of score vs each parameter.

    Args:
        results: DataFrame with CV results.
        metric: The metric to plot (e.g., 'f1', 'accuracy').
        param_grid: Optional parameter grid with distribution information.

    Returns:
        Dictionary mapping parameter names to Plotly figures.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return {}

    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    if not param_cols:
        return {}

    # 256-step palette turned into a Plotly colorscale (hoisted out of loop).
    hex_colors = get_palette(metric, n_colors=256)
    colorscale = [[i / 255, c] for i, c in enumerate(hex_colors)]

    figures: dict[str, go.Figure] = {}
    for col in param_cols:
        name = col.replace("param_", "")
        values = results[col].dropna()
        if len(values) == 0:
            continue

        # Log x axis when the search space declared a log-uniform
        # distribution for this parameter.
        use_log = False
        if param_grid and name in param_grid:
            config = param_grid[name]
            if isinstance(config, dict) and config.get("distribution") == "loguniform":
                use_log = True

        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=results[col],
                y=results[score_col],
                mode="markers",
                marker={
                    "size": 8,
                    "color": results[score_col],
                    "colorscale": colorscale,
                    "showscale": False,
                    "opacity": 0.6,
                },
                text=[
                    f"{name}: {val}<br>Score: {score:.4f}"
                    for val, score in zip(results[col], results[score_col])
                ],
                hovertemplate="%{text}<extra></extra>",
            )
        )
        fig.update_layout(
            title=f"{metric.replace('_', ' ').title()} vs {name}",
            xaxis_title=name,
            xaxis_type="log" if use_log else "linear",
            yaxis_title=metric.replace("_", " ").title(),
            height=400,
            showlegend=False,
        )
        figures[name] = fig

    return figures
|
||||||
|
|
||||||
|
|
||||||
|
def plot_parameter_correlations(results: pd.DataFrame, metric: str) -> go.Figure | None:
    """Create correlation bar chart between parameters and score.

    Args:
        results: DataFrame with CV results.
        metric: The metric to analyze (e.g., 'f1', 'accuracy').

    Returns:
        Plotly figure or None if no numeric parameters found.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return None

    # Correlation only makes sense for numeric hyperparameters.
    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    numeric_params = [c for c in param_cols if pd.api.types.is_numeric_dtype(results[c])]
    if not numeric_params:
        return None

    # Pearson correlation of each numeric parameter with the score.
    correlations = [
        {
            "Parameter": c.replace("param_", ""),
            "Correlation": results[[c, score_col]].corr().iloc[0, 1],
        }
        for c in numeric_params
    ]
    corr_df = pd.DataFrame(correlations).sort_values("Correlation", ascending=False)

    # Diverging palette so the sign is visible; clamped to the [-1, 1] range.
    hex_colors = get_palette("correlation", n_colors=256)

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=corr_df["Correlation"],
            y=corr_df["Parameter"],
            orientation="h",
            marker={
                "color": corr_df["Correlation"],
                "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                "cmin": -1,
                "cmax": 1,
                "showscale": False,
            },
            text=[f"{c:.3f}" for c in corr_df["Correlation"]],
            hovertemplate="%{y}<br>Correlation: %{x:.3f}<extra></extra>",
        )
    )
    # Height grows with the number of parameters so labels stay legible.
    fig.update_layout(
        xaxis_title="Correlation with Score",
        yaxis_title="Parameter",
        height=max(300, len(correlations) * 30),
        showlegend=False,
    )
    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def plot_parameter_interactions(results: pd.DataFrame, metric: str, param_grid: dict | None = None) -> list[go.Figure]:
    """Create scatter plots showing parameter interactions.

    One figure is produced per unordered pair of numeric hyperparameters,
    with points coloured by the chosen score metric.

    Args:
        results: DataFrame with CV results.
        metric: The metric to visualize (e.g., 'f1', 'accuracy').
        param_grid: Optional parameter grid with distribution information.

    Returns:
        List of Plotly figures showing parameter interactions.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return []

    # Get numeric parameter columns ("params" holds the raw dict, skip it).
    param_cols = [col for col in results.columns if col.startswith("param_") and col != "params"]
    numeric_params = [col for col in param_cols if pd.api.types.is_numeric_dtype(results[col])]

    # Pairwise plots require at least two numeric parameters.
    if len(numeric_params) < 2:
        return []

    # Get colormap (256 hex colours, mapped onto a Plotly colorscale below).
    hex_colors = get_palette(metric, n_colors=256)

    # Create scatter plots for parameter pairs (each unordered pair once).
    charts = []
    param_names = [col.replace("param_", "") for col in numeric_params]

    for i, x_param in enumerate(param_names[:-1]):
        for y_param in param_names[i + 1 :]:
            x_col = f"param_{x_param}"
            y_col = f"param_{y_param}"

            # Check if parameters use log scale (declared as a log-uniform
            # distribution in the search-space configuration).
            x_use_log = False
            y_use_log = False
            if param_grid:
                if x_param in param_grid:
                    x_config = param_grid[x_param]
                    if isinstance(x_config, dict) and x_config.get("distribution") == "loguniform":
                        x_use_log = True
                if y_param in param_grid:
                    y_config = param_grid[y_param]
                    if isinstance(y_config, dict) and y_config.get("distribution") == "loguniform":
                        y_use_log = True

            fig = go.Figure()
            fig.add_trace(
                go.Scatter(
                    x=results[x_col],
                    y=results[y_col],
                    mode="markers",
                    marker={
                        "size": 8,
                        "color": results[score_col],
                        # The comprehension's `i` shadows the outer loop index
                        # only inside the comprehension scope (Python 3).
                        "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                        "showscale": True,
                        "colorbar": {"title": metric.replace("_", " ").title()},
                        "opacity": 0.7,
                    },
                    text=[
                        f"{x_param}: {x_val}<br>{y_param}: {y_val}<br>Score: {score:.4f}"
                        for x_val, y_val, score in zip(results[x_col], results[y_col], results[score_col])
                    ],
                    hovertemplate="%{text}<extra></extra>",
                )
            )
            fig.update_layout(
                title=f"{metric.replace('_', ' ').title()} by {x_param} and {y_param}",
                xaxis_title=x_param,
                xaxis_type="log" if x_use_log else "linear",
                yaxis_title=y_param,
                yaxis_type="log" if y_use_log else "linear",
                height=500,
                width=500,
            )

            charts.append(fig)

    return charts
|
||||||
|
|
||||||
|
|
||||||
|
def plot_score_evolution(results: pd.DataFrame, metric: str) -> go.Figure | None:
    """Create line chart showing score evolution over iterations.

    Args:
        results: DataFrame with CV results.
        metric: The metric to visualize (e.g., 'f1', 'accuracy').

    Returns:
        Plotly figure or None if metric not found.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return None

    # Trial index on the x axis; rows are assumed to be in search order.
    trial_idx = list(range(len(results)))
    raw_scores = results[score_col].to_numpy()
    running_best = results[score_col].cummax().to_numpy()

    # Two shades from the shared colormap: faint raw trace, bold running best.
    cmap = get_cmap("score_evolution")
    raw_color = mcolors.rgb2hex(cmap(0.3))
    best_color = mcolors.rgb2hex(cmap(0.7))

    fig = go.Figure()
    trace_specs = (
        (
            raw_scores,
            "Score",
            {"color": raw_color, "width": 1},
            0.6,
            "Iteration: %{x}<br>Score: %{y:.4f}<extra></extra>",
        ),
        (
            running_best,
            "Best So Far",
            {"color": best_color, "width": 2},
            None,
            "Iteration: %{x}<br>Best So Far: %{y:.4f}<extra></extra>",
        ),
    )
    for y_vals, label, line_style, alpha, template in trace_specs:
        fig.add_trace(
            go.Scatter(
                x=trial_idx,
                y=y_vals,
                mode="lines",
                name=label,
                line=line_style,
                opacity=alpha,
                hovertemplate=template,
            )
        )

    fig.update_layout(
        title=f"{metric.replace('_', ' ').title()} Evolution",
        xaxis_title="Iteration",
        yaxis_title=metric.replace("_", " ").title(),
        height=300,
        hovermode="x unified",
    )
    return fig
|
||||||
97
src/entropice/dashboard/plots/metrics.py
Normal file
97
src/entropice/dashboard/plots/metrics.py
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
"""Metrics visualization plots."""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
import xarray as xr
|
||||||
|
|
||||||
|
|
||||||
|
def plot_confusion_matrix(cm_data: xr.DataArray, title: str = "Confusion Matrix", normalize: str = "none") -> go.Figure:
    """Plot an interactive confusion matrix heatmap.

    Args:
        cm_data: XArray DataArray with confusion matrix data (dimensions: true_label, predicted_label).
        title: Title for the plot.
        normalize: Normalization mode - "none", "true", or "pred".

    Returns:
        Plotly figure with the interactive confusion matrix heatmap.

    """
    # Get the data as a float array for normalization.
    cm_array = cm_data.values.astype(float)
    labels = cm_data.coords["true_label"].values.tolist()

    # Keep the original counts for hover text and cell annotations.
    cm_counts = cm_data.values

    # Apply normalization.
    # BUGFIX: np.divide with `where=` must also be given `out=`; otherwise
    # entries where the condition is False are left as uninitialized memory.
    if normalize == "true":
        # Normalize over true labels (rows) - each non-zero row sums to 1.
        row_sums = cm_array.sum(axis=1, keepdims=True)
        cm_normalized = np.divide(cm_array, row_sums, out=np.zeros_like(cm_array), where=row_sums != 0)
        colorbar_title = "Proportion"
    elif normalize == "pred":
        # Normalize over predicted labels (columns) - each non-zero column sums to 1.
        col_sums = cm_array.sum(axis=0, keepdims=True)
        cm_normalized = np.divide(cm_array, col_sums, out=np.zeros_like(cm_array), where=col_sums != 0)
        colorbar_title = "Proportion"
    else:
        # No normalization - colour by raw counts.
        cm_normalized = cm_array
        colorbar_title = "Count"

    # Per-cell text annotations. `total` and the text-colour threshold are
    # loop-invariant, so compute them once instead of per cell.
    total = cm_counts.sum()
    threshold = cm_normalized.max() / 2 if cm_normalized.max() > 0 else 0.5

    annotations = []
    for i, true_label in enumerate(labels):
        for j, pred_label in enumerate(labels):
            count = int(cm_counts[i, j])
            normalized_val = cm_normalized[i, j]

            if normalize == "none":
                # Show count plus its percentage of the grand total.
                pct = (count / total * 100) if total > 0 else 0
                text = f"{count}<br>({pct:.1f}%)"
            else:
                # Show percentage only for normalized versions.
                text = f"{normalized_val:.1%}"

            # White text on dark cells, black on light ones.
            text_color = "white" if normalized_val > threshold else "black"

            annotations.append(
                {
                    "x": pred_label,
                    "y": true_label,
                    "text": text,
                    "showarrow": False,
                    "font": {"size": 10, "color": text_color},
                }
            )

    # Heatmap coloured by the (possibly normalized) values; hover shows counts.
    fig = go.Figure(
        data=go.Heatmap(
            z=cm_normalized,
            x=labels,
            y=labels,
            colorscale="Blues",
            colorbar={"title": colorbar_title},
            hoverongaps=False,
            hovertemplate="True: %{y}<br>Predicted: %{x}<br>Count: %{customdata}<extra></extra>",
            customdata=cm_counts,
        )
    )

    # BUGFIX: `title` was accepted but never applied to the layout.
    fig.update_layout(
        title=title,
        annotations=annotations,
        xaxis={"title": "Predicted Label", "side": "bottom"},
        yaxis={"title": "True Label", "autorange": "reversed"},
        width=600,
        height=550,
    )

    return fig
|
||||||
180
src/entropice/dashboard/plots/regression.py
Normal file
180
src/entropice/dashboard/plots/regression.py
Normal file
|
|
@ -0,0 +1,180 @@
|
||||||
|
"""Regression analysis plotting functions."""
|
||||||
|
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
|
||||||
|
from entropice.dashboard.utils.colors import get_palette
|
||||||
|
|
||||||
|
|
||||||
|
def plot_regression_scatter(
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    title: str = "True vs Predicted",
) -> go.Figure:
    """Create scatter plot of true vs predicted values for regression.

    Points are coloured by local density; a dashed y = x line marks perfect
    prediction, and R²/MSE/MAE are shown in a corner annotation.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        title: Title for the plot.

    Returns:
        Plotly figure with regression scatter plot.

    """
    # Convert to numpy arrays if needed.
    y_true_np = cast(np.ndarray, y_true.to_numpy()) if isinstance(y_true, pd.Series) else y_true
    y_pred_np = cast(np.ndarray, y_pred.to_numpy()) if isinstance(y_pred, pd.Series) else y_pred

    # Calculate metrics. Guard R² against a constant target (zero variance),
    # which would otherwise divide by zero.
    mse = np.mean((y_true_np - y_pred_np) ** 2)
    mae = np.mean(np.abs(y_true_np - y_pred_np))
    ss_res = np.sum((y_true_np - y_pred_np) ** 2)
    ss_tot = np.sum((y_true_np - np.mean(y_true_np)) ** 2)
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float("nan")

    # Get colormap.
    hex_colors = get_palette("r2", n_colors=256)

    # Calculate point density for coloring.
    from scipy.stats import gaussian_kde

    try:
        # KDE over the (true, pred) point cloud.
        xy = np.vstack([y_true_np, y_pred_np])
        kde = gaussian_kde(xy)
        density = kde(xy)
    except (np.linalg.LinAlgError, ValueError):
        # Fallback if KDE fails (e.g., all points identical).
        density = np.ones(len(y_true_np))

    fig = go.Figure()

    # Scatter of (true, predicted) pairs, coloured by density.
    fig.add_trace(
        go.Scatter(
            x=y_true_np,
            y=y_pred_np,
            mode="markers",
            marker={
                "size": 6,
                "color": density,
                "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                "showscale": False,
                "opacity": 0.6,
            },
            text=[f"True: {true:.3f}<br>Pred: {pred:.3f}" for true, pred in zip(y_true_np, y_pred_np)],
            hovertemplate="%{text}<extra></extra>",
            name="Data",
        )
    )

    # Diagonal y = x line (perfect prediction).
    min_val = min(y_true_np.min(), y_pred_np.min())
    max_val = max(y_true_np.max(), y_pred_np.max())
    fig.add_trace(
        go.Scatter(
            x=[min_val, max_val],
            y=[min_val, max_val],
            mode="lines",
            line={"color": "red", "dash": "dash", "width": 2},
            name="Perfect Prediction",
            hovertemplate="y = x<extra></extra>",
        )
    )

    # Metrics box pinned to the top-left corner (paper coordinates).
    metrics_text = f"R² = {r2:.4f}<br>MSE = {mse:.4f}<br>MAE = {mae:.4f}"
    fig.add_annotation(
        x=0.02,
        y=0.98,
        xref="paper",
        yref="paper",
        text=metrics_text,
        showarrow=False,
        bgcolor="white",
        bordercolor="black",
        borderwidth=1,
        xanchor="left",
        yanchor="top",
        font={"size": 12},
    )

    fig.update_layout(
        title=title,
        xaxis_title="True Values",
        yaxis_title="Predicted Values",
        height=500,
        showlegend=True,
        legend={"x": 0.98, "y": 0.02, "xanchor": "right", "yanchor": "bottom"},
    )

    # Make axes equal so y = x renders at 45 degrees.
    fig.update_xaxes(scaleanchor="y", scaleratio=1)

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def plot_residuals(
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    title: str = "Residual Plot",
) -> go.Figure:
    """Create residual plot for regression diagnostics.

    Residuals (true - predicted) are plotted against predictions, coloured
    by their absolute magnitude, with a dashed zero line.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        title: Title for the plot.

    Returns:
        Plotly figure with residual plot.

    """
    # Convert to numpy arrays if needed.
    y_true_np = cast(np.ndarray, y_true.to_numpy()) if isinstance(y_true, pd.Series) else y_true
    y_pred_np = cast(np.ndarray, y_pred.to_numpy()) if isinstance(y_pred, pd.Series) else y_pred

    # Residuals: positive means the model under-predicted.
    residuals = y_true_np - y_pred_np

    # Get colormap.
    hex_colors = get_palette("r2", n_colors=256)

    fig = go.Figure()

    # FIX: consistently use the converted numpy arrays (the original mixed
    # the raw `y_pred` Series with the numpy residuals).
    fig.add_trace(
        go.Scatter(
            x=y_pred_np,
            y=residuals,
            mode="markers",
            marker={
                "size": 6,
                "color": np.abs(residuals),
                "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                "showscale": True,
                "colorbar": {"title": "Abs Residual"},
                "opacity": 0.6,
            },
            text=[f"Pred: {pred:.3f}<br>Residual: {res:.3f}" for pred, res in zip(y_pred_np, residuals)],
            hovertemplate="%{text}<extra></extra>",
        )
    )

    # Zero line: points on it are perfectly predicted.
    fig.add_hline(y=0, line_dash="dash", line_color="red", line_width=2)

    fig.update_layout(
        title=title,
        xaxis_title="Predicted Values",
        yaxis_title="Residuals (True - Predicted)",
        height=400,
        showlegend=False,
    )

    return fig
|
||||||
185
src/entropice/dashboard/sections/cv_result.py
Normal file
185
src/entropice/dashboard/sections/cv_result.py
Normal file
|
|
@ -0,0 +1,185 @@
|
||||||
|
"""Training Result Sections."""
|
||||||
|
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
|
from entropice.dashboard.plots.metrics import plot_confusion_matrix
|
||||||
|
from entropice.dashboard.utils.formatters import format_metric_name
|
||||||
|
from entropice.dashboard.utils.loaders import TrainingResult
|
||||||
|
from entropice.dashboard.utils.stats import CVMetricStatistics
|
||||||
|
from entropice.utils.types import GridConfig
|
||||||
|
|
||||||
|
|
||||||
|
def render_run_information(selected_result: TrainingResult, refit_metric):
    """Render training run configuration overview.

    Args:
        selected_result: The selected TrainingResult object.
        refit_metric: The refit metric used for model selection.

    """
    st.header("📋 Run Information")

    settings = selected_result.settings
    grid_config = GridConfig.from_grid_level(f"{settings.grid}{settings.level}")  # ty:ignore[invalid-argument-type]

    # One summary metric per column, in a fixed display order.
    summary = (
        ("Task", settings.task.capitalize()),
        ("Target", settings.target.capitalize()),
        ("Grid", grid_config.display_name),
        ("Model", settings.model.upper()),
        ("Trials", len(selected_result.results)),
    )
    for column, (label, value) in zip(st.columns(len(summary)), summary):
        with column:
            st.metric(label, value)

    st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}")
|
||||||
|
|
||||||
|
|
||||||
|
def _render_metrics(metrics: dict[str, float]):
    """Render a set of metrics, wrapped across up to five columns.

    Args:
        metrics: Dictionary of metric names and their values.

    """
    # Metrics fill the columns round-robin (at most five across).
    n_cols = min(5, len(metrics))
    columns = st.columns(n_cols)
    for position, (name, value) in enumerate(metrics.items()):
        with columns[position % n_cols]:
            st.metric(format_metric_name(name), f"{value:.4f}")
|
||||||
|
|
||||||
|
|
||||||
|
def render_metrics_section(selected_result: TrainingResult):
    """Render test, training, and overall metrics for the final model.

    Args:
        selected_result: The selected TrainingResult object.

    """
    # (header, caption, metrics) for each performance panel, in display order.
    panels = (
        (
            "🎯 Test Set Performance",
            "Performance metrics on the held-out test set (best model from hyperparameter search)",
            selected_result.test_metrics,
        ),
        (
            "🏋️♂️ Training Set Performance",
            "Performance metrics on the training set (best model from hyperparameter search)",
            selected_result.train_metrics,
        ),
        (
            "🧮 Overall Performance",
            "Overall performance metrics combining training and test sets",
            selected_result.combined_metrics,
        ),
    )
    for header, caption, metrics in panels:
        st.header(header)
        st.caption(caption)
        _render_metrics(metrics)
|
||||||
|
|
||||||
|
|
||||||
|
@st.fragment
|
||||||
|
def render_confusion_matrices(selected_result: TrainingResult):
|
||||||
|
"""Render confusion matrices for classification tasks.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
selected_result: The selected TrainingResult object.
|
||||||
|
|
||||||
|
"""
|
||||||
|
st.header("🎭 Confusion Matrices")
|
||||||
|
|
||||||
|
# Check if this is a classification task
|
||||||
|
if selected_result.settings.task not in ["binary", "count_regimes", "density_regimes"]:
|
||||||
|
st.info(
|
||||||
|
"📊 Confusion matrices are only available for classification tasks "
|
||||||
|
"(binary, count_regimes, density_regimes)."
|
||||||
|
)
|
||||||
|
st.caption("Coming soon for regression tasks: residual plots and error distributions.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Check if confusion matrix data is available
|
||||||
|
if selected_result.confusion_matrix is None:
|
||||||
|
st.warning("⚠️ No confusion matrix data found for this training result.")
|
||||||
|
return
|
||||||
|
|
||||||
|
cm = selected_result.confusion_matrix
|
||||||
|
|
||||||
|
# Add normalization selection
|
||||||
|
st.subheader("Display Options")
|
||||||
|
normalize_option = st.radio(
|
||||||
|
"Normalization",
|
||||||
|
options=["No normalization", "Normalize over True Labels", "Normalize over Predicted Labels"],
|
||||||
|
horizontal=True,
|
||||||
|
help="Choose how to normalize the confusion matrix values",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Map selection to normalization mode
|
||||||
|
normalize_map = {
|
||||||
|
"No normalization": "none",
|
||||||
|
"Normalize over True Labels": "true",
|
||||||
|
"Normalize over Predicted Labels": "pred",
|
||||||
|
}
|
||||||
|
normalize_mode = normalize_map[normalize_option]
|
||||||
|
|
||||||
|
cols = st.columns(3)
|
||||||
|
|
||||||
|
with cols[0]:
|
||||||
|
# Test Set Confusion Matrix
|
||||||
|
st.subheader("Test Set")
|
||||||
|
st.caption("Held-out test set")
|
||||||
|
fig_test = plot_confusion_matrix(cm["test"], title="Test Set", normalize=normalize_mode)
|
||||||
|
st.plotly_chart(fig_test, width="stretch")
|
||||||
|
with cols[1]:
|
||||||
|
# Training Set Confusion Matrix
|
||||||
|
st.subheader("Training Set")
|
||||||
|
st.caption("Training set")
|
||||||
|
fig_train = plot_confusion_matrix(cm["train"], title="Training Set", normalize=normalize_mode)
|
||||||
|
st.plotly_chart(fig_train, width="stretch")
|
||||||
|
with cols[2]:
|
||||||
|
# Combined Confusion Matrix
|
||||||
|
st.subheader("Combined")
|
||||||
|
st.caption("Train + Test sets")
|
||||||
|
fig_combined = plot_confusion_matrix(cm["combined"], title="Combined", normalize=normalize_mode)
|
||||||
|
st.plotly_chart(fig_combined, width="stretch")
|
||||||
|
|
||||||
|
|
||||||
|
def render_cv_statistics_section(cv_stats: CVMetricStatistics, test_score: float):
|
||||||
|
"""Render cross-validation statistics for selected metric.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
cv_stats: CVMetricStatistics object containing cross-validation statistics.
|
||||||
|
test_score: The test set score for the selected metric.
|
||||||
|
|
||||||
|
"""
|
||||||
|
st.header("📈 Cross-Validation Statistics")
|
||||||
|
st.caption("Performance during hyperparameter search (averaged across CV folds)")
|
||||||
|
|
||||||
|
col1, col2, col3, col4, col5 = st.columns(5)
|
||||||
|
|
||||||
|
with col1:
|
||||||
|
st.metric("Best Score", f"{cv_stats.best_score:.4f}")
|
||||||
|
with col2:
|
||||||
|
st.metric("Mean Score", f"{cv_stats.mean_score:.4f}")
|
||||||
|
with col3:
|
||||||
|
st.metric("Std Dev", f"{cv_stats.std_score:.4f}")
|
||||||
|
with col4:
|
||||||
|
st.metric("Worst Score", f"{cv_stats.worst_score:.4f}")
|
||||||
|
with col5:
|
||||||
|
st.metric("Median Score", f"{cv_stats.median_score:.4f}")
|
||||||
|
|
||||||
|
if cv_stats.mean_cv_std is not None:
|
||||||
|
st.info(f"**Mean CV Std:** {cv_stats.mean_cv_std:.4f} - Average standard deviation across CV folds")
|
||||||
|
|
||||||
|
# Compare with test metric
|
||||||
|
st.subheader("CV vs Test Performance")
|
||||||
|
|
||||||
|
col1, col2, col3 = st.columns(3)
|
||||||
|
with col1:
|
||||||
|
st.metric("Best CV Score", f"{cv_stats.best_score:.4f}")
|
||||||
|
with col2:
|
||||||
|
st.metric("Test Score", f"{test_score:.4f}")
|
||||||
|
with col3:
|
||||||
|
delta = test_score - cv_stats.best_score
|
||||||
|
delta_pct = (delta / cv_stats.best_score * 100) if cv_stats.best_score != 0 else 0
|
||||||
|
st.metric("Difference", f"{delta:+.4f}", delta=f"{delta_pct:+.2f}%")
|
||||||
|
|
||||||
|
if abs(delta) > cv_stats.std_score:
|
||||||
|
st.warning(
|
||||||
|
"⚠️ Test performance differs significantly (larger than the CV standard deviation) from CV performance. "
|
||||||
|
"This may indicate overfitting or data distribution mismatch between training and test sets."
|
||||||
|
)
|
||||||
|
|
@ -2,15 +2,16 @@
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
from entropice.dashboard.utils.loaders import TrainingResult
|
from entropice.dashboard.utils.loaders import AutogluonTrainingResult, TrainingResult
|
||||||
from entropice.utils.types import (
|
from entropice.utils.types import (
|
||||||
GridConfig,
|
GridConfig,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def render_training_results_summary(training_results: list[TrainingResult]):
|
def render_training_results_summary(training_results: list[TrainingResult | AutogluonTrainingResult]):
|
||||||
"""Render summary metrics for training results."""
|
"""Render summary metrics for training results."""
|
||||||
st.header("📊 Training Results Summary")
|
st.header("📊 Training Results Summary")
|
||||||
col1, col2, col3, col4 = st.columns(4)
|
col1, col2, col3, col4 = st.columns(4)
|
||||||
|
|
@ -23,7 +24,7 @@ def render_training_results_summary(training_results: list[TrainingResult]):
|
||||||
st.metric("Total Runs", len(training_results))
|
st.metric("Total Runs", len(training_results))
|
||||||
|
|
||||||
with col3:
|
with col3:
|
||||||
models = {tr.settings.model for tr in training_results}
|
models = {tr.settings.model for tr in training_results if hasattr(tr.settings, "model")}
|
||||||
st.metric("Model Types", len(models))
|
st.metric("Model Types", len(models))
|
||||||
|
|
||||||
with col4:
|
with col4:
|
||||||
|
|
@ -33,14 +34,14 @@ def render_training_results_summary(training_results: list[TrainingResult]):
|
||||||
|
|
||||||
|
|
||||||
@st.fragment
|
@st.fragment
|
||||||
def render_experiment_results(training_results: list[TrainingResult]): # noqa: C901
|
def render_experiment_results(training_results: list[TrainingResult | AutogluonTrainingResult]): # noqa: C901
|
||||||
"""Render detailed experiment results table and expandable details."""
|
"""Render detailed experiment results table and expandable details."""
|
||||||
st.header("🎯 Experiment Results")
|
st.header("🎯 Experiment Results")
|
||||||
|
|
||||||
# Filters
|
# Filters
|
||||||
experiments = sorted({tr.experiment for tr in training_results if tr.experiment})
|
experiments = sorted({tr.experiment for tr in training_results if tr.experiment})
|
||||||
tasks = sorted({tr.settings.task for tr in training_results})
|
tasks = sorted({tr.settings.task for tr in training_results})
|
||||||
models = sorted({tr.settings.model for tr in training_results})
|
models = sorted({tr.settings.model if isinstance(tr, TrainingResult) else "autogluon" for tr in training_results})
|
||||||
grids = sorted({f"{tr.settings.grid}-{tr.settings.level}" for tr in training_results})
|
grids = sorted({f"{tr.settings.grid}-{tr.settings.level}" for tr in training_results})
|
||||||
|
|
||||||
# Create filter columns
|
# Create filter columns
|
||||||
|
|
@ -87,14 +88,26 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
||||||
filtered_results = [tr for tr in filtered_results if tr.experiment == selected_experiment]
|
filtered_results = [tr for tr in filtered_results if tr.experiment == selected_experiment]
|
||||||
if selected_task != "All":
|
if selected_task != "All":
|
||||||
filtered_results = [tr for tr in filtered_results if tr.settings.task == selected_task]
|
filtered_results = [tr for tr in filtered_results if tr.settings.task == selected_task]
|
||||||
if selected_model != "All":
|
if selected_model != "All" and selected_model != "autogluon":
|
||||||
filtered_results = [tr for tr in filtered_results if tr.settings.model == selected_model]
|
filtered_results = [
|
||||||
|
tr for tr in filtered_results if isinstance(tr, TrainingResult) and tr.settings.model == selected_model
|
||||||
|
]
|
||||||
|
elif selected_model == "autogluon":
|
||||||
|
filtered_results = [tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
|
||||||
if selected_grid != "All":
|
if selected_grid != "All":
|
||||||
filtered_results = [tr for tr in filtered_results if f"{tr.settings.grid}-{tr.settings.level}" == selected_grid]
|
filtered_results = [tr for tr in filtered_results if f"{tr.settings.grid}-{tr.settings.level}" == selected_grid]
|
||||||
|
|
||||||
st.subheader("Results Table")
|
st.subheader("Results Table")
|
||||||
|
|
||||||
summary_df = TrainingResult.to_dataframe(filtered_results)
|
summary_df = TrainingResult.to_dataframe([tr for tr in filtered_results if isinstance(tr, TrainingResult)])
|
||||||
|
autogluon_df = AutogluonTrainingResult.to_dataframe(
|
||||||
|
[tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
|
||||||
|
)
|
||||||
|
if len(summary_df) == 0:
|
||||||
|
summary_df = autogluon_df
|
||||||
|
elif len(autogluon_df) > 0:
|
||||||
|
summary_df = pd.concat([summary_df, autogluon_df], ignore_index=True)
|
||||||
|
|
||||||
# Display with color coding for best scores
|
# Display with color coding for best scores
|
||||||
st.dataframe(
|
st.dataframe(
|
||||||
summary_df,
|
summary_df,
|
||||||
|
|
@ -107,6 +120,8 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
||||||
for tr in filtered_results:
|
for tr in filtered_results:
|
||||||
tr_info = tr.display_info
|
tr_info = tr.display_info
|
||||||
display_name = tr_info.get_display_name("model_first")
|
display_name = tr_info.get_display_name("model_first")
|
||||||
|
model = "autogluon" if isinstance(tr, AutogluonTrainingResult) else tr.settings.model
|
||||||
|
cv_splits = tr.settings.cv_splits if hasattr(tr.settings, "cv_splits") else "N/A"
|
||||||
with st.expander(display_name):
|
with st.expander(display_name):
|
||||||
col1, col2 = st.columns([1, 2])
|
col1, col2 = st.columns([1, 2])
|
||||||
|
|
||||||
|
|
@ -117,12 +132,12 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
||||||
f"- **Experiment:** {tr.experiment}\n"
|
f"- **Experiment:** {tr.experiment}\n"
|
||||||
f"- **Task:** {tr.settings.task}\n"
|
f"- **Task:** {tr.settings.task}\n"
|
||||||
f"- **Target:** {tr.settings.target}\n"
|
f"- **Target:** {tr.settings.target}\n"
|
||||||
f"- **Model:** {tr.settings.model}\n"
|
f"- **Model:** {model}\n"
|
||||||
f"- **Grid:** {grid_config.display_name}\n"
|
f"- **Grid:** {grid_config.display_name}\n"
|
||||||
f"- **Created At:** {tr_info.timestamp.strftime('%Y-%m-%d %H:%M')}\n"
|
f"- **Created At:** {tr_info.timestamp.strftime('%Y-%m-%d %H:%M')}\n"
|
||||||
f"- **Temporal Mode:** {tr.settings.temporal_mode}\n"
|
f"- **Temporal Mode:** {tr.settings.temporal_mode}\n"
|
||||||
f"- **Members:** {', '.join(tr.settings.members)}\n"
|
f"- **Members:** {', '.join(tr.settings.members)}\n"
|
||||||
f"- **CV Splits:** {tr.settings.cv_splits}\n"
|
f"- **CV Splits:** {cv_splits}\n"
|
||||||
f"- **Classes:** {tr.settings.classes}\n"
|
f"- **Classes:** {tr.settings.classes}\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -140,26 +155,29 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
||||||
file_str += f"- 📄 `{file.name}`\n"
|
file_str += f"- 📄 `{file.name}`\n"
|
||||||
st.write(file_str)
|
st.write(file_str)
|
||||||
with col2:
|
with col2:
|
||||||
st.write("**CV Score Summary:**")
|
if isinstance(tr, AutogluonTrainingResult):
|
||||||
|
st.write("**Leaderboard:**")
|
||||||
# Extract all test scores
|
st.dataframe(tr.leaderboard, width="stretch", hide_index=True)
|
||||||
metric_df = tr.get_metric_dataframe()
|
|
||||||
if metric_df is not None:
|
|
||||||
st.dataframe(metric_df, width="stretch", hide_index=True)
|
|
||||||
else:
|
else:
|
||||||
st.write("No test scores found in results.")
|
st.write("**CV Score Summary:**")
|
||||||
|
# Extract all test scores
|
||||||
|
metric_df = tr.get_metric_dataframe()
|
||||||
|
if metric_df is not None:
|
||||||
|
st.dataframe(metric_df, width="stretch", hide_index=True)
|
||||||
|
else:
|
||||||
|
st.write("No test scores found in results.")
|
||||||
|
|
||||||
# Show parameter space explored
|
# Show parameter space explored
|
||||||
if "initial_K" in tr.results.columns: # Common parameter
|
if "initial_K" in tr.results.columns: # Common parameter
|
||||||
st.write("\n**Parameter Ranges Explored:**")
|
st.write("\n**Parameter Ranges Explored:**")
|
||||||
for param in ["initial_K", "eps_cl", "eps_e"]:
|
for param in ["initial_K", "eps_cl", "eps_e"]:
|
||||||
if param in tr.results.columns:
|
if param in tr.results.columns:
|
||||||
min_val = tr.results[param].min()
|
min_val = tr.results[param].min()
|
||||||
max_val = tr.results[param].max()
|
max_val = tr.results[param].max()
|
||||||
unique_vals = tr.results[param].nunique()
|
unique_vals = tr.results[param].nunique()
|
||||||
st.write(f"- **{param}:** {unique_vals} values ({min_val:.2e} to {max_val:.2e})")
|
st.write(f"- **{param}:** {unique_vals} values ({min_val:.2e} to {max_val:.2e})")
|
||||||
|
|
||||||
st.write("**CV Results DataFrame:**")
|
st.write("**CV Results DataFrame:**")
|
||||||
st.dataframe(tr.results, width="stretch", hide_index=True)
|
st.dataframe(tr.results, width="stretch", hide_index=True)
|
||||||
|
|
||||||
st.write(f"\n**Path:** `{tr.path}`")
|
st.write(f"\n**Path:** `{tr.path}`")
|
||||||
|
|
|
||||||
172
src/entropice/dashboard/sections/hparam_space.py
Normal file
172
src/entropice/dashboard/sections/hparam_space.py
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
"""Hyperparameter Space Visualization Section."""
|
||||||
|
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
|
from entropice.dashboard.plots.hyperparameter_space import (
|
||||||
|
plot_parameter_correlations,
|
||||||
|
plot_parameter_distributions,
|
||||||
|
plot_parameter_interactions,
|
||||||
|
plot_score_evolution,
|
||||||
|
plot_score_vs_parameters,
|
||||||
|
)
|
||||||
|
from entropice.dashboard.utils.formatters import format_metric_name
|
||||||
|
from entropice.dashboard.utils.loaders import TrainingResult
|
||||||
|
|
||||||
|
|
||||||
|
def _render_performance_summary(results, refit_metric: str):
|
||||||
|
"""Render performance summary subsection."""
|
||||||
|
best_idx = results[f"mean_test_{refit_metric}"].idxmax()
|
||||||
|
best_row = results.loc[best_idx]
|
||||||
|
# Extract parameter columns
|
||||||
|
param_cols = [col for col in results.columns if col.startswith("param_") and col != "params"]
|
||||||
|
best_params = {col.replace("param_", ""): best_row[col] for col in param_cols}
|
||||||
|
|
||||||
|
# Display best parameter combination
|
||||||
|
if not best_params:
|
||||||
|
return
|
||||||
|
|
||||||
|
with st.container(border=True):
|
||||||
|
st.subheader("🏆 Best Parameter Combination")
|
||||||
|
st.caption(f"Parameters of the best model (selected by {format_metric_name(refit_metric)} score)")
|
||||||
|
n_params = len(best_params)
|
||||||
|
cols = st.columns(n_params)
|
||||||
|
for idx, (param_name, param_value) in enumerate(best_params.items()):
|
||||||
|
with cols[idx]:
|
||||||
|
# Format value based on type and magnitude
|
||||||
|
if isinstance(param_value, int):
|
||||||
|
formatted_value = f"{param_value:.0f}"
|
||||||
|
elif isinstance(param_value, float):
|
||||||
|
# Use scientific notation for very small numbers
|
||||||
|
if abs(param_value) < 0.001 and param_value != 0:
|
||||||
|
formatted_value = f"{param_value:.2e}"
|
||||||
|
else:
|
||||||
|
formatted_value = f"{param_value:.4f}"
|
||||||
|
else:
|
||||||
|
formatted_value = str(param_value)
|
||||||
|
|
||||||
|
st.metric(param_name, formatted_value)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_parameter_distributions(results, param_grid: dict | None):
|
||||||
|
"""Render parameter distributions subsection."""
|
||||||
|
st.subheader("Parameter Distributions")
|
||||||
|
st.caption("Distribution of hyperparameter values explored during random search")
|
||||||
|
|
||||||
|
param_charts = plot_parameter_distributions(results, param_grid)
|
||||||
|
|
||||||
|
if not param_charts:
|
||||||
|
st.info("No parameter distribution data available.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Display charts in a grid
|
||||||
|
param_names = list(param_charts.keys())
|
||||||
|
n_cols = min(3, len(param_names))
|
||||||
|
n_rows = (len(param_names) + n_cols - 1) // n_cols
|
||||||
|
|
||||||
|
for row in range(n_rows):
|
||||||
|
cols = st.columns(n_cols)
|
||||||
|
for col_idx in range(n_cols):
|
||||||
|
param_idx = row * n_cols + col_idx
|
||||||
|
if param_idx < len(param_names):
|
||||||
|
param_name = param_names[param_idx]
|
||||||
|
with cols[col_idx]:
|
||||||
|
st.plotly_chart(param_charts[param_name], width="stretch")
|
||||||
|
|
||||||
|
|
||||||
|
def _render_score_evolution(results, selected_metric: str):
|
||||||
|
"""Render score evolution subsection."""
|
||||||
|
st.subheader("Score Evolution Over Iterations")
|
||||||
|
st.caption(f"How {format_metric_name(selected_metric)} evolved during the random search")
|
||||||
|
|
||||||
|
evolution_chart = plot_score_evolution(results, selected_metric)
|
||||||
|
if evolution_chart:
|
||||||
|
st.plotly_chart(evolution_chart, width="stretch")
|
||||||
|
else:
|
||||||
|
st.warning(f"Score evolution not available for metric: {selected_metric}")
|
||||||
|
|
||||||
|
|
||||||
|
def _render_score_vs_parameters(results, selected_metric: str, param_grid: dict | None):
|
||||||
|
"""Render score vs parameters subsection."""
|
||||||
|
st.subheader("Score vs Individual Parameters")
|
||||||
|
st.caption(f"Relationship between {format_metric_name(selected_metric)} and each hyperparameter")
|
||||||
|
|
||||||
|
score_vs_param_charts = plot_score_vs_parameters(results, selected_metric, param_grid)
|
||||||
|
|
||||||
|
if not score_vs_param_charts:
|
||||||
|
st.info("No score vs parameter data available.")
|
||||||
|
return
|
||||||
|
|
||||||
|
param_names = list(score_vs_param_charts.keys())
|
||||||
|
n_cols = min(2, len(param_names))
|
||||||
|
n_rows = (len(param_names) + n_cols - 1) // n_cols
|
||||||
|
|
||||||
|
for row in range(n_rows):
|
||||||
|
cols = st.columns(n_cols)
|
||||||
|
for col_idx in range(n_cols):
|
||||||
|
param_idx = row * n_cols + col_idx
|
||||||
|
if param_idx < len(param_names):
|
||||||
|
param_name = param_names[param_idx]
|
||||||
|
with cols[col_idx]:
|
||||||
|
st.plotly_chart(score_vs_param_charts[param_name], width="stretch")
|
||||||
|
|
||||||
|
|
||||||
|
def _render_parameter_correlations(results, selected_metric: str):
|
||||||
|
"""Render parameter correlations subsection."""
|
||||||
|
st.subheader("Parameter-Score Correlations")
|
||||||
|
st.caption(f"Correlation between numeric parameters and {format_metric_name(selected_metric)}")
|
||||||
|
|
||||||
|
corr_chart = plot_parameter_correlations(results, selected_metric)
|
||||||
|
if corr_chart:
|
||||||
|
st.plotly_chart(corr_chart, width="stretch")
|
||||||
|
else:
|
||||||
|
st.info("No numeric parameters found for correlation analysis.")
|
||||||
|
|
||||||
|
|
||||||
|
def _render_parameter_interactions(results, selected_metric: str, param_grid: dict | None):
|
||||||
|
"""Render parameter interactions subsection."""
|
||||||
|
st.subheader("Parameter Interactions")
|
||||||
|
st.caption(f"Interaction between parameter pairs and their effect on {format_metric_name(selected_metric)}")
|
||||||
|
|
||||||
|
interaction_charts = plot_parameter_interactions(results, selected_metric, param_grid)
|
||||||
|
|
||||||
|
if not interaction_charts:
|
||||||
|
st.info("Not enough numeric parameters for parameter interaction visualization.")
|
||||||
|
return
|
||||||
|
|
||||||
|
n_cols = min(2, len(interaction_charts))
|
||||||
|
n_rows = (len(interaction_charts) + n_cols - 1) // n_cols
|
||||||
|
|
||||||
|
for row in range(n_rows):
|
||||||
|
cols = st.columns(n_cols)
|
||||||
|
for col_idx in range(n_cols):
|
||||||
|
chart_idx = row * n_cols + col_idx
|
||||||
|
if chart_idx < len(interaction_charts):
|
||||||
|
with cols[col_idx]:
|
||||||
|
st.plotly_chart(interaction_charts[chart_idx], width="stretch")
|
||||||
|
|
||||||
|
|
||||||
|
def render_hparam_space_section(selected_result: TrainingResult, selected_metric: str):
|
||||||
|
"""Render the hyperparameter space visualization section.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
selected_result: The selected TrainingResult object.
|
||||||
|
selected_metric: The metric to focus analysis on.
|
||||||
|
|
||||||
|
"""
|
||||||
|
st.header("🧩 Hyperparameter Space Exploration")
|
||||||
|
|
||||||
|
results = selected_result.results
|
||||||
|
refit_metric = selected_result._get_best_metric_name()
|
||||||
|
param_grid = selected_result.settings.param_grid
|
||||||
|
|
||||||
|
_render_performance_summary(results, refit_metric)
|
||||||
|
|
||||||
|
_render_parameter_distributions(results, param_grid)
|
||||||
|
|
||||||
|
_render_score_evolution(results, selected_metric)
|
||||||
|
|
||||||
|
_render_score_vs_parameters(results, selected_metric, param_grid)
|
||||||
|
|
||||||
|
_render_parameter_correlations(results, selected_metric)
|
||||||
|
|
||||||
|
_render_parameter_interactions(results, selected_metric, param_grid)
|
||||||
122
src/entropice/dashboard/sections/regression_analysis.py
Normal file
122
src/entropice/dashboard/sections/regression_analysis.py
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
"""Regression Analysis Section."""
|
||||||
|
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
|
from entropice.dashboard.plots.regression import plot_regression_scatter, plot_residuals
|
||||||
|
from entropice.dashboard.utils.loaders import TrainingResult
|
||||||
|
from entropice.ml.dataset import DatasetEnsemble
|
||||||
|
|
||||||
|
|
||||||
|
def render_regression_analysis(selected_result: TrainingResult):
|
||||||
|
"""Render regression analysis with true vs predicted scatter plots.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
selected_result: The selected TrainingResult object.
|
||||||
|
|
||||||
|
"""
|
||||||
|
st.header("📊 Regression Analysis")
|
||||||
|
|
||||||
|
# Check if this is a regression task
|
||||||
|
if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]:
|
||||||
|
st.info("📈 Regression analysis is only available for regression tasks (count, density).")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Load predictions
|
||||||
|
predictions_df = selected_result.load_predictions()
|
||||||
|
if predictions_df is None:
|
||||||
|
st.warning("⚠️ No prediction data found for this training result.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Create DatasetEnsemble from settings
|
||||||
|
with st.spinner("Loading training data to get true values..."):
|
||||||
|
ensemble = DatasetEnsemble(
|
||||||
|
grid=selected_result.settings.grid,
|
||||||
|
level=selected_result.settings.level,
|
||||||
|
members=selected_result.settings.members,
|
||||||
|
temporal_mode=selected_result.settings.temporal_mode,
|
||||||
|
dimension_filters=selected_result.settings.dimension_filters,
|
||||||
|
variable_filters=selected_result.settings.variable_filters,
|
||||||
|
add_lonlat=selected_result.settings.add_lonlat,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create training set to get true values
|
||||||
|
training_set = ensemble.create_training_set(
|
||||||
|
task=selected_result.settings.task,
|
||||||
|
target=selected_result.settings.target,
|
||||||
|
device="cpu",
|
||||||
|
cache_mode="read",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get split information
|
||||||
|
split_series = training_set.split
|
||||||
|
|
||||||
|
# Merge predictions with true values and split info
|
||||||
|
# predictions_df should have 'cell_id' and 'predicted' columns
|
||||||
|
# training_set.targets has 'y' (true values) with cell_id as index
|
||||||
|
true_values = training_set.targets[["y"]].reset_index()
|
||||||
|
|
||||||
|
# Merge on cell_id
|
||||||
|
merged = predictions_df.merge(true_values, on="cell_id", how="inner")
|
||||||
|
merged["split"] = split_series.reindex(merged["cell_id"]).values
|
||||||
|
|
||||||
|
# Get train, test, and combined data
|
||||||
|
train_data = merged[merged["split"] == "train"]
|
||||||
|
test_data = merged[merged["split"] == "test"]
|
||||||
|
|
||||||
|
if len(train_data) == 0 or len(test_data) == 0:
|
||||||
|
st.error("❌ Could not properly split data into train and test sets.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Display scatter plots
|
||||||
|
st.subheader("True vs Predicted Values")
|
||||||
|
st.caption("Scatter plots showing the relationship between true and predicted values")
|
||||||
|
|
||||||
|
cols = st.columns(3)
|
||||||
|
|
||||||
|
with cols[0]:
|
||||||
|
st.markdown("#### Test Set")
|
||||||
|
st.caption("Held-out test set")
|
||||||
|
fig_test = plot_regression_scatter(
|
||||||
|
test_data["y"],
|
||||||
|
test_data["predicted"],
|
||||||
|
title="Test Set",
|
||||||
|
)
|
||||||
|
st.plotly_chart(fig_test, use_container_width=True)
|
||||||
|
|
||||||
|
with cols[1]:
|
||||||
|
st.markdown("#### Training Set")
|
||||||
|
st.caption("Training set")
|
||||||
|
fig_train = plot_regression_scatter(
|
||||||
|
train_data["y"],
|
||||||
|
train_data["predicted"],
|
||||||
|
title="Training Set",
|
||||||
|
)
|
||||||
|
st.plotly_chart(fig_train, use_container_width=True)
|
||||||
|
|
||||||
|
with cols[2]:
|
||||||
|
st.markdown("#### Combined")
|
||||||
|
st.caption("Train + Test sets")
|
||||||
|
fig_combined = plot_regression_scatter(
|
||||||
|
merged["y"],
|
||||||
|
merged["predicted"],
|
||||||
|
title="Combined",
|
||||||
|
)
|
||||||
|
st.plotly_chart(fig_combined, use_container_width=True)
|
||||||
|
|
||||||
|
# Display residual plots
|
||||||
|
st.subheader("Residual Analysis")
|
||||||
|
st.caption("Residual plots to assess model fit and identify patterns in errors")
|
||||||
|
|
||||||
|
cols = st.columns(3)
|
||||||
|
|
||||||
|
with cols[0]:
|
||||||
|
fig_test_res = plot_residuals(test_data["y"], test_data["predicted"], title="Test Set Residuals")
|
||||||
|
st.plotly_chart(fig_test_res, use_container_width=True)
|
||||||
|
|
||||||
|
with cols[1]:
|
||||||
|
fig_train_res = plot_residuals(train_data["y"], train_data["predicted"], title="Training Set Residuals")
|
||||||
|
st.plotly_chart(fig_train_res, use_container_width=True)
|
||||||
|
|
||||||
|
with cols[2]:
|
||||||
|
fig_combined_res = plot_residuals(merged["y"], merged["predicted"], title="Combined Residuals")
|
||||||
|
st.plotly_chart(fig_combined_res, use_container_width=True)
|
||||||
|
|
@ -1,70 +0,0 @@
|
||||||
"""Utilities for ordering predicted classes consistently across visualizations.
|
|
||||||
|
|
||||||
This module leverages the canonical class labels defined in the ML dataset module
|
|
||||||
to ensure consistent ordering across all visualizations.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from entropice.utils.types import Task
|
|
||||||
|
|
||||||
# Canonical orderings imported from the ML pipeline
|
|
||||||
# Binary labels are defined inline in dataset.py: {False: "No RTS", True: "RTS"}
|
|
||||||
# Count/Density labels are defined in the bin_values function
|
|
||||||
BINARY_LABELS = ["No RTS", "RTS"]
|
|
||||||
COUNT_LABELS = ["None", "Very Few", "Few", "Several", "Many", "Very Many"]
|
|
||||||
DENSITY_LABELS = ["Empty", "Very Sparse", "Sparse", "Moderate", "Dense", "Very Dense"]
|
|
||||||
|
|
||||||
CLASS_ORDERINGS: dict[Task | str, list[str]] = {
|
|
||||||
"binary": BINARY_LABELS,
|
|
||||||
"count": COUNT_LABELS,
|
|
||||||
"density": DENSITY_LABELS,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_ordered_classes(task: Task | str, available_classes: list[str] | None = None) -> list[str]:
|
|
||||||
"""Get properly ordered class labels for a given task.
|
|
||||||
|
|
||||||
This uses the same canonical ordering as defined in the ML dataset module,
|
|
||||||
ensuring consistency between training and inference visualizations.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
task: Task type ('binary', 'count', 'density').
|
|
||||||
available_classes: Optional list of available classes to filter and order.
|
|
||||||
If None, returns all canonical classes for the task.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of class labels in proper order.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
>>> get_ordered_classes("binary")
|
|
||||||
['No RTS', 'RTS']
|
|
||||||
>>> get_ordered_classes("count", ["None", "Few", "Several"])
|
|
||||||
['None', 'Few', 'Several']
|
|
||||||
|
|
||||||
"""
|
|
||||||
canonical_order = CLASS_ORDERINGS[task]
|
|
||||||
|
|
||||||
if available_classes is None:
|
|
||||||
return canonical_order
|
|
||||||
|
|
||||||
# Filter canonical order to only include available classes, preserving order
|
|
||||||
return [cls for cls in canonical_order if cls in available_classes]
|
|
||||||
|
|
||||||
|
|
||||||
def sort_class_series(series: pd.Series, task: Task | str) -> pd.Series:
|
|
||||||
"""Sort a pandas Series with class labels according to canonical ordering.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
series: Pandas Series with class labels as index.
|
|
||||||
task: Task type ('binary', 'count', 'density').
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Sorted Series with classes in canonical order.
|
|
||||||
|
|
||||||
"""
|
|
||||||
available_classes = series.index.tolist()
|
|
||||||
ordered_classes = get_ordered_classes(task, available_classes)
|
|
||||||
|
|
||||||
# Reindex to get proper order
|
|
||||||
return series.reindex(ordered_classes)
|
|
||||||
|
|
@ -59,7 +59,7 @@ task_display_infos: dict[Task, TaskDisplayInfo] = {
|
||||||
class TrainingResultDisplayInfo:
|
class TrainingResultDisplayInfo:
|
||||||
task: Task
|
task: Task
|
||||||
target: TargetDataset
|
target: TargetDataset
|
||||||
model: Model
|
model: Model | Literal["autogluon"]
|
||||||
grid: Grid
|
grid: Grid
|
||||||
level: int
|
level: int
|
||||||
timestamp: datetime
|
timestamp: datetime
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ from shapely.geometry import shape
|
||||||
import entropice.spatial.grids
|
import entropice.spatial.grids
|
||||||
import entropice.utils.paths
|
import entropice.utils.paths
|
||||||
from entropice.dashboard.utils.formatters import TrainingResultDisplayInfo
|
from entropice.dashboard.utils.formatters import TrainingResultDisplayInfo
|
||||||
|
from entropice.ml.autogluon_training import AutoGluonTrainingSettings
|
||||||
from entropice.ml.dataset import DatasetEnsemble, TrainingSet
|
from entropice.ml.dataset import DatasetEnsemble, TrainingSet
|
||||||
from entropice.ml.training import TrainingSettings
|
from entropice.ml.training import TrainingSettings
|
||||||
from entropice.utils.types import GridConfig, TargetDataset, Task, all_target_datasets, all_tasks
|
from entropice.utils.types import GridConfig, TargetDataset, Task, all_target_datasets, all_tasks
|
||||||
|
|
@ -215,14 +216,18 @@ class TrainingResult:
|
||||||
return pd.DataFrame.from_records(records)
|
return pd.DataFrame.from_records(records)
|
||||||
|
|
||||||
|
|
||||||
@st.cache_data
|
@st.cache_data(ttl=300) # Cache for 5 minutes
|
||||||
def load_all_training_results() -> list[TrainingResult]:
|
def load_all_training_results() -> list[TrainingResult]:
|
||||||
"""Load all training results from the results directory."""
|
"""Load all training results from the results directory."""
|
||||||
results_dir = entropice.utils.paths.RESULTS_DIR
|
results_dir = entropice.utils.paths.RESULTS_DIR
|
||||||
training_results: list[TrainingResult] = []
|
training_results: list[TrainingResult] = []
|
||||||
|
incomplete_results: list[tuple[Path, Exception]] = []
|
||||||
for result_path in results_dir.iterdir():
|
for result_path in results_dir.iterdir():
|
||||||
if not result_path.is_dir():
|
if not result_path.is_dir():
|
||||||
continue
|
continue
|
||||||
|
# Skip AutoGluon results directory
|
||||||
|
if "autogluon" in result_path.name.lower():
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
training_result = TrainingResult.from_path(result_path)
|
training_result = TrainingResult.from_path(result_path)
|
||||||
training_results.append(training_result)
|
training_results.append(training_result)
|
||||||
|
|
@ -237,10 +242,159 @@ def load_all_training_results() -> list[TrainingResult]:
|
||||||
training_results.append(training_result)
|
training_results.append(training_result)
|
||||||
is_experiment_dir = True
|
is_experiment_dir = True
|
||||||
except FileNotFoundError as e2:
|
except FileNotFoundError as e2:
|
||||||
st.warning(f"Skipping incomplete training result: {e2}")
|
incomplete_results.append((experiment_path, e2))
|
||||||
if not is_experiment_dir:
|
if not is_experiment_dir:
|
||||||
st.warning(f"Skipping incomplete training result: {e}")
|
incomplete_results.append((result_path, e))
|
||||||
|
|
||||||
|
if len(incomplete_results) > 0:
|
||||||
|
st.warning(
|
||||||
|
f"Found {len(incomplete_results)} incomplete training results that were skipped:\n - "
|
||||||
|
+ "\n - ".join(f"{p}: {e}" for p, e in incomplete_results)
|
||||||
|
)
|
||||||
|
# Sort by creation time (most recent first)
|
||||||
|
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
|
||||||
|
return training_results
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class AutogluonTrainingResult:
|
||||||
|
"""Wrapper for training result data and metadata."""
|
||||||
|
|
||||||
|
path: Path
|
||||||
|
experiment: str
|
||||||
|
settings: AutoGluonTrainingSettings
|
||||||
|
test_metrics: dict[str, float | dict | pd.DataFrame]
|
||||||
|
leaderboard: pd.DataFrame
|
||||||
|
feature_importance: pd.DataFrame | None
|
||||||
|
created_at: float
|
||||||
|
files: list[Path]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_path(cls, result_path: Path, experiment_name: str | None = None) -> "AutogluonTrainingResult":
|
||||||
|
"""Load an AutogluonTrainingResult from a given result directory path."""
|
||||||
|
settings_file = result_path / "training_settings.toml"
|
||||||
|
metrics_file = result_path / "test_metrics.pickle"
|
||||||
|
leaderboard_file = result_path / "leaderboard.parquet"
|
||||||
|
feature_importance_file = result_path / "feature_importance.parquet"
|
||||||
|
all_files = list(result_path.iterdir())
|
||||||
|
if not settings_file.exists():
|
||||||
|
raise FileNotFoundError(f"Missing settings file in {result_path}")
|
||||||
|
if not metrics_file.exists():
|
||||||
|
raise FileNotFoundError(f"Missing metrics file in {result_path}")
|
||||||
|
if not leaderboard_file.exists():
|
||||||
|
raise FileNotFoundError(f"Missing leaderboard file in {result_path}")
|
||||||
|
|
||||||
|
created_at = result_path.stat().st_ctime
|
||||||
|
settings_dict = toml.load(settings_file)["settings"]
|
||||||
|
settings = AutoGluonTrainingSettings(**settings_dict)
|
||||||
|
with open(metrics_file, "rb") as f:
|
||||||
|
metrics = pickle.load(f)
|
||||||
|
leaderboard = pd.read_parquet(leaderboard_file)
|
||||||
|
|
||||||
|
if feature_importance_file.exists():
|
||||||
|
feature_importance = pd.read_parquet(feature_importance_file)
|
||||||
|
else:
|
||||||
|
feature_importance = None
|
||||||
|
|
||||||
|
return cls(
|
||||||
|
path=result_path,
|
||||||
|
experiment=experiment_name or "N/A",
|
||||||
|
settings=settings,
|
||||||
|
test_metrics=metrics,
|
||||||
|
leaderboard=leaderboard,
|
||||||
|
feature_importance=feature_importance,
|
||||||
|
created_at=created_at,
|
||||||
|
files=all_files,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def test_confusion_matrix(self) -> pd.DataFrame | None:
|
||||||
|
"""Get the test confusion matrix."""
|
||||||
|
if "confusion_matrix" not in self.test_metrics:
|
||||||
|
return None
|
||||||
|
assert isinstance(self.test_metrics["confusion_matrix"], pd.DataFrame)
|
||||||
|
return self.test_metrics["confusion_matrix"]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def display_info(self) -> TrainingResultDisplayInfo:
|
||||||
|
"""Get display information for the training result."""
|
||||||
|
return TrainingResultDisplayInfo(
|
||||||
|
task=self.settings.task,
|
||||||
|
target=self.settings.target,
|
||||||
|
model="autogluon",
|
||||||
|
grid=self.settings.grid,
|
||||||
|
level=self.settings.level,
|
||||||
|
timestamp=datetime.fromtimestamp(self.created_at),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _get_best_metric_name(self) -> str:
|
||||||
|
"""Get the primary metric name for a given task."""
|
||||||
|
match self.settings.task:
|
||||||
|
case "binary":
|
||||||
|
return "f1"
|
||||||
|
case "count_regimes" | "density_regimes":
|
||||||
|
return "f1_weighted"
|
||||||
|
case _: # regression tasks
|
||||||
|
return "root_mean_squared_error"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def to_dataframe(training_results: list["AutogluonTrainingResult"]) -> pd.DataFrame:
|
||||||
|
"""Convert a list of AutogluonTrainingResult objects to a DataFrame for display."""
|
||||||
|
records = []
|
||||||
|
for tr in training_results:
|
||||||
|
info = tr.display_info
|
||||||
|
best_metric_name = tr._get_best_metric_name()
|
||||||
|
|
||||||
|
record = {
|
||||||
|
"Experiment": tr.experiment if tr.experiment else "N/A",
|
||||||
|
"Task": info.task,
|
||||||
|
"Target": info.target,
|
||||||
|
"Model": info.model,
|
||||||
|
"Grid": GridConfig.from_grid_level((info.grid, info.level)).display_name,
|
||||||
|
"Created At": info.timestamp.strftime("%Y-%m-%d %H:%M"),
|
||||||
|
"Score-Metric": best_metric_name.title(),
|
||||||
|
"Best Models Score (Test-Set)": tr.test_metrics.get(best_metric_name),
|
||||||
|
"Path": str(tr.path.name),
|
||||||
|
}
|
||||||
|
records.append(record)
|
||||||
|
return pd.DataFrame.from_records(records)
|
||||||
|
|
||||||
|
|
||||||
|
@st.cache_data(ttl=300) # Cache for 5 minutes
|
||||||
|
def load_all_autogluon_training_results() -> list[AutogluonTrainingResult]:
|
||||||
|
"""Load all training results from the results directory."""
|
||||||
|
results_dir = entropice.utils.paths.RESULTS_DIR
|
||||||
|
training_results: list[AutogluonTrainingResult] = []
|
||||||
|
incomplete_results: list[tuple[Path, Exception]] = []
|
||||||
|
for result_path in results_dir.iterdir():
|
||||||
|
if not result_path.is_dir():
|
||||||
|
continue
|
||||||
|
# Skip AutoGluon results directory
|
||||||
|
if "autogluon" not in result_path.name.lower():
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
training_result = AutogluonTrainingResult.from_path(result_path)
|
||||||
|
training_results.append(training_result)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
is_experiment_dir = False
|
||||||
|
for experiment_path in result_path.iterdir():
|
||||||
|
if not experiment_path.is_dir():
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
experiment_name = experiment_path.parent.name
|
||||||
|
training_result = AutogluonTrainingResult.from_path(experiment_path, experiment_name)
|
||||||
|
training_results.append(training_result)
|
||||||
|
is_experiment_dir = True
|
||||||
|
except FileNotFoundError as e2:
|
||||||
|
incomplete_results.append((experiment_path, e2))
|
||||||
|
if not is_experiment_dir:
|
||||||
|
incomplete_results.append((result_path, e))
|
||||||
|
|
||||||
|
if len(incomplete_results) > 0:
|
||||||
|
st.warning(
|
||||||
|
f"Found {len(incomplete_results)} incomplete autogluon training results that were skipped:\n - "
|
||||||
|
+ "\n - ".join(f"{p}: {e}" for p, e in incomplete_results)
|
||||||
|
)
|
||||||
# Sort by creation time (most recent first)
|
# Sort by creation time (most recent first)
|
||||||
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
|
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
|
||||||
return training_results
|
return training_results
|
||||||
|
|
|
||||||
|
|
@ -369,6 +369,7 @@ def render_xgboost_model_state(model_state: xr.Dataset, selected_result: Trainin
|
||||||
options=["gain", "weight", "cover", "total_gain", "total_cover"],
|
options=["gain", "weight", "cover", "total_gain", "total_cover"],
|
||||||
index=0,
|
index=0,
|
||||||
help="Choose which importance metric to visualize",
|
help="Choose which importance metric to visualize",
|
||||||
|
key="model_state_importance_type",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Top N slider
|
# Top N slider
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from entropice.dashboard.sections.experiment_results import (
|
||||||
render_training_results_summary,
|
render_training_results_summary,
|
||||||
)
|
)
|
||||||
from entropice.dashboard.sections.storage_statistics import render_storage_statistics
|
from entropice.dashboard.sections.storage_statistics import render_storage_statistics
|
||||||
from entropice.dashboard.utils.loaders import load_all_training_results
|
from entropice.dashboard.utils.loaders import load_all_autogluon_training_results, load_all_training_results
|
||||||
from entropice.dashboard.utils.stats import DatasetStatistics, load_all_default_dataset_statistics
|
from entropice.dashboard.utils.stats import DatasetStatistics, load_all_default_dataset_statistics
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -27,6 +27,9 @@ def render_overview_page():
|
||||||
)
|
)
|
||||||
# Load training results
|
# Load training results
|
||||||
training_results = load_all_training_results()
|
training_results = load_all_training_results()
|
||||||
|
autogluon_results = load_all_autogluon_training_results()
|
||||||
|
if len(autogluon_results) > 0:
|
||||||
|
training_results.extend(autogluon_results)
|
||||||
|
|
||||||
if not training_results:
|
if not training_results:
|
||||||
st.warning("No training results found. Please run some training experiments first.")
|
st.warning("No training results found. Please run some training experiments first.")
|
||||||
|
|
|
||||||
|
|
@ -2,150 +2,22 @@
|
||||||
|
|
||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
import geopandas as gpd
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
import xarray as xr
|
|
||||||
from stopuhr import stopwatch
|
|
||||||
|
|
||||||
from entropice.dashboard.plots.hyperparameter_analysis import (
|
from entropice.dashboard.sections.cv_result import (
|
||||||
render_binned_parameter_space,
|
render_confusion_matrices,
|
||||||
render_confusion_matrix_heatmap,
|
render_cv_statistics_section,
|
||||||
render_confusion_matrix_map,
|
render_metrics_section,
|
||||||
render_espa_binned_parameter_space,
|
render_run_information,
|
||||||
render_multi_metric_comparison,
|
|
||||||
render_parameter_correlation,
|
|
||||||
render_parameter_distributions,
|
|
||||||
render_performance_summary,
|
|
||||||
render_top_configurations,
|
|
||||||
)
|
)
|
||||||
|
from entropice.dashboard.sections.hparam_space import render_hparam_space_section
|
||||||
|
from entropice.dashboard.sections.regression_analysis import render_regression_analysis
|
||||||
from entropice.dashboard.utils.formatters import format_metric_name
|
from entropice.dashboard.utils.formatters import format_metric_name
|
||||||
from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results
|
from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results
|
||||||
from entropice.dashboard.utils.stats import CVResultsStatistics
|
from entropice.dashboard.utils.stats import CVMetricStatistics
|
||||||
from entropice.utils.types import GridConfig
|
|
||||||
|
|
||||||
|
|
||||||
def load_predictions_with_labels(selected_result: TrainingResult) -> gpd.GeoDataFrame | None:
|
def render_analysis_settings_sidebar(training_results: list[TrainingResult]) -> tuple[TrainingResult, str, str]:
|
||||||
"""Load predictions and merge with training data to get true labels and split info.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
selected_result: The selected TrainingResult object.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
GeoDataFrame with predictions, true labels, and split information, or None if unavailable.
|
|
||||||
|
|
||||||
"""
|
|
||||||
from sklearn.model_selection import train_test_split
|
|
||||||
|
|
||||||
from entropice.ml.dataset import DatasetEnsemble, bin_values, taskcol
|
|
||||||
|
|
||||||
# Load predictions
|
|
||||||
preds_gdf = selected_result.load_predictions()
|
|
||||||
if preds_gdf is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Create a minimal dataset ensemble to access target data
|
|
||||||
settings = selected_result.settings
|
|
||||||
dataset_ensemble = DatasetEnsemble(
|
|
||||||
grid=settings.grid,
|
|
||||||
level=settings.level,
|
|
||||||
target=settings.target,
|
|
||||||
members=[], # No feature data needed, just targets
|
|
||||||
)
|
|
||||||
|
|
||||||
# Load target dataset (just labels, no features)
|
|
||||||
with st.spinner("Loading target labels..."):
|
|
||||||
targets = dataset_ensemble._read_target()
|
|
||||||
|
|
||||||
# Get coverage and task columns
|
|
||||||
task_col = taskcol[settings.task][settings.target]
|
|
||||||
|
|
||||||
# Filter for valid labels (same as in _cat_and_split)
|
|
||||||
valid_labels = targets[task_col].notna()
|
|
||||||
filtered_targets = targets.loc[valid_labels].copy()
|
|
||||||
|
|
||||||
# Apply binning to get class labels (same logic as _cat_and_split)
|
|
||||||
if settings.task == "binary":
|
|
||||||
binned = filtered_targets[task_col].map({False: "No RTS", True: "RTS"}).astype("category")
|
|
||||||
elif settings.task == "count":
|
|
||||||
binned = bin_values(filtered_targets[task_col].astype(int), task=settings.task)
|
|
||||||
elif settings.task == "density":
|
|
||||||
binned = bin_values(filtered_targets[task_col], task=settings.task)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Invalid task: {settings.task}")
|
|
||||||
|
|
||||||
filtered_targets["true_class"] = binned.to_numpy()
|
|
||||||
|
|
||||||
# Recreate the train/test split deterministically (same random_state=42 as in _cat_and_split)
|
|
||||||
_train_idx, test_idx = train_test_split(
|
|
||||||
filtered_targets.index.to_numpy(), test_size=0.2, random_state=42, shuffle=True
|
|
||||||
)
|
|
||||||
filtered_targets["split"] = "train"
|
|
||||||
filtered_targets.loc[test_idx, "split"] = "test"
|
|
||||||
filtered_targets["split"] = filtered_targets["split"].astype("category")
|
|
||||||
|
|
||||||
# Ensure cell_id is available as a column for merging
|
|
||||||
# Check if cell_id already exists, otherwise use the index
|
|
||||||
if "cell_id" not in filtered_targets.columns:
|
|
||||||
filtered_targets = filtered_targets.reset_index().rename(columns={"index": "cell_id"})
|
|
||||||
|
|
||||||
# Merge predictions with labels (inner join to keep only cells with predictions)
|
|
||||||
merged = filtered_targets.merge(preds_gdf[["cell_id", "predicted_class"]], on="cell_id", how="inner")
|
|
||||||
merged_gdf = gpd.GeoDataFrame(merged, geometry="geometry", crs=targets.crs)
|
|
||||||
|
|
||||||
return merged_gdf
|
|
||||||
|
|
||||||
|
|
||||||
def compute_confusion_matrix_from_merged_data(
|
|
||||||
merged_data: gpd.GeoDataFrame,
|
|
||||||
split_type: str,
|
|
||||||
label_names: list[str],
|
|
||||||
) -> xr.DataArray | None:
|
|
||||||
"""Compute confusion matrix from merged predictions and labels.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
merged_data: GeoDataFrame with 'true_class', 'predicted_class', and 'split' columns.
|
|
||||||
split_type: One of 'test', 'train', or 'all'.
|
|
||||||
label_names: List of class label names in order.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
xarray.DataArray with confusion matrix or None if data unavailable.
|
|
||||||
|
|
||||||
"""
|
|
||||||
from sklearn.metrics import confusion_matrix
|
|
||||||
|
|
||||||
# Filter by split type
|
|
||||||
if split_type == "train":
|
|
||||||
data = merged_data[merged_data["split"] == "train"]
|
|
||||||
elif split_type == "test":
|
|
||||||
data = merged_data[merged_data["split"] == "test"]
|
|
||||||
elif split_type == "all":
|
|
||||||
data = merged_data
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Invalid split_type: {split_type}")
|
|
||||||
|
|
||||||
if len(data) == 0:
|
|
||||||
st.warning(f"No data available for {split_type} split.")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Get true and predicted labels
|
|
||||||
y_true = data["true_class"].to_numpy()
|
|
||||||
y_pred = data["predicted_class"].to_numpy()
|
|
||||||
|
|
||||||
# Compute confusion matrix
|
|
||||||
cm = confusion_matrix(y_true, y_pred, labels=label_names)
|
|
||||||
|
|
||||||
# Create xarray DataArray
|
|
||||||
cm_xr = xr.DataArray(
|
|
||||||
cm,
|
|
||||||
dims=["true_label", "predicted_label"],
|
|
||||||
coords={"true_label": label_names, "predicted_label": label_names},
|
|
||||||
name="confusion_matrix",
|
|
||||||
)
|
|
||||||
|
|
||||||
return cm_xr
|
|
||||||
|
|
||||||
|
|
||||||
def render_analysis_settings_sidebar(training_results: list[TrainingResult]) -> tuple[TrainingResult, str, str, int]:
|
|
||||||
"""Render sidebar for training run and analysis settings selection.
|
"""Render sidebar for training run and analysis settings selection.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
@ -155,351 +27,63 @@ def render_analysis_settings_sidebar(training_results: list[TrainingResult]) ->
|
||||||
Tuple of (selected_result, selected_metric, refit_metric, top_n).
|
Tuple of (selected_result, selected_metric, refit_metric, top_n).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
st.header("Select Training Run")
|
with st.sidebar.form("training_analysis_settings_form"):
|
||||||
|
st.header("Select Training Run")
|
||||||
|
|
||||||
# Create selection options with task-first naming
|
# Create selection options with task-first naming
|
||||||
training_options = {tr.display_info.get_display_name("task_first"): tr for tr in training_results}
|
training_options = {tr.display_info.get_display_name("task_first"): tr for tr in training_results}
|
||||||
|
|
||||||
selected_name = st.selectbox(
|
selected_name = st.selectbox(
|
||||||
"Training Run",
|
"Training Run",
|
||||||
options=list(training_options.keys()),
|
options=list(training_options.keys()),
|
||||||
index=0,
|
index=0,
|
||||||
help="Select a training run to analyze",
|
help="Select a training run to analyze",
|
||||||
key="training_run_select",
|
key="training_run_select",
|
||||||
)
|
)
|
||||||
|
|
||||||
selected_result = cast(TrainingResult, training_options[selected_name])
|
selected_result = cast(TrainingResult, training_options[selected_name])
|
||||||
|
|
||||||
st.divider()
|
|
||||||
|
|
||||||
# Metric selection for detailed analysis
|
|
||||||
st.subheader("Analysis Settings")
|
|
||||||
|
|
||||||
available_metrics = selected_result.available_metrics
|
|
||||||
|
|
||||||
# Try to get refit metric from settings
|
|
||||||
refit_metric = "f1" if selected_result.settings.task == "binary" else "f1_weighted"
|
|
||||||
|
|
||||||
if refit_metric in available_metrics:
|
|
||||||
default_metric_idx = available_metrics.index(refit_metric)
|
|
||||||
else:
|
|
||||||
default_metric_idx = 0
|
|
||||||
|
|
||||||
selected_metric = st.selectbox(
|
|
||||||
"Primary Metric for Analysis",
|
|
||||||
options=available_metrics,
|
|
||||||
index=default_metric_idx,
|
|
||||||
format_func=format_metric_name,
|
|
||||||
help="Select the metric to focus on for detailed analysis",
|
|
||||||
key="metric_select",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Top N configurations
|
|
||||||
top_n = st.slider(
|
|
||||||
"Top N Configurations",
|
|
||||||
min_value=5,
|
|
||||||
max_value=50,
|
|
||||||
value=10,
|
|
||||||
step=5,
|
|
||||||
help="Number of top configurations to display",
|
|
||||||
key="top_n_slider",
|
|
||||||
)
|
|
||||||
|
|
||||||
return selected_result, selected_metric, refit_metric, top_n
|
|
||||||
|
|
||||||
|
|
||||||
def render_run_information(selected_result: TrainingResult, refit_metric):
|
|
||||||
"""Render training run configuration overview.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
selected_result: The selected TrainingResult object.
|
|
||||||
refit_metric: The refit metric used for model selection.
|
|
||||||
|
|
||||||
"""
|
|
||||||
st.header("📋 Run Information")
|
|
||||||
|
|
||||||
grid_config = GridConfig.from_grid_level(f"{selected_result.settings.grid}{selected_result.settings.level}") # ty:ignore[invalid-argument-type]
|
|
||||||
|
|
||||||
col1, col2, col3, col4, col5 = st.columns(5)
|
|
||||||
with col1:
|
|
||||||
st.metric("Task", selected_result.settings.task.capitalize())
|
|
||||||
with col2:
|
|
||||||
st.metric("Target", selected_result.settings.target.capitalize())
|
|
||||||
with col3:
|
|
||||||
st.metric("Grid", grid_config.display_name)
|
|
||||||
with col4:
|
|
||||||
st.metric("Model", selected_result.settings.model.upper())
|
|
||||||
with col5:
|
|
||||||
st.metric("Trials", len(selected_result.results))
|
|
||||||
|
|
||||||
st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}")
|
|
||||||
|
|
||||||
|
|
||||||
def render_test_metrics_section(selected_result: TrainingResult):
|
|
||||||
"""Render test metrics overview showing final model performance.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
selected_result: The selected TrainingResult object.
|
|
||||||
|
|
||||||
"""
|
|
||||||
st.header("🎯 Test Set Performance")
|
|
||||||
st.caption("Performance metrics on the held-out test set (best model from hyperparameter search)")
|
|
||||||
|
|
||||||
test_metrics = selected_result.metrics
|
|
||||||
|
|
||||||
if not test_metrics:
|
|
||||||
st.warning("No test metrics available for this training run.")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Display metrics in columns based on task type
|
|
||||||
task = selected_result.settings.task
|
|
||||||
|
|
||||||
if task == "binary":
|
|
||||||
# Binary classification metrics
|
|
||||||
col1, col2, col3, col4, col5 = st.columns(5)
|
|
||||||
|
|
||||||
with col1:
|
|
||||||
st.metric("Accuracy", f"{test_metrics.get('accuracy', 0):.4f}")
|
|
||||||
with col2:
|
|
||||||
st.metric("F1 Score", f"{test_metrics.get('f1', 0):.4f}")
|
|
||||||
with col3:
|
|
||||||
st.metric("Precision", f"{test_metrics.get('precision', 0):.4f}")
|
|
||||||
with col4:
|
|
||||||
st.metric("Recall", f"{test_metrics.get('recall', 0):.4f}")
|
|
||||||
with col5:
|
|
||||||
st.metric("Jaccard", f"{test_metrics.get('jaccard', 0):.4f}")
|
|
||||||
else:
|
|
||||||
# Multiclass metrics
|
|
||||||
col1, col2, col3 = st.columns(3)
|
|
||||||
|
|
||||||
with col1:
|
|
||||||
st.metric("Accuracy", f"{test_metrics.get('accuracy', 0):.4f}")
|
|
||||||
with col2:
|
|
||||||
st.metric("F1 (Macro)", f"{test_metrics.get('f1_macro', 0):.4f}")
|
|
||||||
with col3:
|
|
||||||
st.metric("F1 (Weighted)", f"{test_metrics.get('f1_weighted', 0):.4f}")
|
|
||||||
|
|
||||||
col4, col5, col6 = st.columns(3)
|
|
||||||
|
|
||||||
with col4:
|
|
||||||
st.metric("Precision (Macro)", f"{test_metrics.get('precision_macro', 0):.4f}")
|
|
||||||
with col5:
|
|
||||||
st.metric("Precision (Weighted)", f"{test_metrics.get('precision_weighted', 0):.4f}")
|
|
||||||
with col6:
|
|
||||||
st.metric("Recall (Macro)", f"{test_metrics.get('recall_macro', 0):.4f}")
|
|
||||||
|
|
||||||
col7, col8, col9 = st.columns(3)
|
|
||||||
|
|
||||||
with col7:
|
|
||||||
st.metric("Jaccard (Micro)", f"{test_metrics.get('jaccard_micro', 0):.4f}")
|
|
||||||
with col8:
|
|
||||||
st.metric("Jaccard (Macro)", f"{test_metrics.get('jaccard_macro', 0):.4f}")
|
|
||||||
with col9:
|
|
||||||
st.metric("Jaccard (Weighted)", f"{test_metrics.get('jaccard_weighted', 0):.4f}")
|
|
||||||
|
|
||||||
|
|
||||||
def render_cv_statistics_section(selected_result, selected_metric):
|
|
||||||
"""Render cross-validation statistics for selected metric.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
selected_result: The selected TrainingResult object.
|
|
||||||
selected_metric: The metric to display statistics for.
|
|
||||||
|
|
||||||
"""
|
|
||||||
st.header("📈 Cross-Validation Statistics")
|
|
||||||
st.caption("Performance during hyperparameter search (averaged across CV folds)")
|
|
||||||
|
|
||||||
from entropice.dashboard.utils.stats import CVMetricStatistics
|
|
||||||
|
|
||||||
cv_stats = CVMetricStatistics.compute(selected_result, selected_metric)
|
|
||||||
|
|
||||||
col1, col2, col3, col4, col5 = st.columns(5)
|
|
||||||
|
|
||||||
with col1:
|
|
||||||
st.metric("Best Score", f"{cv_stats.best_score:.4f}")
|
|
||||||
|
|
||||||
with col2:
|
|
||||||
st.metric("Mean Score", f"{cv_stats.mean_score:.4f}")
|
|
||||||
|
|
||||||
with col3:
|
|
||||||
st.metric("Std Dev", f"{cv_stats.std_score:.4f}")
|
|
||||||
|
|
||||||
with col4:
|
|
||||||
st.metric("Worst Score", f"{cv_stats.worst_score:.4f}")
|
|
||||||
|
|
||||||
with col5:
|
|
||||||
st.metric("Median Score", f"{cv_stats.median_score:.4f}")
|
|
||||||
|
|
||||||
if cv_stats.mean_cv_std is not None:
|
|
||||||
st.info(f"**Mean CV Std:** {cv_stats.mean_cv_std:.4f} - Average standard deviation across CV folds")
|
|
||||||
|
|
||||||
# Compare with test metric if available
|
|
||||||
if selected_metric in selected_result.metrics:
|
|
||||||
test_score = selected_result.metrics[selected_metric]
|
|
||||||
st.divider()
|
st.divider()
|
||||||
st.subheader("CV vs Test Performance")
|
|
||||||
|
|
||||||
col1, col2, col3 = st.columns(3)
|
# Metric selection for detailed analysis
|
||||||
with col1:
|
st.subheader("Analysis Settings")
|
||||||
st.metric("Best CV Score", f"{cv_stats.best_score:.4f}")
|
|
||||||
with col2:
|
|
||||||
st.metric("Test Score", f"{test_score:.4f}")
|
|
||||||
with col3:
|
|
||||||
delta = test_score - cv_stats.best_score
|
|
||||||
delta_pct = (delta / cv_stats.best_score * 100) if cv_stats.best_score != 0 else 0
|
|
||||||
st.metric("Difference", f"{delta:+.4f}", delta=f"{delta_pct:+.2f}%")
|
|
||||||
|
|
||||||
if abs(delta) > cv_stats.std_score:
|
available_metrics = selected_result.available_metrics
|
||||||
st.warning(
|
|
||||||
"⚠️ Test performance differs significantly from CV performance. "
|
|
||||||
"This may indicate overfitting or data distribution mismatch."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# Try to get refit metric from settings
|
||||||
@st.fragment
|
if selected_result.settings.task == "binary":
|
||||||
def render_confusion_matrix_section(selected_result: TrainingResult, merged_predictions: gpd.GeoDataFrame | None):
|
refit_metric = "f1"
|
||||||
"""Render confusion matrix visualization and analysis.
|
elif selected_result.settings.task in ["count_regimes", "density_regimes"]:
|
||||||
|
refit_metric = "f1_weighted"
|
||||||
Args:
|
|
||||||
selected_result: The selected TrainingResult object.
|
|
||||||
merged_predictions: GeoDataFrame with predictions merged with true labels and split info.
|
|
||||||
|
|
||||||
"""
|
|
||||||
st.header("🎲 Confusion Matrix")
|
|
||||||
st.caption("Detailed breakdown of predictions")
|
|
||||||
|
|
||||||
# Add selector for confusion matrix type
|
|
||||||
cm_type = st.selectbox(
|
|
||||||
"Select Data Split",
|
|
||||||
options=["test", "train", "all"],
|
|
||||||
format_func=lambda x: {"test": "Test Set", "train": "CV Set (Train Split)", "all": "All Available Data"}[x],
|
|
||||||
help="Choose which data split to display the confusion matrix for",
|
|
||||||
key="cm_split_select",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get label names from settings
|
|
||||||
label_names = selected_result.settings.classes
|
|
||||||
|
|
||||||
# Compute or load confusion matrix based on selection
|
|
||||||
if cm_type == "test":
|
|
||||||
if selected_result.confusion_matrix is None:
|
|
||||||
st.warning("No confusion matrix available for the test set.")
|
|
||||||
return
|
|
||||||
cm = selected_result.confusion_matrix
|
|
||||||
st.info("📊 Showing confusion matrix for the **Test Set** (held-out data, never used during training)")
|
|
||||||
else:
|
|
||||||
if merged_predictions is None:
|
|
||||||
st.warning("Predictions data not available. Cannot compute confusion matrix.")
|
|
||||||
return
|
|
||||||
|
|
||||||
with st.spinner(f"Computing confusion matrix for {cm_type} split..."):
|
|
||||||
cm = compute_confusion_matrix_from_merged_data(merged_predictions, cm_type, label_names)
|
|
||||||
if cm is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
if cm_type == "train":
|
|
||||||
st.info(
|
|
||||||
"📊 Showing confusion matrix for the **CV Set (Train Split)** "
|
|
||||||
"(data used during hyperparameter search cross-validation)"
|
|
||||||
)
|
|
||||||
else: # all
|
|
||||||
st.info("📊 Showing confusion matrix for **All Available Data** (combined train and test splits)")
|
|
||||||
|
|
||||||
render_confusion_matrix_heatmap(cm, selected_result.settings.task)
|
|
||||||
|
|
||||||
|
|
||||||
def render_parameter_space_section(selected_result, selected_metric):
|
|
||||||
"""Render parameter space analysis section.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
selected_result: The selected TrainingResult object.
|
|
||||||
selected_metric: The metric to analyze parameters against.
|
|
||||||
|
|
||||||
"""
|
|
||||||
st.header("🔍 Parameter Space Analysis")
|
|
||||||
|
|
||||||
# Compute CV results statistics
|
|
||||||
cv_results_stats = CVResultsStatistics.compute(selected_result)
|
|
||||||
|
|
||||||
# Show parameter space summary
|
|
||||||
with st.expander("📋 Parameter Space Summary", expanded=False):
|
|
||||||
param_summary_df = cv_results_stats.parameters_to_dataframe()
|
|
||||||
if not param_summary_df.empty:
|
|
||||||
st.dataframe(param_summary_df, hide_index=True, width="stretch")
|
|
||||||
else:
|
else:
|
||||||
st.info("No parameter information available.")
|
refit_metric = "r2"
|
||||||
|
|
||||||
results = selected_result.results
|
if refit_metric in available_metrics:
|
||||||
settings = selected_result.settings
|
default_metric_idx = available_metrics.index(refit_metric)
|
||||||
|
else:
|
||||||
|
default_metric_idx = 0
|
||||||
|
|
||||||
# Parameter distributions
|
selected_metric = st.selectbox(
|
||||||
st.subheader("📈 Parameter Distributions")
|
"Primary Metric for Analysis",
|
||||||
render_parameter_distributions(results, settings)
|
options=available_metrics,
|
||||||
|
index=default_metric_idx,
|
||||||
|
format_func=format_metric_name,
|
||||||
|
help="Select the metric to focus on for detailed analysis",
|
||||||
|
key="metric_select",
|
||||||
|
)
|
||||||
|
|
||||||
# Binned parameter space plots
|
# Form submit button
|
||||||
st.subheader("🎨 Binned Parameter Space")
|
submitted = st.form_submit_button(
|
||||||
|
"Load Training Result",
|
||||||
|
type="primary",
|
||||||
|
use_container_width=True,
|
||||||
|
)
|
||||||
|
|
||||||
# Check if this is an ESPA model and show ESPA-specific plots
|
if not submitted:
|
||||||
model_type = settings.model
|
st.info("👆 Click 'Load Training Result' to apply changes.")
|
||||||
if model_type == "espa":
|
st.stop()
|
||||||
# Show ESPA-specific binned plots (eps_cl vs eps_e binned by K)
|
|
||||||
render_espa_binned_parameter_space(results, selected_metric)
|
|
||||||
|
|
||||||
# Optionally show the generic binned plots in an expander
|
return selected_result, selected_metric, refit_metric
|
||||||
with st.expander("📊 All Parameter Combinations", expanded=False):
|
|
||||||
st.caption("Generic parameter space exploration (all pairwise combinations)")
|
|
||||||
render_binned_parameter_space(results, selected_metric)
|
|
||||||
else:
|
|
||||||
# For non-ESPA models, show the generic binned plots
|
|
||||||
render_binned_parameter_space(results, selected_metric)
|
|
||||||
|
|
||||||
|
|
||||||
def render_data_export_section(results, selected_result):
|
|
||||||
"""Render data export section with download buttons.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
results: DataFrame with CV results.
|
|
||||||
selected_result: The selected TrainingResult object.
|
|
||||||
|
|
||||||
"""
|
|
||||||
with st.expander("💾 Export Data", expanded=False):
|
|
||||||
st.subheader("Download Results")
|
|
||||||
|
|
||||||
col1, col2 = st.columns(2)
|
|
||||||
|
|
||||||
with col1:
|
|
||||||
# Download full results as CSV
|
|
||||||
csv_data = results.to_csv(index=False)
|
|
||||||
st.download_button(
|
|
||||||
label="📥 Download Full Results (CSV)",
|
|
||||||
data=csv_data,
|
|
||||||
file_name=f"{selected_result.path.name}_results.csv",
|
|
||||||
mime="text/csv",
|
|
||||||
)
|
|
||||||
|
|
||||||
with col2:
|
|
||||||
# Download settings as JSON
|
|
||||||
import json
|
|
||||||
|
|
||||||
settings_dict = {
|
|
||||||
"task": selected_result.settings.task,
|
|
||||||
"grid": selected_result.settings.grid,
|
|
||||||
"level": selected_result.settings.level,
|
|
||||||
"model": selected_result.settings.model,
|
|
||||||
"cv_splits": selected_result.settings.cv_splits,
|
|
||||||
"classes": selected_result.settings.classes,
|
|
||||||
}
|
|
||||||
settings_json = json.dumps(settings_dict, indent=2)
|
|
||||||
st.download_button(
|
|
||||||
label="⚙️ Download Settings (JSON)",
|
|
||||||
data=settings_json,
|
|
||||||
file_name=f"{selected_result.path.name}_settings.json",
|
|
||||||
mime="application/json",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Show raw data preview
|
|
||||||
st.subheader("Raw Data Preview")
|
|
||||||
st.dataframe(results.head(100), width="stretch")
|
|
||||||
|
|
||||||
|
|
||||||
def render_training_analysis_page():
|
def render_training_analysis_page():
|
||||||
|
|
@ -513,91 +97,47 @@ def render_training_analysis_page():
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load all available training results
|
# Load training results
|
||||||
training_results = load_all_training_results()
|
training_results = load_all_training_results()
|
||||||
|
|
||||||
if not training_results:
|
if not training_results:
|
||||||
st.warning("No training results found. Please run some training experiments first.")
|
st.warning("No training results found. Please run some training experiments first.")
|
||||||
st.info("Run training using: `pixi run python -m entropice.ml.training`")
|
st.stop()
|
||||||
return
|
return
|
||||||
|
|
||||||
st.success(f"Found **{len(training_results)}** training result(s)")
|
st.write(f"Found **{len(training_results)}** training result(s)")
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
selected_result, selected_metric, refit_metric = render_analysis_settings_sidebar(training_results)
|
||||||
|
|
||||||
# Sidebar: Training run selection
|
cv_statistics = CVMetricStatistics.compute(selected_result, selected_metric)
|
||||||
with st.sidebar:
|
|
||||||
selection_result = render_analysis_settings_sidebar(training_results)
|
|
||||||
if selection_result[0] is None:
|
|
||||||
return
|
|
||||||
selected_result, selected_metric, refit_metric, top_n = selection_result
|
|
||||||
|
|
||||||
# Load predictions with labels once (used by confusion matrix and map)
|
|
||||||
merged_predictions = load_predictions_with_labels(selected_result)
|
|
||||||
|
|
||||||
# Main content area
|
|
||||||
results = selected_result.results
|
|
||||||
settings = selected_result.settings
|
|
||||||
|
|
||||||
# Run Information
|
|
||||||
render_run_information(selected_result, refit_metric)
|
render_run_information(selected_result, refit_metric)
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
# Test Metrics Section
|
render_metrics_section(selected_result)
|
||||||
render_test_metrics_section(selected_result)
|
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
# Confusion Matrix Section
|
# Render confusion matrices for classification, regression analysis for regression
|
||||||
render_confusion_matrix_section(selected_result, merged_predictions)
|
if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]:
|
||||||
|
render_confusion_matrices(selected_result)
|
||||||
|
else:
|
||||||
|
render_regression_analysis(selected_result)
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
# Performance Summary Section
|
render_cv_statistics_section(cv_statistics, selected_result.test_metrics.get(selected_metric, float("nan")))
|
||||||
st.header("📊 CV Performance Overview")
|
|
||||||
st.caption("Summary of hyperparameter search results across all configurations")
|
|
||||||
render_performance_summary(results, refit_metric)
|
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
# Prediction Analysis Map Section
|
render_hparam_space_section(selected_result, selected_metric)
|
||||||
st.header("🗺️ Model Performance Map")
|
|
||||||
st.caption("Interactive 3D map showing prediction correctness across the training dataset")
|
|
||||||
render_confusion_matrix_map(selected_result.path, settings, merged_predictions)
|
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
# Cross-Validation Statistics
|
# List all results at the end
|
||||||
render_cv_statistics_section(selected_result, selected_metric)
|
st.header("📄 All Training Results")
|
||||||
|
st.dataframe(selected_result.results)
|
||||||
st.divider()
|
|
||||||
|
|
||||||
# Parameter Space Analysis
|
|
||||||
render_parameter_space_section(selected_result, selected_metric)
|
|
||||||
|
|
||||||
st.divider()
|
|
||||||
|
|
||||||
# Parameter Correlation
|
|
||||||
st.header("🔗 Parameter Correlation")
|
|
||||||
render_parameter_correlation(results, selected_metric)
|
|
||||||
|
|
||||||
st.divider()
|
|
||||||
|
|
||||||
# Multi-Metric Comparison
|
|
||||||
if len(selected_result.available_metrics) >= 2:
|
|
||||||
st.header("📊 Multi-Metric Comparison")
|
|
||||||
render_multi_metric_comparison(results)
|
|
||||||
st.divider()
|
|
||||||
|
|
||||||
# Top Configurations
|
|
||||||
st.header("🏆 Top Performing Configurations")
|
|
||||||
render_top_configurations(results, selected_metric, top_n)
|
|
||||||
|
|
||||||
st.divider()
|
|
||||||
|
|
||||||
# Raw Data Export
|
|
||||||
render_data_export_section(results, selected_result)
|
|
||||||
|
|
||||||
st.balloons()
|
st.balloons()
|
||||||
stopwatch.summary()
|
|
||||||
|
|
|
||||||
|
|
@ -44,8 +44,8 @@ class AutoGluonSettings:
|
||||||
class AutoGluonTrainingSettings(DatasetEnsemble, AutoGluonSettings):
|
class AutoGluonTrainingSettings(DatasetEnsemble, AutoGluonSettings):
|
||||||
"""Combined settings for AutoGluon training."""
|
"""Combined settings for AutoGluon training."""
|
||||||
|
|
||||||
classes: list[str] | None
|
classes: list[str] | None = None
|
||||||
problem_type: str
|
problem_type: str = "binary"
|
||||||
|
|
||||||
|
|
||||||
def _determine_problem_type_and_metric(task: Task) -> tuple[str, str]:
|
def _determine_problem_type_and_metric(task: Task) -> tuple[str, str]:
|
||||||
|
|
@ -177,6 +177,8 @@ def autogluon_train(
|
||||||
toml.dump({"settings": asdict(combined_settings)}, f)
|
toml.dump({"settings": asdict(combined_settings)}, f)
|
||||||
|
|
||||||
# Save test metrics
|
# Save test metrics
|
||||||
|
# We need to use pickle here, because the confusion matrix is stored as a dataframe
|
||||||
|
# This only matters for classification tasks
|
||||||
test_metrics_file = results_dir / "test_metrics.pickle"
|
test_metrics_file = results_dir / "test_metrics.pickle"
|
||||||
print(f"💾 Saving test metrics to {test_metrics_file}")
|
print(f"💾 Saving test metrics to {test_metrics_file}")
|
||||||
with open(test_metrics_file, "wb") as f:
|
with open(test_metrics_file, "wb") as f:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue