Redo Training Results Analysis
This commit is contained in:
parent
2664579a75
commit
7d874f7f92
16 changed files with 1455 additions and 2227 deletions
|
|
@ -12,7 +12,6 @@ Pages:
|
|||
|
||||
import streamlit as st
|
||||
|
||||
from entropice.dashboard.views.autogluon_analysis_page import render_autogluon_analysis_page
|
||||
from entropice.dashboard.views.dataset_page import render_dataset_page
|
||||
from entropice.dashboard.views.inference_page import render_inference_page
|
||||
from entropice.dashboard.views.model_state_page import render_model_state_page
|
||||
|
|
@ -28,7 +27,6 @@ def main():
|
|||
overview_page = st.Page(render_overview_page, title="Overview", icon="🏡", default=True)
|
||||
data_page = st.Page(render_dataset_page, title="Dataset", icon="📊")
|
||||
training_analysis_page = st.Page(render_training_analysis_page, title="Training Results Analysis", icon="🦾")
|
||||
autogluon_page = st.Page(render_autogluon_analysis_page, title="AutoGluon Analysis", icon="🤖")
|
||||
model_state_page = st.Page(render_model_state_page, title="Model State", icon="🧮")
|
||||
inference_page = st.Page(render_inference_page, title="Inference", icon="🗺️")
|
||||
|
||||
|
|
@ -36,7 +34,7 @@ def main():
|
|||
{
|
||||
"Overview": [overview_page],
|
||||
"Data": [data_page],
|
||||
"Experiments": [training_analysis_page, autogluon_page, model_state_page],
|
||||
"Experiments": [training_analysis_page, model_state_page],
|
||||
"Inference": [inference_page],
|
||||
}
|
||||
)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
417
src/entropice/dashboard/plots/hyperparameter_space.py
Normal file
417
src/entropice/dashboard/plots/hyperparameter_space.py
Normal file
|
|
@ -0,0 +1,417 @@
|
|||
"""Hyperparameter space plotting functions."""
|
||||
|
||||
import matplotlib.colors as mcolors
|
||||
import pandas as pd
|
||||
import plotly.graph_objects as go
|
||||
|
||||
from entropice.dashboard.utils.colors import get_cmap, get_palette
|
||||
|
||||
|
||||
def plot_performance_summary(results: pd.DataFrame, refit_metric: str) -> tuple[pd.DataFrame, pd.DataFrame, dict]:
    """Compute performance summary statistics.

    Args:
        results: DataFrame with CV results.
        refit_metric: The metric used for refit (e.g., 'f1', 'f1_weighted').

    Returns:
        Tuple of (best_scores_df, score_stats_df, best_params_dict).

    """
    # All aggregated test-score columns produced by the CV search.
    score_cols = [c for c in results.columns if c.startswith("mean_test_")]
    if not score_cols:
        # Without any score columns there is nothing to summarise.
        return pd.DataFrame(), pd.DataFrame(), {}

    def pretty(col: str) -> str:
        # "mean_test_f1_weighted" -> "F1 Weighted"
        return col.removeprefix("mean_test_").replace("_", " ").title()

    # Best score per metric, formatted for display.
    best_scores = [{"Metric": pretty(c), "Best Score": f"{results[c].max():.4f}"} for c in score_cols]

    # Mean/std per metric across all trials.
    score_stats = [
        {"Metric": pretty(c), "Mean ± Std": f"{results[c].mean():.4f} ± {results[c].std():.4f}"}
        for c in score_cols
    ]

    # Identify the best trial by the refit metric; fall back to the first
    # score column when the refit metric is absent.
    refit_col = f"mean_test_{refit_metric}"
    if refit_col not in results.columns:
        refit_col = score_cols[0]
    best_row = results.loc[results[refit_col].idxmax()]

    # Parameter values of the winning trial (prefix stripped for display).
    best_params = {
        c.removeprefix("param_"): best_row[c]
        for c in results.columns
        if c.startswith("param_") and c != "params"
    }

    return pd.DataFrame(best_scores), pd.DataFrame(score_stats), best_params
|
||||
|
||||
|
||||
def plot_parameter_distributions(results: pd.DataFrame, param_grid: dict | None = None) -> dict[str, go.Figure]:
    """Create histogram charts for parameter distributions.

    Args:
        results: DataFrame with CV results.
        param_grid: Optional parameter grid with distribution information.
            NOTE(review): currently unused; kept for interface symmetry with
            the other hyperparameter-space plots.

    Returns:
        Dictionary mapping parameter names to Plotly figures.

    """
    param_cols = [col for col in results.columns if col.startswith("param_") and col != "params"]
    if not param_cols:
        return {}

    # One mid-scale colour shared by every histogram/bar.
    cmap = get_cmap("parameter_distribution")
    bar_color = mcolors.rgb2hex(cmap(0.5))

    charts: dict[str, go.Figure] = {}
    for param_col in param_cols:
        param_name = param_col.replace("param_", "")
        param_values = results[param_col].dropna()

        if len(param_values) == 0:
            continue

        fig = go.Figure()
        if pd.api.types.is_numeric_dtype(param_values):
            # Numeric parameters: binned histogram.
            fig.add_trace(
                go.Histogram(
                    x=param_values,
                    nbinsx=30,
                    marker_color=bar_color,
                    name=param_name,
                )
            )
        else:
            # Categorical parameters: one bar per observed value.
            value_counts = param_values.value_counts().reset_index()
            value_counts.columns = [param_name, "count"]
            fig.add_trace(
                go.Bar(
                    x=value_counts[param_name],
                    y=value_counts["count"],
                    marker_color=bar_color,
                    name=param_name,
                )
            )

        # The layout was duplicated verbatim in both branches; apply it once.
        fig.update_layout(
            title=f"Distribution of {param_name}",
            xaxis_title=param_name,
            yaxis_title="Count",
            height=400,
            showlegend=False,
        )

        charts[param_name] = fig

    return charts
|
||||
|
||||
|
||||
def plot_score_vs_parameters(
    results: pd.DataFrame, metric: str, param_grid: dict | None = None
) -> dict[str, go.Figure]:
    """Create scatter plots of score vs each parameter.

    Args:
        results: DataFrame with CV results.
        metric: The metric to plot (e.g., 'f1', 'accuracy').
        param_grid: Optional parameter grid with distribution information.

    Returns:
        Dictionary mapping parameter names to Plotly figures.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return {}

    param_cols = [c for c in results.columns if c.startswith("param_") and c != "params"]
    if not param_cols:
        return {}

    # Shared colorscale and axis label for every figure.
    hex_colors = get_palette(metric, n_colors=256)
    colorscale = [[idx / 255, color] for idx, color in enumerate(hex_colors)]
    metric_label = metric.replace("_", " ").title()

    figures: dict[str, go.Figure] = {}
    for col in param_cols:
        name = col.replace("param_", "")
        if results[col].dropna().empty:
            continue

        # Use a log x-axis when the grid sampled this parameter log-uniformly.
        config = (param_grid or {}).get(name)
        log_axis = isinstance(config, dict) and config.get("distribution") == "loguniform"

        scatter = go.Scatter(
            x=results[col],
            y=results[score_col],
            mode="markers",
            marker={
                "size": 8,
                "color": results[score_col],
                "colorscale": colorscale,
                "showscale": False,
                "opacity": 0.6,
            },
            text=[
                f"{name}: {val}<br>Score: {score:.4f}"
                for val, score in zip(results[col], results[score_col])
            ],
            hovertemplate="%{text}<extra></extra>",
        )

        fig = go.Figure(data=scatter)
        fig.update_layout(
            title=f"{metric_label} vs {name}",
            xaxis_title=name,
            xaxis_type="log" if log_axis else "linear",
            yaxis_title=metric_label,
            height=400,
            showlegend=False,
        )
        figures[name] = fig

    return figures
|
||||
|
||||
|
||||
def plot_parameter_correlations(results: pd.DataFrame, metric: str) -> go.Figure | None:
    """Create correlation bar chart between parameters and score.

    Args:
        results: DataFrame with CV results.
        metric: The metric to analyze (e.g., 'f1', 'accuracy').

    Returns:
        Plotly figure or None if no numeric parameters found.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return None

    # Only numeric parameters have a meaningful Pearson correlation.
    numeric_params = [
        c
        for c in results.columns
        if c.startswith("param_") and c != "params" and pd.api.types.is_numeric_dtype(results[c])
    ]
    if not numeric_params:
        return None

    # Correlation of each numeric parameter against the score column.
    rows = [
        {
            "Parameter": c.replace("param_", ""),
            "Correlation": results[[c, score_col]].corr().iloc[0, 1],
        }
        for c in numeric_params
    ]
    corr_df = pd.DataFrame(rows).sort_values("Correlation", ascending=False)

    # Diverging palette suits signed correlations.
    hex_colors = get_palette("correlation", n_colors=256)

    bar = go.Bar(
        x=corr_df["Correlation"],
        y=corr_df["Parameter"],
        orientation="h",
        marker={
            "color": corr_df["Correlation"],
            "colorscale": [[idx / 255, color] for idx, color in enumerate(hex_colors)],
            "cmin": -1,
            "cmax": 1,
            "showscale": False,
        },
        text=[f"{c:.3f}" for c in corr_df["Correlation"]],
        hovertemplate="%{y}<br>Correlation: %{x:.3f}<extra></extra>",
    )

    fig = go.Figure(data=bar)
    fig.update_layout(
        xaxis_title="Correlation with Score",
        yaxis_title="Parameter",
        # Grow the chart with the number of parameters, never below 300px.
        height=max(300, len(rows) * 30),
        showlegend=False,
    )

    return fig
|
||||
|
||||
|
||||
def plot_parameter_interactions(results: pd.DataFrame, metric: str, param_grid: dict | None = None) -> list[go.Figure]:
    """Create scatter plots showing parameter interactions.

    Args:
        results: DataFrame with CV results.
        metric: The metric to visualize (e.g., 'f1', 'accuracy').
        param_grid: Optional parameter grid with distribution information.

    Returns:
        List of Plotly figures, one per unordered pair of numeric parameters.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return []

    # Only numeric parameters can live on continuous axes.
    param_cols = [col for col in results.columns if col.startswith("param_") and col != "params"]
    numeric_params = [col for col in param_cols if pd.api.types.is_numeric_dtype(results[col])]

    if len(numeric_params) < 2:
        return []

    # Hoist loop invariants: the colorscale and metric label are identical
    # for every pair. (Also removes the previous shadowing of the outer
    # loop variable `i` inside the colorscale comprehension.)
    hex_colors = get_palette(metric, n_colors=256)
    colorscale = [[j / 255, c] for j, c in enumerate(hex_colors)]
    metric_label = metric.replace("_", " ").title()

    def _uses_log_scale(param_name: str) -> bool:
        # A parameter sampled log-uniformly gets a log axis.
        config = (param_grid or {}).get(param_name)
        return isinstance(config, dict) and config.get("distribution") == "loguniform"

    charts = []
    param_names = [col.replace("param_", "") for col in numeric_params]

    for idx, x_param in enumerate(param_names[:-1]):
        for y_param in param_names[idx + 1 :]:
            x_col = f"param_{x_param}"
            y_col = f"param_{y_param}"

            fig = go.Figure()
            fig.add_trace(
                go.Scatter(
                    x=results[x_col],
                    y=results[y_col],
                    mode="markers",
                    marker={
                        "size": 8,
                        "color": results[score_col],
                        "colorscale": colorscale,
                        "showscale": True,
                        "colorbar": {"title": metric_label},
                        "opacity": 0.7,
                    },
                    text=[
                        f"{x_param}: {x_val}<br>{y_param}: {y_val}<br>Score: {score:.4f}"
                        for x_val, y_val, score in zip(results[x_col], results[y_col], results[score_col])
                    ],
                    hovertemplate="%{text}<extra></extra>",
                )
            )
            fig.update_layout(
                title=f"{metric_label} by {x_param} and {y_param}",
                xaxis_title=x_param,
                xaxis_type="log" if _uses_log_scale(x_param) else "linear",
                yaxis_title=y_param,
                yaxis_type="log" if _uses_log_scale(y_param) else "linear",
                height=500,
                width=500,
            )

            charts.append(fig)

    return charts
|
||||
|
||||
|
||||
def plot_score_evolution(results: pd.DataFrame, metric: str) -> go.Figure | None:
    """Create line chart showing score evolution over iterations.

    Args:
        results: DataFrame with CV results.
        metric: The metric to visualize (e.g., 'f1', 'accuracy').

    Returns:
        Plotly figure or None if metric not found.

    """
    score_col = f"mean_test_{metric}"
    if score_col not in results.columns:
        return None

    # Row order is treated as search iteration order.
    xs = list(range(len(results)))
    raw_scores = results[score_col].to_numpy()
    running_best = results[score_col].cummax().to_numpy()

    # Two hues from the same colormap: faint for per-trial scores,
    # stronger for the running best.
    cmap = get_cmap("score_evolution")
    raw_color = mcolors.rgb2hex(cmap(0.3))
    best_color = mcolors.rgb2hex(cmap(0.7))

    fig = go.Figure(
        data=[
            go.Scatter(
                x=xs,
                y=raw_scores,
                mode="lines",
                name="Score",
                line={"color": raw_color, "width": 1},
                opacity=0.6,
                hovertemplate="Iteration: %{x}<br>Score: %{y:.4f}<extra></extra>",
            ),
            go.Scatter(
                x=xs,
                y=running_best,
                mode="lines",
                name="Best So Far",
                line={"color": best_color, "width": 2},
                hovertemplate="Iteration: %{x}<br>Best So Far: %{y:.4f}<extra></extra>",
            ),
        ]
    )

    fig.update_layout(
        title=f"{metric.replace('_', ' ').title()} Evolution",
        xaxis_title="Iteration",
        yaxis_title=metric.replace("_", " ").title(),
        height=300,
        hovermode="x unified",
    )

    return fig
|
||||
97
src/entropice/dashboard/plots/metrics.py
Normal file
97
src/entropice/dashboard/plots/metrics.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
"""Metrics visualization plots."""
|
||||
|
||||
import numpy as np
|
||||
import plotly.graph_objects as go
|
||||
import xarray as xr
|
||||
|
||||
|
||||
def plot_confusion_matrix(cm_data: xr.DataArray, title: str = "Confusion Matrix", normalize: str = "none") -> go.Figure:
    """Plot an interactive confusion matrix heatmap.

    Args:
        cm_data: XArray DataArray with confusion matrix data (dimensions: true_label, predicted_label).
        title: Title for the plot.
        normalize: Normalization mode - "none", "true", or "pred".

    Returns:
        Plotly figure with the interactive confusion matrix heatmap.

    """
    cm_array = cm_data.values.astype(float)
    labels = cm_data.coords["true_label"].values.tolist()

    # Keep the raw counts for hover text and annotations.
    cm_counts = cm_data.values

    # Apply normalization.
    if normalize == "true":
        # Normalize over true labels (rows) - each row sums to 1.
        sums = cm_array.sum(axis=1, keepdims=True)
        # BUG FIX: np.divide(..., where=...) without `out=` leaves the masked
        # entries uninitialized; a zeroed output buffer makes all-zero
        # rows/columns render as 0 instead of garbage.
        cm_normalized = np.divide(cm_array, sums, out=np.zeros_like(cm_array), where=sums != 0)
        colorbar_title = "Proportion"
    elif normalize == "pred":
        # Normalize over predicted labels (columns) - each column sums to 1.
        sums = cm_array.sum(axis=0, keepdims=True)
        cm_normalized = np.divide(cm_array, sums, out=np.zeros_like(cm_array), where=sums != 0)
        colorbar_title = "Proportion"
    else:
        # No normalization.
        cm_normalized = cm_array
        colorbar_title = "Count"

    # Hoisted loop invariants: grand total and the text-colour threshold
    # were previously recomputed for every cell.
    total = cm_counts.sum()
    threshold = cm_normalized.max() / 2 if cm_normalized.max() > 0 else 0.5

    # Create annotations for the heatmap.
    annotations = []
    for i, true_label in enumerate(labels):
        for j, pred_label in enumerate(labels):
            count = int(cm_counts[i, j])
            normalized_val = cm_normalized[i, j]

            if normalize == "none":
                # Show count and percentage of total.
                pct = (count / total * 100) if total > 0 else 0
                text = f"{count}<br>({pct:.1f}%)"
            else:
                # Show percentage only for normalized versions.
                text = f"{normalized_val:.1%}"

            # Light text on dark cells, dark text on light cells.
            text_color = "white" if normalized_val > threshold else "black"

            annotations.append(
                {
                    "x": pred_label,
                    "y": true_label,
                    "text": text,
                    "showarrow": False,
                    "font": {"size": 10, "color": text_color},
                }
            )

    # Create the heatmap with normalized values for coloring.
    fig = go.Figure(
        data=go.Heatmap(
            z=cm_normalized,
            x=labels,
            y=labels,
            colorscale="Blues",
            colorbar={"title": colorbar_title},
            hoverongaps=False,
            hovertemplate="True: %{y}<br>Predicted: %{x}<br>Count: %{customdata}<extra></extra>",
            customdata=cm_counts,
        )
    )

    fig.update_layout(
        # BUG FIX: the `title` parameter was accepted but never applied.
        title=title,
        annotations=annotations,
        xaxis={"title": "Predicted Label", "side": "bottom"},
        yaxis={"title": "True Label", "autorange": "reversed"},
        width=600,
        height=550,
    )

    return fig
|
||||
180
src/entropice/dashboard/plots/regression.py
Normal file
180
src/entropice/dashboard/plots/regression.py
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
"""Regression analysis plotting functions."""
|
||||
|
||||
from typing import cast
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import plotly.graph_objects as go
|
||||
|
||||
from entropice.dashboard.utils.colors import get_palette
|
||||
|
||||
|
||||
def plot_regression_scatter(
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    title: str = "True vs Predicted",
) -> go.Figure:
    """Create scatter plot of true vs predicted values for regression.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        title: Title for the plot.

    Returns:
        Plotly figure with regression scatter plot.

    """
    # Convert to numpy arrays if needed.
    y_true_np = cast(np.ndarray, y_true.to_numpy()) if isinstance(y_true, pd.Series) else y_true
    y_pred_np = cast(np.ndarray, y_pred.to_numpy()) if isinstance(y_pred, pd.Series) else y_pred

    # Error metrics shown in the annotation box.
    errors = y_true_np - y_pred_np
    mse = np.mean(errors**2)
    mae = np.mean(np.abs(errors))
    ss_res = np.sum(errors**2)
    ss_tot = np.sum((y_true_np - np.mean(y_true_np)) ** 2)
    # Guard against a constant y_true (ss_tot == 0), which previously
    # produced a divide-by-zero warning and an inf/nan R².
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float("nan")

    # Get colormap.
    hex_colors = get_palette("r2", n_colors=256)

    # Calculate point density for coloring (lazy import keeps scipy optional
    # until this plot is actually rendered).
    from scipy.stats import gaussian_kde

    try:
        xy = np.vstack([y_true_np, y_pred_np])
        kde = gaussian_kde(xy)
        density = kde(xy)
    except (np.linalg.LinAlgError, ValueError):
        # Fallback if KDE fails (e.g., all points identical).
        density = np.ones(len(y_true_np))

    fig = go.Figure()

    # Data points, coloured by local density.
    fig.add_trace(
        go.Scatter(
            x=y_true_np,
            y=y_pred_np,
            mode="markers",
            marker={
                "size": 6,
                "color": density,
                "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                "showscale": False,
                "opacity": 0.6,
            },
            text=[f"True: {true:.3f}<br>Pred: {pred:.3f}" for true, pred in zip(y_true_np, y_pred_np)],
            hovertemplate="%{text}<extra></extra>",
            name="Data",
        )
    )

    # Diagonal line (perfect prediction, y = x).
    min_val = min(y_true_np.min(), y_pred_np.min())
    max_val = max(y_true_np.max(), y_pred_np.max())
    fig.add_trace(
        go.Scatter(
            x=[min_val, max_val],
            y=[min_val, max_val],
            mode="lines",
            line={"color": "red", "dash": "dash", "width": 2},
            name="Perfect Prediction",
            hovertemplate="y = x<extra></extra>",
        )
    )

    # Metrics box pinned to the top-left corner of the plotting area.
    metrics_text = f"R² = {r2:.4f}<br>MSE = {mse:.4f}<br>MAE = {mae:.4f}"

    fig.add_annotation(
        x=0.02,
        y=0.98,
        xref="paper",
        yref="paper",
        text=metrics_text,
        showarrow=False,
        bgcolor="white",
        bordercolor="black",
        borderwidth=1,
        xanchor="left",
        yanchor="top",
        font={"size": 12},
    )

    fig.update_layout(
        title=title,
        xaxis_title="True Values",
        yaxis_title="Predicted Values",
        height=500,
        showlegend=True,
        legend={"x": 0.98, "y": 0.02, "xanchor": "right", "yanchor": "bottom"},
    )

    # Make axes equal so the diagonal renders at 45°.
    fig.update_xaxes(scaleanchor="y", scaleratio=1)

    return fig
|
||||
|
||||
|
||||
def plot_residuals(
    y_true: np.ndarray | pd.Series,
    y_pred: np.ndarray | pd.Series,
    title: str = "Residual Plot",
) -> go.Figure:
    """Create residual plot for regression diagnostics.

    Args:
        y_true: True target values.
        y_pred: Predicted target values.
        title: Title for the plot.

    Returns:
        Plotly figure with residual plot.

    """
    # Convert to numpy arrays if needed.
    y_true_np = cast(np.ndarray, y_true.to_numpy()) if isinstance(y_true, pd.Series) else y_true
    y_pred_np = cast(np.ndarray, y_pred.to_numpy()) if isinstance(y_pred, pd.Series) else y_pred

    # Residuals: positive means the model under-predicted.
    residuals = y_true_np - y_pred_np

    # Get colormap.
    hex_colors = get_palette("r2", n_colors=256)

    fig = go.Figure()

    # Scatter of residuals against predictions, coloured by error magnitude.
    # CONSISTENCY FIX: use the converted y_pred_np throughout (the original
    # mixed the raw y_pred — possibly a pandas Series — into the trace and
    # hover text).
    fig.add_trace(
        go.Scatter(
            x=y_pred_np,
            y=residuals,
            mode="markers",
            marker={
                "size": 6,
                "color": np.abs(residuals),
                "colorscale": [[i / 255, c] for i, c in enumerate(hex_colors)],
                "showscale": True,
                "colorbar": {"title": "Abs Residual"},
                "opacity": 0.6,
            },
            text=[f"Pred: {pred:.3f}<br>Residual: {res:.3f}" for pred, res in zip(y_pred_np, residuals)],
            hovertemplate="%{text}<extra></extra>",
        )
    )

    # Zero line: points on it are perfectly predicted.
    fig.add_hline(y=0, line_dash="dash", line_color="red", line_width=2)

    fig.update_layout(
        title=title,
        xaxis_title="Predicted Values",
        yaxis_title="Residuals (True - Predicted)",
        height=400,
        showlegend=False,
    )

    return fig
|
||||
185
src/entropice/dashboard/sections/cv_result.py
Normal file
185
src/entropice/dashboard/sections/cv_result.py
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
"""Training Result Sections."""
|
||||
|
||||
import streamlit as st
|
||||
|
||||
from entropice.dashboard.plots.metrics import plot_confusion_matrix
|
||||
from entropice.dashboard.utils.formatters import format_metric_name
|
||||
from entropice.dashboard.utils.loaders import TrainingResult
|
||||
from entropice.dashboard.utils.stats import CVMetricStatistics
|
||||
from entropice.utils.types import GridConfig
|
||||
|
||||
|
||||
def render_run_information(selected_result: TrainingResult, refit_metric):
    """Render training run configuration overview.

    Args:
        selected_result: The selected TrainingResult object.
        refit_metric: The refit metric used for model selection.

    """
    st.header("📋 Run Information")

    grid_config = GridConfig.from_grid_level(f"{selected_result.settings.grid}{selected_result.settings.level}")  # ty:ignore[invalid-argument-type]

    # Headline facts about the run, one metric per column.
    settings = selected_result.settings
    facts = (
        ("Task", settings.task.capitalize()),
        ("Target", settings.target.capitalize()),
        ("Grid", grid_config.display_name),
        ("Model", settings.model.upper()),
        ("Trials", len(selected_result.results)),
    )
    for column, (label, value) in zip(st.columns(len(facts)), facts):
        with column:
            st.metric(label, value)

    st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}")
|
||||
|
||||
|
||||
def _render_metrics(metrics: dict[str, float]):
    """Render metrics in a wrapped multi-column layout (up to five columns).

    Args:
        metrics: Dictionary of metric names and their values.

    """
    ncols = min(5, len(metrics))
    columns = st.columns(ncols)
    for idx, (name, value) in enumerate(metrics.items()):
        # Wrap around to the first column after filling a row.
        target = columns[idx % ncols]
        with target:
            st.metric(format_metric_name(name), f"{value:.4f}")
|
||||
|
||||
|
||||
def render_metrics_section(selected_result: TrainingResult):
    """Render test, train, and combined metrics for the best model.

    Args:
        selected_result: The selected TrainingResult object.

    """
    # Header / caption / metrics triples, rendered identically per split.
    sections = (
        (
            "🎯 Test Set Performance",
            "Performance metrics on the held-out test set (best model from hyperparameter search)",
            selected_result.test_metrics,
        ),
        (
            "🏋️♂️ Training Set Performance",
            "Performance metrics on the training set (best model from hyperparameter search)",
            selected_result.train_metrics,
        ),
        (
            "🧮 Overall Performance",
            "Overall performance metrics combining training and test sets",
            selected_result.combined_metrics,
        ),
    )
    for header, caption, metrics in sections:
        st.header(header)
        st.caption(caption)
        _render_metrics(metrics)
|
||||
|
||||
|
||||
@st.fragment
def render_confusion_matrices(selected_result: TrainingResult):
    """Render confusion matrices for classification tasks.

    Args:
        selected_result: The selected TrainingResult object.

    """
    st.header("🎭 Confusion Matrices")

    # Confusion matrices only make sense for classification tasks.
    if selected_result.settings.task not in ["binary", "count_regimes", "density_regimes"]:
        st.info(
            "📊 Confusion matrices are only available for classification tasks "
            "(binary, count_regimes, density_regimes)."
        )
        st.caption("Coming soon for regression tasks: residual plots and error distributions.")
        return

    # Check if confusion matrix data is available.
    if selected_result.confusion_matrix is None:
        st.warning("⚠️ No confusion matrix data found for this training result.")
        return

    cm = selected_result.confusion_matrix

    # Let the user pick how the matrices are normalized.
    st.subheader("Display Options")
    normalize_option = st.radio(
        "Normalization",
        options=["No normalization", "Normalize over True Labels", "Normalize over Predicted Labels"],
        horizontal=True,
        help="Choose how to normalize the confusion matrix values",
    )

    # Map selection to normalization mode.
    normalize_map = {
        "No normalization": "none",
        "Normalize over True Labels": "true",
        "Normalize over Predicted Labels": "pred",
    }
    normalize_mode = normalize_map[normalize_option]

    # One column per data split. The rendering logic is identical, so drive
    # it from a table instead of repeating the body three times.
    splits = [
        ("test", "Test Set", "Held-out test set"),
        ("train", "Training Set", "Training set"),
        ("combined", "Combined", "Train + Test sets"),
    ]
    for column, (key, subheader, caption) in zip(st.columns(len(splits)), splits):
        with column:
            st.subheader(subheader)
            st.caption(caption)
            fig = plot_confusion_matrix(cm[key], title=subheader, normalize=normalize_mode)
            st.plotly_chart(fig, width="stretch")
|
||||
|
||||
|
||||
def render_cv_statistics_section(cv_stats: CVMetricStatistics, test_score: float):
    """Render cross-validation statistics for selected metric.

    Args:
        cv_stats: CVMetricStatistics object containing cross-validation statistics.
        test_score: The test set score for the selected metric.

    """
    st.header("📈 Cross-Validation Statistics")
    st.caption("Performance during hyperparameter search (averaged across CV folds)")

    # Summary statistics, one metric per column.
    summary = (
        ("Best Score", cv_stats.best_score),
        ("Mean Score", cv_stats.mean_score),
        ("Std Dev", cv_stats.std_score),
        ("Worst Score", cv_stats.worst_score),
        ("Median Score", cv_stats.median_score),
    )
    for column, (label, value) in zip(st.columns(len(summary)), summary):
        with column:
            st.metric(label, f"{value:.4f}")

    if cv_stats.mean_cv_std is not None:
        st.info(f"**Mean CV Std:** {cv_stats.mean_cv_std:.4f} - Average standard deviation across CV folds")

    # Compare the best CV score against the held-out test score.
    st.subheader("CV vs Test Performance")

    left, middle, right = st.columns(3)
    with left:
        st.metric("Best CV Score", f"{cv_stats.best_score:.4f}")
    with middle:
        st.metric("Test Score", f"{test_score:.4f}")
    with right:
        delta = test_score - cv_stats.best_score
        # Avoid division by zero when the best CV score is exactly 0.
        delta_pct = (delta / cv_stats.best_score * 100) if cv_stats.best_score != 0 else 0
        st.metric("Difference", f"{delta:+.4f}", delta=f"{delta_pct:+.2f}%")

    # A gap larger than one CV standard deviation is worth flagging.
    if abs(delta) > cv_stats.std_score:
        st.warning(
            "⚠️ Test performance differs significantly (larger than the CV standard deviation) from CV performance. "
            "This may indicate overfitting or data distribution mismatch between training and test sets."
        )
|
||||
|
|
@ -2,15 +2,16 @@
|
|||
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
|
||||
from entropice.dashboard.utils.loaders import TrainingResult
|
||||
from entropice.dashboard.utils.loaders import AutogluonTrainingResult, TrainingResult
|
||||
from entropice.utils.types import (
|
||||
GridConfig,
|
||||
)
|
||||
|
||||
|
||||
def render_training_results_summary(training_results: list[TrainingResult]):
|
||||
def render_training_results_summary(training_results: list[TrainingResult | AutogluonTrainingResult]):
|
||||
"""Render summary metrics for training results."""
|
||||
st.header("📊 Training Results Summary")
|
||||
col1, col2, col3, col4 = st.columns(4)
|
||||
|
|
@ -23,7 +24,7 @@ def render_training_results_summary(training_results: list[TrainingResult]):
|
|||
st.metric("Total Runs", len(training_results))
|
||||
|
||||
with col3:
|
||||
models = {tr.settings.model for tr in training_results}
|
||||
models = {tr.settings.model for tr in training_results if hasattr(tr.settings, "model")}
|
||||
st.metric("Model Types", len(models))
|
||||
|
||||
with col4:
|
||||
|
|
@ -33,14 +34,14 @@ def render_training_results_summary(training_results: list[TrainingResult]):
|
|||
|
||||
|
||||
@st.fragment
|
||||
def render_experiment_results(training_results: list[TrainingResult]): # noqa: C901
|
||||
def render_experiment_results(training_results: list[TrainingResult | AutogluonTrainingResult]): # noqa: C901
|
||||
"""Render detailed experiment results table and expandable details."""
|
||||
st.header("🎯 Experiment Results")
|
||||
|
||||
# Filters
|
||||
experiments = sorted({tr.experiment for tr in training_results if tr.experiment})
|
||||
tasks = sorted({tr.settings.task for tr in training_results})
|
||||
models = sorted({tr.settings.model for tr in training_results})
|
||||
models = sorted({tr.settings.model if isinstance(tr, TrainingResult) else "autogluon" for tr in training_results})
|
||||
grids = sorted({f"{tr.settings.grid}-{tr.settings.level}" for tr in training_results})
|
||||
|
||||
# Create filter columns
|
||||
|
|
@ -87,14 +88,26 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
|||
filtered_results = [tr for tr in filtered_results if tr.experiment == selected_experiment]
|
||||
if selected_task != "All":
|
||||
filtered_results = [tr for tr in filtered_results if tr.settings.task == selected_task]
|
||||
if selected_model != "All":
|
||||
filtered_results = [tr for tr in filtered_results if tr.settings.model == selected_model]
|
||||
if selected_model != "All" and selected_model != "autogluon":
|
||||
filtered_results = [
|
||||
tr for tr in filtered_results if isinstance(tr, TrainingResult) and tr.settings.model == selected_model
|
||||
]
|
||||
elif selected_model == "autogluon":
|
||||
filtered_results = [tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
|
||||
if selected_grid != "All":
|
||||
filtered_results = [tr for tr in filtered_results if f"{tr.settings.grid}-{tr.settings.level}" == selected_grid]
|
||||
|
||||
st.subheader("Results Table")
|
||||
|
||||
summary_df = TrainingResult.to_dataframe(filtered_results)
|
||||
summary_df = TrainingResult.to_dataframe([tr for tr in filtered_results if isinstance(tr, TrainingResult)])
|
||||
autogluon_df = AutogluonTrainingResult.to_dataframe(
|
||||
[tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
|
||||
)
|
||||
if len(summary_df) == 0:
|
||||
summary_df = autogluon_df
|
||||
elif len(autogluon_df) > 0:
|
||||
summary_df = pd.concat([summary_df, autogluon_df], ignore_index=True)
|
||||
|
||||
# Display with color coding for best scores
|
||||
st.dataframe(
|
||||
summary_df,
|
||||
|
|
@ -107,6 +120,8 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
|||
for tr in filtered_results:
|
||||
tr_info = tr.display_info
|
||||
display_name = tr_info.get_display_name("model_first")
|
||||
model = "autogluon" if isinstance(tr, AutogluonTrainingResult) else tr.settings.model
|
||||
cv_splits = tr.settings.cv_splits if hasattr(tr.settings, "cv_splits") else "N/A"
|
||||
with st.expander(display_name):
|
||||
col1, col2 = st.columns([1, 2])
|
||||
|
||||
|
|
@ -117,12 +132,12 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
|||
f"- **Experiment:** {tr.experiment}\n"
|
||||
f"- **Task:** {tr.settings.task}\n"
|
||||
f"- **Target:** {tr.settings.target}\n"
|
||||
f"- **Model:** {tr.settings.model}\n"
|
||||
f"- **Model:** {model}\n"
|
||||
f"- **Grid:** {grid_config.display_name}\n"
|
||||
f"- **Created At:** {tr_info.timestamp.strftime('%Y-%m-%d %H:%M')}\n"
|
||||
f"- **Temporal Mode:** {tr.settings.temporal_mode}\n"
|
||||
f"- **Members:** {', '.join(tr.settings.members)}\n"
|
||||
f"- **CV Splits:** {tr.settings.cv_splits}\n"
|
||||
f"- **CV Splits:** {cv_splits}\n"
|
||||
f"- **Classes:** {tr.settings.classes}\n"
|
||||
)
|
||||
|
||||
|
|
@ -140,8 +155,11 @@ def render_experiment_results(training_results: list[TrainingResult]): # noqa:
|
|||
file_str += f"- 📄 `{file.name}`\n"
|
||||
st.write(file_str)
|
||||
with col2:
|
||||
if isinstance(tr, AutogluonTrainingResult):
|
||||
st.write("**Leaderboard:**")
|
||||
st.dataframe(tr.leaderboard, width="stretch", hide_index=True)
|
||||
else:
|
||||
st.write("**CV Score Summary:**")
|
||||
|
||||
# Extract all test scores
|
||||
metric_df = tr.get_metric_dataframe()
|
||||
if metric_df is not None:
|
||||
|
|
|
|||
172
src/entropice/dashboard/sections/hparam_space.py
Normal file
172
src/entropice/dashboard/sections/hparam_space.py
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
"""Hyperparameter Space Visualization Section."""
|
||||
|
||||
import streamlit as st
|
||||
|
||||
from entropice.dashboard.plots.hyperparameter_space import (
|
||||
plot_parameter_correlations,
|
||||
plot_parameter_distributions,
|
||||
plot_parameter_interactions,
|
||||
plot_score_evolution,
|
||||
plot_score_vs_parameters,
|
||||
)
|
||||
from entropice.dashboard.utils.formatters import format_metric_name
|
||||
from entropice.dashboard.utils.loaders import TrainingResult
|
||||
|
||||
|
||||
def _render_performance_summary(results, refit_metric: str):
|
||||
"""Render performance summary subsection."""
|
||||
best_idx = results[f"mean_test_{refit_metric}"].idxmax()
|
||||
best_row = results.loc[best_idx]
|
||||
# Extract parameter columns
|
||||
param_cols = [col for col in results.columns if col.startswith("param_") and col != "params"]
|
||||
best_params = {col.replace("param_", ""): best_row[col] for col in param_cols}
|
||||
|
||||
# Display best parameter combination
|
||||
if not best_params:
|
||||
return
|
||||
|
||||
with st.container(border=True):
|
||||
st.subheader("🏆 Best Parameter Combination")
|
||||
st.caption(f"Parameters of the best model (selected by {format_metric_name(refit_metric)} score)")
|
||||
n_params = len(best_params)
|
||||
cols = st.columns(n_params)
|
||||
for idx, (param_name, param_value) in enumerate(best_params.items()):
|
||||
with cols[idx]:
|
||||
# Format value based on type and magnitude
|
||||
if isinstance(param_value, int):
|
||||
formatted_value = f"{param_value:.0f}"
|
||||
elif isinstance(param_value, float):
|
||||
# Use scientific notation for very small numbers
|
||||
if abs(param_value) < 0.001 and param_value != 0:
|
||||
formatted_value = f"{param_value:.2e}"
|
||||
else:
|
||||
formatted_value = f"{param_value:.4f}"
|
||||
else:
|
||||
formatted_value = str(param_value)
|
||||
|
||||
st.metric(param_name, formatted_value)
|
||||
|
||||
|
||||
def _render_parameter_distributions(results, param_grid: dict | None):
|
||||
"""Render parameter distributions subsection."""
|
||||
st.subheader("Parameter Distributions")
|
||||
st.caption("Distribution of hyperparameter values explored during random search")
|
||||
|
||||
param_charts = plot_parameter_distributions(results, param_grid)
|
||||
|
||||
if not param_charts:
|
||||
st.info("No parameter distribution data available.")
|
||||
return
|
||||
|
||||
# Display charts in a grid
|
||||
param_names = list(param_charts.keys())
|
||||
n_cols = min(3, len(param_names))
|
||||
n_rows = (len(param_names) + n_cols - 1) // n_cols
|
||||
|
||||
for row in range(n_rows):
|
||||
cols = st.columns(n_cols)
|
||||
for col_idx in range(n_cols):
|
||||
param_idx = row * n_cols + col_idx
|
||||
if param_idx < len(param_names):
|
||||
param_name = param_names[param_idx]
|
||||
with cols[col_idx]:
|
||||
st.plotly_chart(param_charts[param_name], width="stretch")
|
||||
|
||||
|
||||
def _render_score_evolution(results, selected_metric: str):
|
||||
"""Render score evolution subsection."""
|
||||
st.subheader("Score Evolution Over Iterations")
|
||||
st.caption(f"How {format_metric_name(selected_metric)} evolved during the random search")
|
||||
|
||||
evolution_chart = plot_score_evolution(results, selected_metric)
|
||||
if evolution_chart:
|
||||
st.plotly_chart(evolution_chart, width="stretch")
|
||||
else:
|
||||
st.warning(f"Score evolution not available for metric: {selected_metric}")
|
||||
|
||||
|
||||
def _render_score_vs_parameters(results, selected_metric: str, param_grid: dict | None):
|
||||
"""Render score vs parameters subsection."""
|
||||
st.subheader("Score vs Individual Parameters")
|
||||
st.caption(f"Relationship between {format_metric_name(selected_metric)} and each hyperparameter")
|
||||
|
||||
score_vs_param_charts = plot_score_vs_parameters(results, selected_metric, param_grid)
|
||||
|
||||
if not score_vs_param_charts:
|
||||
st.info("No score vs parameter data available.")
|
||||
return
|
||||
|
||||
param_names = list(score_vs_param_charts.keys())
|
||||
n_cols = min(2, len(param_names))
|
||||
n_rows = (len(param_names) + n_cols - 1) // n_cols
|
||||
|
||||
for row in range(n_rows):
|
||||
cols = st.columns(n_cols)
|
||||
for col_idx in range(n_cols):
|
||||
param_idx = row * n_cols + col_idx
|
||||
if param_idx < len(param_names):
|
||||
param_name = param_names[param_idx]
|
||||
with cols[col_idx]:
|
||||
st.plotly_chart(score_vs_param_charts[param_name], width="stretch")
|
||||
|
||||
|
||||
def _render_parameter_correlations(results, selected_metric: str):
|
||||
"""Render parameter correlations subsection."""
|
||||
st.subheader("Parameter-Score Correlations")
|
||||
st.caption(f"Correlation between numeric parameters and {format_metric_name(selected_metric)}")
|
||||
|
||||
corr_chart = plot_parameter_correlations(results, selected_metric)
|
||||
if corr_chart:
|
||||
st.plotly_chart(corr_chart, width="stretch")
|
||||
else:
|
||||
st.info("No numeric parameters found for correlation analysis.")
|
||||
|
||||
|
||||
def _render_parameter_interactions(results, selected_metric: str, param_grid: dict | None):
|
||||
"""Render parameter interactions subsection."""
|
||||
st.subheader("Parameter Interactions")
|
||||
st.caption(f"Interaction between parameter pairs and their effect on {format_metric_name(selected_metric)}")
|
||||
|
||||
interaction_charts = plot_parameter_interactions(results, selected_metric, param_grid)
|
||||
|
||||
if not interaction_charts:
|
||||
st.info("Not enough numeric parameters for parameter interaction visualization.")
|
||||
return
|
||||
|
||||
n_cols = min(2, len(interaction_charts))
|
||||
n_rows = (len(interaction_charts) + n_cols - 1) // n_cols
|
||||
|
||||
for row in range(n_rows):
|
||||
cols = st.columns(n_cols)
|
||||
for col_idx in range(n_cols):
|
||||
chart_idx = row * n_cols + col_idx
|
||||
if chart_idx < len(interaction_charts):
|
||||
with cols[col_idx]:
|
||||
st.plotly_chart(interaction_charts[chart_idx], width="stretch")
|
||||
|
||||
|
||||
def render_hparam_space_section(selected_result: TrainingResult, selected_metric: str):
|
||||
"""Render the hyperparameter space visualization section.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
selected_metric: The metric to focus analysis on.
|
||||
|
||||
"""
|
||||
st.header("🧩 Hyperparameter Space Exploration")
|
||||
|
||||
results = selected_result.results
|
||||
refit_metric = selected_result._get_best_metric_name()
|
||||
param_grid = selected_result.settings.param_grid
|
||||
|
||||
_render_performance_summary(results, refit_metric)
|
||||
|
||||
_render_parameter_distributions(results, param_grid)
|
||||
|
||||
_render_score_evolution(results, selected_metric)
|
||||
|
||||
_render_score_vs_parameters(results, selected_metric, param_grid)
|
||||
|
||||
_render_parameter_correlations(results, selected_metric)
|
||||
|
||||
_render_parameter_interactions(results, selected_metric, param_grid)
|
||||
122
src/entropice/dashboard/sections/regression_analysis.py
Normal file
122
src/entropice/dashboard/sections/regression_analysis.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
"""Regression Analysis Section."""
|
||||
|
||||
import streamlit as st
|
||||
|
||||
from entropice.dashboard.plots.regression import plot_regression_scatter, plot_residuals
|
||||
from entropice.dashboard.utils.loaders import TrainingResult
|
||||
from entropice.ml.dataset import DatasetEnsemble
|
||||
|
||||
|
||||
def render_regression_analysis(selected_result: TrainingResult):
|
||||
"""Render regression analysis with true vs predicted scatter plots.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
|
||||
"""
|
||||
st.header("📊 Regression Analysis")
|
||||
|
||||
# Check if this is a regression task
|
||||
if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]:
|
||||
st.info("📈 Regression analysis is only available for regression tasks (count, density).")
|
||||
return
|
||||
|
||||
# Load predictions
|
||||
predictions_df = selected_result.load_predictions()
|
||||
if predictions_df is None:
|
||||
st.warning("⚠️ No prediction data found for this training result.")
|
||||
return
|
||||
|
||||
# Create DatasetEnsemble from settings
|
||||
with st.spinner("Loading training data to get true values..."):
|
||||
ensemble = DatasetEnsemble(
|
||||
grid=selected_result.settings.grid,
|
||||
level=selected_result.settings.level,
|
||||
members=selected_result.settings.members,
|
||||
temporal_mode=selected_result.settings.temporal_mode,
|
||||
dimension_filters=selected_result.settings.dimension_filters,
|
||||
variable_filters=selected_result.settings.variable_filters,
|
||||
add_lonlat=selected_result.settings.add_lonlat,
|
||||
)
|
||||
|
||||
# Create training set to get true values
|
||||
training_set = ensemble.create_training_set(
|
||||
task=selected_result.settings.task,
|
||||
target=selected_result.settings.target,
|
||||
device="cpu",
|
||||
cache_mode="read",
|
||||
)
|
||||
|
||||
# Get split information
|
||||
split_series = training_set.split
|
||||
|
||||
# Merge predictions with true values and split info
|
||||
# predictions_df should have 'cell_id' and 'predicted' columns
|
||||
# training_set.targets has 'y' (true values) with cell_id as index
|
||||
true_values = training_set.targets[["y"]].reset_index()
|
||||
|
||||
# Merge on cell_id
|
||||
merged = predictions_df.merge(true_values, on="cell_id", how="inner")
|
||||
merged["split"] = split_series.reindex(merged["cell_id"]).values
|
||||
|
||||
# Get train, test, and combined data
|
||||
train_data = merged[merged["split"] == "train"]
|
||||
test_data = merged[merged["split"] == "test"]
|
||||
|
||||
if len(train_data) == 0 or len(test_data) == 0:
|
||||
st.error("❌ Could not properly split data into train and test sets.")
|
||||
return
|
||||
|
||||
# Display scatter plots
|
||||
st.subheader("True vs Predicted Values")
|
||||
st.caption("Scatter plots showing the relationship between true and predicted values")
|
||||
|
||||
cols = st.columns(3)
|
||||
|
||||
with cols[0]:
|
||||
st.markdown("#### Test Set")
|
||||
st.caption("Held-out test set")
|
||||
fig_test = plot_regression_scatter(
|
||||
test_data["y"],
|
||||
test_data["predicted"],
|
||||
title="Test Set",
|
||||
)
|
||||
st.plotly_chart(fig_test, use_container_width=True)
|
||||
|
||||
with cols[1]:
|
||||
st.markdown("#### Training Set")
|
||||
st.caption("Training set")
|
||||
fig_train = plot_regression_scatter(
|
||||
train_data["y"],
|
||||
train_data["predicted"],
|
||||
title="Training Set",
|
||||
)
|
||||
st.plotly_chart(fig_train, use_container_width=True)
|
||||
|
||||
with cols[2]:
|
||||
st.markdown("#### Combined")
|
||||
st.caption("Train + Test sets")
|
||||
fig_combined = plot_regression_scatter(
|
||||
merged["y"],
|
||||
merged["predicted"],
|
||||
title="Combined",
|
||||
)
|
||||
st.plotly_chart(fig_combined, use_container_width=True)
|
||||
|
||||
# Display residual plots
|
||||
st.subheader("Residual Analysis")
|
||||
st.caption("Residual plots to assess model fit and identify patterns in errors")
|
||||
|
||||
cols = st.columns(3)
|
||||
|
||||
with cols[0]:
|
||||
fig_test_res = plot_residuals(test_data["y"], test_data["predicted"], title="Test Set Residuals")
|
||||
st.plotly_chart(fig_test_res, use_container_width=True)
|
||||
|
||||
with cols[1]:
|
||||
fig_train_res = plot_residuals(train_data["y"], train_data["predicted"], title="Training Set Residuals")
|
||||
st.plotly_chart(fig_train_res, use_container_width=True)
|
||||
|
||||
with cols[2]:
|
||||
fig_combined_res = plot_residuals(merged["y"], merged["predicted"], title="Combined Residuals")
|
||||
st.plotly_chart(fig_combined_res, use_container_width=True)
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
"""Utilities for ordering predicted classes consistently across visualizations.
|
||||
|
||||
This module leverages the canonical class labels defined in the ML dataset module
|
||||
to ensure consistent ordering across all visualizations.
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from entropice.utils.types import Task
|
||||
|
||||
# Canonical orderings imported from the ML pipeline
|
||||
# Binary labels are defined inline in dataset.py: {False: "No RTS", True: "RTS"}
|
||||
# Count/Density labels are defined in the bin_values function
|
||||
BINARY_LABELS = ["No RTS", "RTS"]
|
||||
COUNT_LABELS = ["None", "Very Few", "Few", "Several", "Many", "Very Many"]
|
||||
DENSITY_LABELS = ["Empty", "Very Sparse", "Sparse", "Moderate", "Dense", "Very Dense"]
|
||||
|
||||
CLASS_ORDERINGS: dict[Task | str, list[str]] = {
|
||||
"binary": BINARY_LABELS,
|
||||
"count": COUNT_LABELS,
|
||||
"density": DENSITY_LABELS,
|
||||
}
|
||||
|
||||
|
||||
def get_ordered_classes(task: Task | str, available_classes: list[str] | None = None) -> list[str]:
|
||||
"""Get properly ordered class labels for a given task.
|
||||
|
||||
This uses the same canonical ordering as defined in the ML dataset module,
|
||||
ensuring consistency between training and inference visualizations.
|
||||
|
||||
Args:
|
||||
task: Task type ('binary', 'count', 'density').
|
||||
available_classes: Optional list of available classes to filter and order.
|
||||
If None, returns all canonical classes for the task.
|
||||
|
||||
Returns:
|
||||
List of class labels in proper order.
|
||||
|
||||
Examples:
|
||||
>>> get_ordered_classes("binary")
|
||||
['No RTS', 'RTS']
|
||||
>>> get_ordered_classes("count", ["None", "Few", "Several"])
|
||||
['None', 'Few', 'Several']
|
||||
|
||||
"""
|
||||
canonical_order = CLASS_ORDERINGS[task]
|
||||
|
||||
if available_classes is None:
|
||||
return canonical_order
|
||||
|
||||
# Filter canonical order to only include available classes, preserving order
|
||||
return [cls for cls in canonical_order if cls in available_classes]
|
||||
|
||||
|
||||
def sort_class_series(series: pd.Series, task: Task | str) -> pd.Series:
|
||||
"""Sort a pandas Series with class labels according to canonical ordering.
|
||||
|
||||
Args:
|
||||
series: Pandas Series with class labels as index.
|
||||
task: Task type ('binary', 'count', 'density').
|
||||
|
||||
Returns:
|
||||
Sorted Series with classes in canonical order.
|
||||
|
||||
"""
|
||||
available_classes = series.index.tolist()
|
||||
ordered_classes = get_ordered_classes(task, available_classes)
|
||||
|
||||
# Reindex to get proper order
|
||||
return series.reindex(ordered_classes)
|
||||
|
|
@ -59,7 +59,7 @@ task_display_infos: dict[Task, TaskDisplayInfo] = {
|
|||
class TrainingResultDisplayInfo:
|
||||
task: Task
|
||||
target: TargetDataset
|
||||
model: Model
|
||||
model: Model | Literal["autogluon"]
|
||||
grid: Grid
|
||||
level: int
|
||||
timestamp: datetime
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from shapely.geometry import shape
|
|||
import entropice.spatial.grids
|
||||
import entropice.utils.paths
|
||||
from entropice.dashboard.utils.formatters import TrainingResultDisplayInfo
|
||||
from entropice.ml.autogluon_training import AutoGluonTrainingSettings
|
||||
from entropice.ml.dataset import DatasetEnsemble, TrainingSet
|
||||
from entropice.ml.training import TrainingSettings
|
||||
from entropice.utils.types import GridConfig, TargetDataset, Task, all_target_datasets, all_tasks
|
||||
|
|
@ -215,14 +216,18 @@ class TrainingResult:
|
|||
return pd.DataFrame.from_records(records)
|
||||
|
||||
|
||||
@st.cache_data
|
||||
@st.cache_data(ttl=300) # Cache for 5 minutes
|
||||
def load_all_training_results() -> list[TrainingResult]:
|
||||
"""Load all training results from the results directory."""
|
||||
results_dir = entropice.utils.paths.RESULTS_DIR
|
||||
training_results: list[TrainingResult] = []
|
||||
incomplete_results: list[tuple[Path, Exception]] = []
|
||||
for result_path in results_dir.iterdir():
|
||||
if not result_path.is_dir():
|
||||
continue
|
||||
# Skip AutoGluon results directory
|
||||
if "autogluon" in result_path.name.lower():
|
||||
continue
|
||||
try:
|
||||
training_result = TrainingResult.from_path(result_path)
|
||||
training_results.append(training_result)
|
||||
|
|
@ -237,10 +242,159 @@ def load_all_training_results() -> list[TrainingResult]:
|
|||
training_results.append(training_result)
|
||||
is_experiment_dir = True
|
||||
except FileNotFoundError as e2:
|
||||
st.warning(f"Skipping incomplete training result: {e2}")
|
||||
incomplete_results.append((experiment_path, e2))
|
||||
if not is_experiment_dir:
|
||||
st.warning(f"Skipping incomplete training result: {e}")
|
||||
incomplete_results.append((result_path, e))
|
||||
|
||||
if len(incomplete_results) > 0:
|
||||
st.warning(
|
||||
f"Found {len(incomplete_results)} incomplete training results that were skipped:\n - "
|
||||
+ "\n - ".join(f"{p}: {e}" for p, e in incomplete_results)
|
||||
)
|
||||
# Sort by creation time (most recent first)
|
||||
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
|
||||
return training_results
|
||||
|
||||
|
||||
@dataclass
|
||||
class AutogluonTrainingResult:
|
||||
"""Wrapper for training result data and metadata."""
|
||||
|
||||
path: Path
|
||||
experiment: str
|
||||
settings: AutoGluonTrainingSettings
|
||||
test_metrics: dict[str, float | dict | pd.DataFrame]
|
||||
leaderboard: pd.DataFrame
|
||||
feature_importance: pd.DataFrame | None
|
||||
created_at: float
|
||||
files: list[Path]
|
||||
|
||||
@classmethod
|
||||
def from_path(cls, result_path: Path, experiment_name: str | None = None) -> "AutogluonTrainingResult":
|
||||
"""Load an AutogluonTrainingResult from a given result directory path."""
|
||||
settings_file = result_path / "training_settings.toml"
|
||||
metrics_file = result_path / "test_metrics.pickle"
|
||||
leaderboard_file = result_path / "leaderboard.parquet"
|
||||
feature_importance_file = result_path / "feature_importance.parquet"
|
||||
all_files = list(result_path.iterdir())
|
||||
if not settings_file.exists():
|
||||
raise FileNotFoundError(f"Missing settings file in {result_path}")
|
||||
if not metrics_file.exists():
|
||||
raise FileNotFoundError(f"Missing metrics file in {result_path}")
|
||||
if not leaderboard_file.exists():
|
||||
raise FileNotFoundError(f"Missing leaderboard file in {result_path}")
|
||||
|
||||
created_at = result_path.stat().st_ctime
|
||||
settings_dict = toml.load(settings_file)["settings"]
|
||||
settings = AutoGluonTrainingSettings(**settings_dict)
|
||||
with open(metrics_file, "rb") as f:
|
||||
metrics = pickle.load(f)
|
||||
leaderboard = pd.read_parquet(leaderboard_file)
|
||||
|
||||
if feature_importance_file.exists():
|
||||
feature_importance = pd.read_parquet(feature_importance_file)
|
||||
else:
|
||||
feature_importance = None
|
||||
|
||||
return cls(
|
||||
path=result_path,
|
||||
experiment=experiment_name or "N/A",
|
||||
settings=settings,
|
||||
test_metrics=metrics,
|
||||
leaderboard=leaderboard,
|
||||
feature_importance=feature_importance,
|
||||
created_at=created_at,
|
||||
files=all_files,
|
||||
)
|
||||
|
||||
@property
|
||||
def test_confusion_matrix(self) -> pd.DataFrame | None:
|
||||
"""Get the test confusion matrix."""
|
||||
if "confusion_matrix" not in self.test_metrics:
|
||||
return None
|
||||
assert isinstance(self.test_metrics["confusion_matrix"], pd.DataFrame)
|
||||
return self.test_metrics["confusion_matrix"]
|
||||
|
||||
@property
|
||||
def display_info(self) -> TrainingResultDisplayInfo:
|
||||
"""Get display information for the training result."""
|
||||
return TrainingResultDisplayInfo(
|
||||
task=self.settings.task,
|
||||
target=self.settings.target,
|
||||
model="autogluon",
|
||||
grid=self.settings.grid,
|
||||
level=self.settings.level,
|
||||
timestamp=datetime.fromtimestamp(self.created_at),
|
||||
)
|
||||
|
||||
def _get_best_metric_name(self) -> str:
|
||||
"""Get the primary metric name for a given task."""
|
||||
match self.settings.task:
|
||||
case "binary":
|
||||
return "f1"
|
||||
case "count_regimes" | "density_regimes":
|
||||
return "f1_weighted"
|
||||
case _: # regression tasks
|
||||
return "root_mean_squared_error"
|
||||
|
||||
@staticmethod
|
||||
def to_dataframe(training_results: list["AutogluonTrainingResult"]) -> pd.DataFrame:
|
||||
"""Convert a list of AutogluonTrainingResult objects to a DataFrame for display."""
|
||||
records = []
|
||||
for tr in training_results:
|
||||
info = tr.display_info
|
||||
best_metric_name = tr._get_best_metric_name()
|
||||
|
||||
record = {
|
||||
"Experiment": tr.experiment if tr.experiment else "N/A",
|
||||
"Task": info.task,
|
||||
"Target": info.target,
|
||||
"Model": info.model,
|
||||
"Grid": GridConfig.from_grid_level((info.grid, info.level)).display_name,
|
||||
"Created At": info.timestamp.strftime("%Y-%m-%d %H:%M"),
|
||||
"Score-Metric": best_metric_name.title(),
|
||||
"Best Models Score (Test-Set)": tr.test_metrics.get(best_metric_name),
|
||||
"Path": str(tr.path.name),
|
||||
}
|
||||
records.append(record)
|
||||
return pd.DataFrame.from_records(records)
|
||||
|
||||
|
||||
@st.cache_data(ttl=300) # Cache for 5 minutes
|
||||
def load_all_autogluon_training_results() -> list[AutogluonTrainingResult]:
|
||||
"""Load all training results from the results directory."""
|
||||
results_dir = entropice.utils.paths.RESULTS_DIR
|
||||
training_results: list[AutogluonTrainingResult] = []
|
||||
incomplete_results: list[tuple[Path, Exception]] = []
|
||||
for result_path in results_dir.iterdir():
|
||||
if not result_path.is_dir():
|
||||
continue
|
||||
# Skip AutoGluon results directory
|
||||
if "autogluon" not in result_path.name.lower():
|
||||
continue
|
||||
try:
|
||||
training_result = AutogluonTrainingResult.from_path(result_path)
|
||||
training_results.append(training_result)
|
||||
except FileNotFoundError as e:
|
||||
is_experiment_dir = False
|
||||
for experiment_path in result_path.iterdir():
|
||||
if not experiment_path.is_dir():
|
||||
continue
|
||||
try:
|
||||
experiment_name = experiment_path.parent.name
|
||||
training_result = AutogluonTrainingResult.from_path(experiment_path, experiment_name)
|
||||
training_results.append(training_result)
|
||||
is_experiment_dir = True
|
||||
except FileNotFoundError as e2:
|
||||
incomplete_results.append((experiment_path, e2))
|
||||
if not is_experiment_dir:
|
||||
incomplete_results.append((result_path, e))
|
||||
|
||||
if len(incomplete_results) > 0:
|
||||
st.warning(
|
||||
f"Found {len(incomplete_results)} incomplete autogluon training results that were skipped:\n - "
|
||||
+ "\n - ".join(f"{p}: {e}" for p, e in incomplete_results)
|
||||
)
|
||||
# Sort by creation time (most recent first)
|
||||
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
|
||||
return training_results
|
||||
|
|
|
|||
|
|
@ -369,6 +369,7 @@ def render_xgboost_model_state(model_state: xr.Dataset, selected_result: Trainin
|
|||
options=["gain", "weight", "cover", "total_gain", "total_cover"],
|
||||
index=0,
|
||||
help="Choose which importance metric to visualize",
|
||||
key="model_state_importance_type",
|
||||
)
|
||||
|
||||
# Top N slider
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from entropice.dashboard.sections.experiment_results import (
|
|||
render_training_results_summary,
|
||||
)
|
||||
from entropice.dashboard.sections.storage_statistics import render_storage_statistics
|
||||
from entropice.dashboard.utils.loaders import load_all_training_results
|
||||
from entropice.dashboard.utils.loaders import load_all_autogluon_training_results, load_all_training_results
|
||||
from entropice.dashboard.utils.stats import DatasetStatistics, load_all_default_dataset_statistics
|
||||
|
||||
|
||||
|
|
@ -27,6 +27,9 @@ def render_overview_page():
|
|||
)
|
||||
# Load training results
|
||||
training_results = load_all_training_results()
|
||||
autogluon_results = load_all_autogluon_training_results()
|
||||
if len(autogluon_results) > 0:
|
||||
training_results.extend(autogluon_results)
|
||||
|
||||
if not training_results:
|
||||
st.warning("No training results found. Please run some training experiments first.")
|
||||
|
|
|
|||
|
|
@ -2,150 +2,22 @@
|
|||
|
||||
from typing import cast
|
||||
|
||||
import geopandas as gpd
|
||||
import streamlit as st
|
||||
import xarray as xr
|
||||
from stopuhr import stopwatch
|
||||
|
||||
from entropice.dashboard.plots.hyperparameter_analysis import (
|
||||
render_binned_parameter_space,
|
||||
render_confusion_matrix_heatmap,
|
||||
render_confusion_matrix_map,
|
||||
render_espa_binned_parameter_space,
|
||||
render_multi_metric_comparison,
|
||||
render_parameter_correlation,
|
||||
render_parameter_distributions,
|
||||
render_performance_summary,
|
||||
render_top_configurations,
|
||||
from entropice.dashboard.sections.cv_result import (
|
||||
render_confusion_matrices,
|
||||
render_cv_statistics_section,
|
||||
render_metrics_section,
|
||||
render_run_information,
|
||||
)
|
||||
from entropice.dashboard.sections.hparam_space import render_hparam_space_section
|
||||
from entropice.dashboard.sections.regression_analysis import render_regression_analysis
|
||||
from entropice.dashboard.utils.formatters import format_metric_name
|
||||
from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results
|
||||
from entropice.dashboard.utils.stats import CVResultsStatistics
|
||||
from entropice.utils.types import GridConfig
|
||||
from entropice.dashboard.utils.stats import CVMetricStatistics
|
||||
|
||||
|
||||
def load_predictions_with_labels(selected_result: TrainingResult) -> gpd.GeoDataFrame | None:
|
||||
"""Load predictions and merge with training data to get true labels and split info.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
|
||||
Returns:
|
||||
GeoDataFrame with predictions, true labels, and split information, or None if unavailable.
|
||||
|
||||
"""
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from entropice.ml.dataset import DatasetEnsemble, bin_values, taskcol
|
||||
|
||||
# Load predictions
|
||||
preds_gdf = selected_result.load_predictions()
|
||||
if preds_gdf is None:
|
||||
return None
|
||||
|
||||
# Create a minimal dataset ensemble to access target data
|
||||
settings = selected_result.settings
|
||||
dataset_ensemble = DatasetEnsemble(
|
||||
grid=settings.grid,
|
||||
level=settings.level,
|
||||
target=settings.target,
|
||||
members=[], # No feature data needed, just targets
|
||||
)
|
||||
|
||||
# Load target dataset (just labels, no features)
|
||||
with st.spinner("Loading target labels..."):
|
||||
targets = dataset_ensemble._read_target()
|
||||
|
||||
# Get coverage and task columns
|
||||
task_col = taskcol[settings.task][settings.target]
|
||||
|
||||
# Filter for valid labels (same as in _cat_and_split)
|
||||
valid_labels = targets[task_col].notna()
|
||||
filtered_targets = targets.loc[valid_labels].copy()
|
||||
|
||||
# Apply binning to get class labels (same logic as _cat_and_split)
|
||||
if settings.task == "binary":
|
||||
binned = filtered_targets[task_col].map({False: "No RTS", True: "RTS"}).astype("category")
|
||||
elif settings.task == "count":
|
||||
binned = bin_values(filtered_targets[task_col].astype(int), task=settings.task)
|
||||
elif settings.task == "density":
|
||||
binned = bin_values(filtered_targets[task_col], task=settings.task)
|
||||
else:
|
||||
raise ValueError(f"Invalid task: {settings.task}")
|
||||
|
||||
filtered_targets["true_class"] = binned.to_numpy()
|
||||
|
||||
# Recreate the train/test split deterministically (same random_state=42 as in _cat_and_split)
|
||||
_train_idx, test_idx = train_test_split(
|
||||
filtered_targets.index.to_numpy(), test_size=0.2, random_state=42, shuffle=True
|
||||
)
|
||||
filtered_targets["split"] = "train"
|
||||
filtered_targets.loc[test_idx, "split"] = "test"
|
||||
filtered_targets["split"] = filtered_targets["split"].astype("category")
|
||||
|
||||
# Ensure cell_id is available as a column for merging
|
||||
# Check if cell_id already exists, otherwise use the index
|
||||
if "cell_id" not in filtered_targets.columns:
|
||||
filtered_targets = filtered_targets.reset_index().rename(columns={"index": "cell_id"})
|
||||
|
||||
# Merge predictions with labels (inner join to keep only cells with predictions)
|
||||
merged = filtered_targets.merge(preds_gdf[["cell_id", "predicted_class"]], on="cell_id", how="inner")
|
||||
merged_gdf = gpd.GeoDataFrame(merged, geometry="geometry", crs=targets.crs)
|
||||
|
||||
return merged_gdf
|
||||
|
||||
|
||||
def compute_confusion_matrix_from_merged_data(
|
||||
merged_data: gpd.GeoDataFrame,
|
||||
split_type: str,
|
||||
label_names: list[str],
|
||||
) -> xr.DataArray | None:
|
||||
"""Compute confusion matrix from merged predictions and labels.
|
||||
|
||||
Args:
|
||||
merged_data: GeoDataFrame with 'true_class', 'predicted_class', and 'split' columns.
|
||||
split_type: One of 'test', 'train', or 'all'.
|
||||
label_names: List of class label names in order.
|
||||
|
||||
Returns:
|
||||
xarray.DataArray with confusion matrix or None if data unavailable.
|
||||
|
||||
"""
|
||||
from sklearn.metrics import confusion_matrix
|
||||
|
||||
# Filter by split type
|
||||
if split_type == "train":
|
||||
data = merged_data[merged_data["split"] == "train"]
|
||||
elif split_type == "test":
|
||||
data = merged_data[merged_data["split"] == "test"]
|
||||
elif split_type == "all":
|
||||
data = merged_data
|
||||
else:
|
||||
raise ValueError(f"Invalid split_type: {split_type}")
|
||||
|
||||
if len(data) == 0:
|
||||
st.warning(f"No data available for {split_type} split.")
|
||||
return None
|
||||
|
||||
# Get true and predicted labels
|
||||
y_true = data["true_class"].to_numpy()
|
||||
y_pred = data["predicted_class"].to_numpy()
|
||||
|
||||
# Compute confusion matrix
|
||||
cm = confusion_matrix(y_true, y_pred, labels=label_names)
|
||||
|
||||
# Create xarray DataArray
|
||||
cm_xr = xr.DataArray(
|
||||
cm,
|
||||
dims=["true_label", "predicted_label"],
|
||||
coords={"true_label": label_names, "predicted_label": label_names},
|
||||
name="confusion_matrix",
|
||||
)
|
||||
|
||||
return cm_xr
|
||||
|
||||
|
||||
def render_analysis_settings_sidebar(training_results: list[TrainingResult]) -> tuple[TrainingResult, str, str, int]:
|
||||
def render_analysis_settings_sidebar(training_results: list[TrainingResult]) -> tuple[TrainingResult, str, str]:
|
||||
"""Render sidebar for training run and analysis settings selection.
|
||||
|
||||
Args:
|
||||
|
|
@ -155,6 +27,7 @@ def render_analysis_settings_sidebar(training_results: list[TrainingResult]) ->
|
|||
Tuple of (selected_result, selected_metric, refit_metric, top_n).
|
||||
|
||||
"""
|
||||
with st.sidebar.form("training_analysis_settings_form"):
|
||||
st.header("Select Training Run")
|
||||
|
||||
# Create selection options with task-first naming
|
||||
|
|
@ -178,7 +51,12 @@ def render_analysis_settings_sidebar(training_results: list[TrainingResult]) ->
|
|||
available_metrics = selected_result.available_metrics
|
||||
|
||||
# Try to get refit metric from settings
|
||||
refit_metric = "f1" if selected_result.settings.task == "binary" else "f1_weighted"
|
||||
if selected_result.settings.task == "binary":
|
||||
refit_metric = "f1"
|
||||
elif selected_result.settings.task in ["count_regimes", "density_regimes"]:
|
||||
refit_metric = "f1_weighted"
|
||||
else:
|
||||
refit_metric = "r2"
|
||||
|
||||
if refit_metric in available_metrics:
|
||||
default_metric_idx = available_metrics.index(refit_metric)
|
||||
|
|
@ -194,312 +72,18 @@ def render_analysis_settings_sidebar(training_results: list[TrainingResult]) ->
|
|||
key="metric_select",
|
||||
)
|
||||
|
||||
# Top N configurations
|
||||
top_n = st.slider(
|
||||
"Top N Configurations",
|
||||
min_value=5,
|
||||
max_value=50,
|
||||
value=10,
|
||||
step=5,
|
||||
help="Number of top configurations to display",
|
||||
key="top_n_slider",
|
||||
# Form submit button
|
||||
submitted = st.form_submit_button(
|
||||
"Load Training Result",
|
||||
type="primary",
|
||||
use_container_width=True,
|
||||
)
|
||||
|
||||
return selected_result, selected_metric, refit_metric, top_n
|
||||
if not submitted:
|
||||
st.info("👆 Click 'Load Training Result' to apply changes.")
|
||||
st.stop()
|
||||
|
||||
|
||||
def render_run_information(selected_result: TrainingResult, refit_metric):
|
||||
"""Render training run configuration overview.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
refit_metric: The refit metric used for model selection.
|
||||
|
||||
"""
|
||||
st.header("📋 Run Information")
|
||||
|
||||
grid_config = GridConfig.from_grid_level(f"{selected_result.settings.grid}{selected_result.settings.level}") # ty:ignore[invalid-argument-type]
|
||||
|
||||
col1, col2, col3, col4, col5 = st.columns(5)
|
||||
with col1:
|
||||
st.metric("Task", selected_result.settings.task.capitalize())
|
||||
with col2:
|
||||
st.metric("Target", selected_result.settings.target.capitalize())
|
||||
with col3:
|
||||
st.metric("Grid", grid_config.display_name)
|
||||
with col4:
|
||||
st.metric("Model", selected_result.settings.model.upper())
|
||||
with col5:
|
||||
st.metric("Trials", len(selected_result.results))
|
||||
|
||||
st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}")
|
||||
|
||||
|
||||
def render_test_metrics_section(selected_result: TrainingResult):
|
||||
"""Render test metrics overview showing final model performance.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
|
||||
"""
|
||||
st.header("🎯 Test Set Performance")
|
||||
st.caption("Performance metrics on the held-out test set (best model from hyperparameter search)")
|
||||
|
||||
test_metrics = selected_result.metrics
|
||||
|
||||
if not test_metrics:
|
||||
st.warning("No test metrics available for this training run.")
|
||||
return
|
||||
|
||||
# Display metrics in columns based on task type
|
||||
task = selected_result.settings.task
|
||||
|
||||
if task == "binary":
|
||||
# Binary classification metrics
|
||||
col1, col2, col3, col4, col5 = st.columns(5)
|
||||
|
||||
with col1:
|
||||
st.metric("Accuracy", f"{test_metrics.get('accuracy', 0):.4f}")
|
||||
with col2:
|
||||
st.metric("F1 Score", f"{test_metrics.get('f1', 0):.4f}")
|
||||
with col3:
|
||||
st.metric("Precision", f"{test_metrics.get('precision', 0):.4f}")
|
||||
with col4:
|
||||
st.metric("Recall", f"{test_metrics.get('recall', 0):.4f}")
|
||||
with col5:
|
||||
st.metric("Jaccard", f"{test_metrics.get('jaccard', 0):.4f}")
|
||||
else:
|
||||
# Multiclass metrics
|
||||
col1, col2, col3 = st.columns(3)
|
||||
|
||||
with col1:
|
||||
st.metric("Accuracy", f"{test_metrics.get('accuracy', 0):.4f}")
|
||||
with col2:
|
||||
st.metric("F1 (Macro)", f"{test_metrics.get('f1_macro', 0):.4f}")
|
||||
with col3:
|
||||
st.metric("F1 (Weighted)", f"{test_metrics.get('f1_weighted', 0):.4f}")
|
||||
|
||||
col4, col5, col6 = st.columns(3)
|
||||
|
||||
with col4:
|
||||
st.metric("Precision (Macro)", f"{test_metrics.get('precision_macro', 0):.4f}")
|
||||
with col5:
|
||||
st.metric("Precision (Weighted)", f"{test_metrics.get('precision_weighted', 0):.4f}")
|
||||
with col6:
|
||||
st.metric("Recall (Macro)", f"{test_metrics.get('recall_macro', 0):.4f}")
|
||||
|
||||
col7, col8, col9 = st.columns(3)
|
||||
|
||||
with col7:
|
||||
st.metric("Jaccard (Micro)", f"{test_metrics.get('jaccard_micro', 0):.4f}")
|
||||
with col8:
|
||||
st.metric("Jaccard (Macro)", f"{test_metrics.get('jaccard_macro', 0):.4f}")
|
||||
with col9:
|
||||
st.metric("Jaccard (Weighted)", f"{test_metrics.get('jaccard_weighted', 0):.4f}")
|
||||
|
||||
|
||||
def render_cv_statistics_section(selected_result, selected_metric):
|
||||
"""Render cross-validation statistics for selected metric.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
selected_metric: The metric to display statistics for.
|
||||
|
||||
"""
|
||||
st.header("📈 Cross-Validation Statistics")
|
||||
st.caption("Performance during hyperparameter search (averaged across CV folds)")
|
||||
|
||||
from entropice.dashboard.utils.stats import CVMetricStatistics
|
||||
|
||||
cv_stats = CVMetricStatistics.compute(selected_result, selected_metric)
|
||||
|
||||
col1, col2, col3, col4, col5 = st.columns(5)
|
||||
|
||||
with col1:
|
||||
st.metric("Best Score", f"{cv_stats.best_score:.4f}")
|
||||
|
||||
with col2:
|
||||
st.metric("Mean Score", f"{cv_stats.mean_score:.4f}")
|
||||
|
||||
with col3:
|
||||
st.metric("Std Dev", f"{cv_stats.std_score:.4f}")
|
||||
|
||||
with col4:
|
||||
st.metric("Worst Score", f"{cv_stats.worst_score:.4f}")
|
||||
|
||||
with col5:
|
||||
st.metric("Median Score", f"{cv_stats.median_score:.4f}")
|
||||
|
||||
if cv_stats.mean_cv_std is not None:
|
||||
st.info(f"**Mean CV Std:** {cv_stats.mean_cv_std:.4f} - Average standard deviation across CV folds")
|
||||
|
||||
# Compare with test metric if available
|
||||
if selected_metric in selected_result.metrics:
|
||||
test_score = selected_result.metrics[selected_metric]
|
||||
st.divider()
|
||||
st.subheader("CV vs Test Performance")
|
||||
|
||||
col1, col2, col3 = st.columns(3)
|
||||
with col1:
|
||||
st.metric("Best CV Score", f"{cv_stats.best_score:.4f}")
|
||||
with col2:
|
||||
st.metric("Test Score", f"{test_score:.4f}")
|
||||
with col3:
|
||||
delta = test_score - cv_stats.best_score
|
||||
delta_pct = (delta / cv_stats.best_score * 100) if cv_stats.best_score != 0 else 0
|
||||
st.metric("Difference", f"{delta:+.4f}", delta=f"{delta_pct:+.2f}%")
|
||||
|
||||
if abs(delta) > cv_stats.std_score:
|
||||
st.warning(
|
||||
"⚠️ Test performance differs significantly from CV performance. "
|
||||
"This may indicate overfitting or data distribution mismatch."
|
||||
)
|
||||
|
||||
|
||||
@st.fragment
|
||||
def render_confusion_matrix_section(selected_result: TrainingResult, merged_predictions: gpd.GeoDataFrame | None):
|
||||
"""Render confusion matrix visualization and analysis.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
merged_predictions: GeoDataFrame with predictions merged with true labels and split info.
|
||||
|
||||
"""
|
||||
st.header("🎲 Confusion Matrix")
|
||||
st.caption("Detailed breakdown of predictions")
|
||||
|
||||
# Add selector for confusion matrix type
|
||||
cm_type = st.selectbox(
|
||||
"Select Data Split",
|
||||
options=["test", "train", "all"],
|
||||
format_func=lambda x: {"test": "Test Set", "train": "CV Set (Train Split)", "all": "All Available Data"}[x],
|
||||
help="Choose which data split to display the confusion matrix for",
|
||||
key="cm_split_select",
|
||||
)
|
||||
|
||||
# Get label names from settings
|
||||
label_names = selected_result.settings.classes
|
||||
|
||||
# Compute or load confusion matrix based on selection
|
||||
if cm_type == "test":
|
||||
if selected_result.confusion_matrix is None:
|
||||
st.warning("No confusion matrix available for the test set.")
|
||||
return
|
||||
cm = selected_result.confusion_matrix
|
||||
st.info("📊 Showing confusion matrix for the **Test Set** (held-out data, never used during training)")
|
||||
else:
|
||||
if merged_predictions is None:
|
||||
st.warning("Predictions data not available. Cannot compute confusion matrix.")
|
||||
return
|
||||
|
||||
with st.spinner(f"Computing confusion matrix for {cm_type} split..."):
|
||||
cm = compute_confusion_matrix_from_merged_data(merged_predictions, cm_type, label_names)
|
||||
if cm is None:
|
||||
return
|
||||
|
||||
if cm_type == "train":
|
||||
st.info(
|
||||
"📊 Showing confusion matrix for the **CV Set (Train Split)** "
|
||||
"(data used during hyperparameter search cross-validation)"
|
||||
)
|
||||
else: # all
|
||||
st.info("📊 Showing confusion matrix for **All Available Data** (combined train and test splits)")
|
||||
|
||||
render_confusion_matrix_heatmap(cm, selected_result.settings.task)
|
||||
|
||||
|
||||
def render_parameter_space_section(selected_result, selected_metric):
|
||||
"""Render parameter space analysis section.
|
||||
|
||||
Args:
|
||||
selected_result: The selected TrainingResult object.
|
||||
selected_metric: The metric to analyze parameters against.
|
||||
|
||||
"""
|
||||
st.header("🔍 Parameter Space Analysis")
|
||||
|
||||
# Compute CV results statistics
|
||||
cv_results_stats = CVResultsStatistics.compute(selected_result)
|
||||
|
||||
# Show parameter space summary
|
||||
with st.expander("📋 Parameter Space Summary", expanded=False):
|
||||
param_summary_df = cv_results_stats.parameters_to_dataframe()
|
||||
if not param_summary_df.empty:
|
||||
st.dataframe(param_summary_df, hide_index=True, width="stretch")
|
||||
else:
|
||||
st.info("No parameter information available.")
|
||||
|
||||
results = selected_result.results
|
||||
settings = selected_result.settings
|
||||
|
||||
# Parameter distributions
|
||||
st.subheader("📈 Parameter Distributions")
|
||||
render_parameter_distributions(results, settings)
|
||||
|
||||
# Binned parameter space plots
|
||||
st.subheader("🎨 Binned Parameter Space")
|
||||
|
||||
# Check if this is an ESPA model and show ESPA-specific plots
|
||||
model_type = settings.model
|
||||
if model_type == "espa":
|
||||
# Show ESPA-specific binned plots (eps_cl vs eps_e binned by K)
|
||||
render_espa_binned_parameter_space(results, selected_metric)
|
||||
|
||||
# Optionally show the generic binned plots in an expander
|
||||
with st.expander("📊 All Parameter Combinations", expanded=False):
|
||||
st.caption("Generic parameter space exploration (all pairwise combinations)")
|
||||
render_binned_parameter_space(results, selected_metric)
|
||||
else:
|
||||
# For non-ESPA models, show the generic binned plots
|
||||
render_binned_parameter_space(results, selected_metric)
|
||||
|
||||
|
||||
def render_data_export_section(results, selected_result):
|
||||
"""Render data export section with download buttons.
|
||||
|
||||
Args:
|
||||
results: DataFrame with CV results.
|
||||
selected_result: The selected TrainingResult object.
|
||||
|
||||
"""
|
||||
with st.expander("💾 Export Data", expanded=False):
|
||||
st.subheader("Download Results")
|
||||
|
||||
col1, col2 = st.columns(2)
|
||||
|
||||
with col1:
|
||||
# Download full results as CSV
|
||||
csv_data = results.to_csv(index=False)
|
||||
st.download_button(
|
||||
label="📥 Download Full Results (CSV)",
|
||||
data=csv_data,
|
||||
file_name=f"{selected_result.path.name}_results.csv",
|
||||
mime="text/csv",
|
||||
)
|
||||
|
||||
with col2:
|
||||
# Download settings as JSON
|
||||
import json
|
||||
|
||||
settings_dict = {
|
||||
"task": selected_result.settings.task,
|
||||
"grid": selected_result.settings.grid,
|
||||
"level": selected_result.settings.level,
|
||||
"model": selected_result.settings.model,
|
||||
"cv_splits": selected_result.settings.cv_splits,
|
||||
"classes": selected_result.settings.classes,
|
||||
}
|
||||
settings_json = json.dumps(settings_dict, indent=2)
|
||||
st.download_button(
|
||||
label="⚙️ Download Settings (JSON)",
|
||||
data=settings_json,
|
||||
file_name=f"{selected_result.path.name}_settings.json",
|
||||
mime="application/json",
|
||||
)
|
||||
|
||||
# Show raw data preview
|
||||
st.subheader("Raw Data Preview")
|
||||
st.dataframe(results.head(100), width="stretch")
|
||||
return selected_result, selected_metric, refit_metric
|
||||
|
||||
|
||||
def render_training_analysis_page():
|
||||
|
|
@ -513,91 +97,47 @@ def render_training_analysis_page():
|
|||
"""
|
||||
)
|
||||
|
||||
# Load all available training results
|
||||
# Load training results
|
||||
training_results = load_all_training_results()
|
||||
|
||||
if not training_results:
|
||||
st.warning("No training results found. Please run some training experiments first.")
|
||||
st.info("Run training using: `pixi run python -m entropice.ml.training`")
|
||||
st.stop()
|
||||
return
|
||||
|
||||
st.success(f"Found **{len(training_results)}** training result(s)")
|
||||
st.write(f"Found **{len(training_results)}** training result(s)")
|
||||
|
||||
st.divider()
|
||||
selected_result, selected_metric, refit_metric = render_analysis_settings_sidebar(training_results)
|
||||
|
||||
# Sidebar: Training run selection
|
||||
with st.sidebar:
|
||||
selection_result = render_analysis_settings_sidebar(training_results)
|
||||
if selection_result[0] is None:
|
||||
return
|
||||
selected_result, selected_metric, refit_metric, top_n = selection_result
|
||||
cv_statistics = CVMetricStatistics.compute(selected_result, selected_metric)
|
||||
|
||||
# Load predictions with labels once (used by confusion matrix and map)
|
||||
merged_predictions = load_predictions_with_labels(selected_result)
|
||||
|
||||
# Main content area
|
||||
results = selected_result.results
|
||||
settings = selected_result.settings
|
||||
|
||||
# Run Information
|
||||
render_run_information(selected_result, refit_metric)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Test Metrics Section
|
||||
render_test_metrics_section(selected_result)
|
||||
render_metrics_section(selected_result)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Confusion Matrix Section
|
||||
render_confusion_matrix_section(selected_result, merged_predictions)
|
||||
# Render confusion matrices for classification, regression analysis for regression
|
||||
if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]:
|
||||
render_confusion_matrices(selected_result)
|
||||
else:
|
||||
render_regression_analysis(selected_result)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Performance Summary Section
|
||||
st.header("📊 CV Performance Overview")
|
||||
st.caption("Summary of hyperparameter search results across all configurations")
|
||||
render_performance_summary(results, refit_metric)
|
||||
render_cv_statistics_section(cv_statistics, selected_result.test_metrics.get(selected_metric, float("nan")))
|
||||
|
||||
st.divider()
|
||||
|
||||
# Prediction Analysis Map Section
|
||||
st.header("🗺️ Model Performance Map")
|
||||
st.caption("Interactive 3D map showing prediction correctness across the training dataset")
|
||||
render_confusion_matrix_map(selected_result.path, settings, merged_predictions)
|
||||
render_hparam_space_section(selected_result, selected_metric)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Cross-Validation Statistics
|
||||
render_cv_statistics_section(selected_result, selected_metric)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Parameter Space Analysis
|
||||
render_parameter_space_section(selected_result, selected_metric)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Parameter Correlation
|
||||
st.header("🔗 Parameter Correlation")
|
||||
render_parameter_correlation(results, selected_metric)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Multi-Metric Comparison
|
||||
if len(selected_result.available_metrics) >= 2:
|
||||
st.header("📊 Multi-Metric Comparison")
|
||||
render_multi_metric_comparison(results)
|
||||
st.divider()
|
||||
|
||||
# Top Configurations
|
||||
st.header("🏆 Top Performing Configurations")
|
||||
render_top_configurations(results, selected_metric, top_n)
|
||||
|
||||
st.divider()
|
||||
|
||||
# Raw Data Export
|
||||
render_data_export_section(results, selected_result)
|
||||
# List all results at the end
|
||||
st.header("📄 All Training Results")
|
||||
st.dataframe(selected_result.results)
|
||||
|
||||
st.balloons()
|
||||
stopwatch.summary()
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@ class AutoGluonSettings:
|
|||
class AutoGluonTrainingSettings(DatasetEnsemble, AutoGluonSettings):
|
||||
"""Combined settings for AutoGluon training."""
|
||||
|
||||
classes: list[str] | None
|
||||
problem_type: str
|
||||
classes: list[str] | None = None
|
||||
problem_type: str = "binary"
|
||||
|
||||
|
||||
def _determine_problem_type_and_metric(task: Task) -> tuple[str, str]:
|
||||
|
|
@ -177,6 +177,8 @@ def autogluon_train(
|
|||
toml.dump({"settings": asdict(combined_settings)}, f)
|
||||
|
||||
# Save test metrics
|
||||
# We need to use pickle here, because the confusion matrix is stored as a dataframe
|
||||
# This only matters for classification tasks
|
||||
test_metrics_file = results_dir / "test_metrics.pickle"
|
||||
print(f"💾 Saving test metrics to {test_metrics_file}")
|
||||
with open(test_metrics_file, "wb") as f:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue