Enhance training analysis page with test metrics and confusion matrix
- Added a section to display test metrics for model performance on the held-out test set.
- Implemented confusion matrix visualization to analyze prediction breakdown.
- Refactored sidebar settings to streamline metric selection and improve user experience.
- Updated cross-validation statistics to compare CV performance with test metrics.
- Enhanced DatasetEnsemble methods to handle empty data scenarios gracefully.
- Introduced debug scripts to assist in identifying feature mismatches and validating dataset preparation.
- Added comprehensive tests for DatasetEnsemble to ensure feature consistency and correct behavior across various scenarios.
This commit is contained in:
parent
4fecac535c
commit
c92e856c55
23 changed files with 1845 additions and 484 deletions
33
tests/debug_arcticdem_batch.py
Normal file
33
tests/debug_arcticdem_batch.py
Normal file
|
|
@ -0,0 +1,33 @@
"""Debug script to check what _prep_arcticdem returns for a batch.

Meant to be run directly. It constructs a ``DatasetEnsemble`` whose only
member is ArcticDEM, reads the targets, slices off the first 100 rows as a
batch, hands that batch to ``_prep_arcticdem`` and prints a short summary
(shape, leading index values, leading column names, count of rows holding at
least one non-NaN value) of whatever comes back.
"""

from entropice.ml.dataset import DatasetEnsemble

# Constructor arguments for the ensemble under inspection.
ensemble_options = {
    "grid": "healpix",
    "level": 10,
    "target": "darts_mllabels",
    "members": ["ArcticDEM"],
    "add_lonlat": True,
    "filter_target": False,
}
ensemble = DatasetEnsemble(**ensemble_options)

# Read every target and report how many there are.
targets = ensemble._read_target()
print(f"Total targets: {len(targets)}")

# The batch is simply the leading 100 rows of the targets.
batch_targets = targets.iloc[:100]
print(f"\nBatch targets: {len(batch_targets)}")
leading_cell_ids = batch_targets["cell_id"].values[:5]
print(f"Cell IDs in batch: {leading_cell_ids}")

# Prepare ArcticDEM for this batch, framed by a visual separator.
separator = "=" * 80
print("\n" + separator)
print("Calling _prep_arcticdem...")
print(separator)
arcticdem_df = ensemble._prep_arcticdem(batch_targets)

print(f"\nArcticDEM DataFrame shape: {arcticdem_df.shape}")

# Show the first five index values, or a marker when there are no rows.
if len(arcticdem_df) > 0:
    index_preview = arcticdem_df.index[:5].tolist()
else:
    index_preview = "EMPTY"
print(f"ArcticDEM DataFrame index: {index_preview}")

# Show the first ten column names, or a marker when there are no columns.
column_count = len(arcticdem_df.columns)
if column_count > 0:
    column_preview = arcticdem_df.columns[:10].tolist()
else:
    column_preview = "NO COLUMNS"
print(f"ArcticDEM DataFrame columns ({column_count}): {column_preview}")

# A row counts as non-NaN when at least one of its values is not NaN.
rows_with_data = arcticdem_df.notna().any(axis=1).sum()
print(f"Number of non-NaN rows: {rows_with_data}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue