- Added a section to display test metrics for model performance on the held-out test set.
- Implemented confusion matrix visualization to analyze prediction breakdown.
- Refactored sidebar settings to streamline metric selection and improve user experience.
- Updated cross-validation statistics to compare CV performance with test metrics.
- Enhanced DatasetEnsemble methods to handle empty data scenarios gracefully.
- Introduced debug scripts to assist in identifying feature mismatches and validating dataset preparation.
- Added comprehensive tests for DatasetEnsemble to ensure feature consistency and correct behavior across various scenarios.
33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
"""Debug script to check what _prep_arcticdem returns for a batch."""
|
|
|
|
from entropice.ml.dataset import DatasetEnsemble

# Configure an ensemble whose only member is ArcticDEM, so the calls below
# deal with that single member.
ensemble = DatasetEnsemble(
    grid="healpix",
    level=10,
    target="darts_mllabels",
    members=["ArcticDEM"],
    add_lonlat=True,
    filter_target=False,
)

# Get targets.
# NOTE: `_read_target` and `_prep_arcticdem` are underscore-prefixed methods of
# DatasetEnsemble; they are called directly here purely for debugging.
targets = ensemble._read_target()
print(f"Total targets: {len(targets)}")

# Get first batch of targets: the first 100 rows by position.
batch_targets = targets.iloc[:100]
print(f"\nBatch targets: {len(batch_targets)}")
print(f"Cell IDs in batch: {batch_targets['cell_id'].values[:5]}")

# Try to prep ArcticDEM for this batch, announcing the call between two
# 80-character separator lines so it is easy to find in the output.
separator = "=" * 80
print("\n" + separator)
print("Calling _prep_arcticdem...")
print(separator)
arcticdem_df = ensemble._prep_arcticdem(batch_targets)

# Summarise the result. The index/column previews fall back to a placeholder
# string when the returned frame has no rows / no columns.
print(f"\nArcticDEM DataFrame shape: {arcticdem_df.shape}")

index_preview = arcticdem_df.index[:5].tolist() if len(arcticdem_df) > 0 else "EMPTY"
print(f"ArcticDEM DataFrame index: {index_preview}")

column_count = len(arcticdem_df.columns)
column_preview = arcticdem_df.columns[:10].tolist() if column_count > 0 else "NO COLUMNS"
print(f"ArcticDEM DataFrame columns ({column_count}): {column_preview}")

# Counts rows that contain at least one non-null value (notna().any(axis=1)),
# assuming pandas DataFrame semantics for the returned object.
rows_with_data = arcticdem_df.notna().any(axis=1).sum()
print(f"Number of non-NaN rows: {rows_with_data}")