# entropice/tests/debug_arcticdem_batch.py

"""Debug script to check what _prep_arcticdem returns for a batch."""

from entropice.ml.dataset import DatasetEnsemble

# Sizes used by the diagnostics below.
_BATCH_SIZE = 100  # number of leading target rows passed to _prep_arcticdem
_INDEX_PREVIEW = 5  # how many index labels / cell ids to show
_COLUMN_PREVIEW = 10  # how many column names to show
_RULE = "=" * 80  # horizontal separator framing the _prep_arcticdem call


def _preview_labels(labels, limit, placeholder):
    """Return the first *limit* entries of *labels* as a list.

    *labels* is expected to support ``len()``, slicing and ``.tolist()``
    (e.g. a pandas Index). When it is empty, *placeholder* is returned instead.
    """
    if len(labels) == 0:
        return placeholder
    return labels[:limit].tolist()


# Ensemble restricted to the ArcticDEM member; the options are forwarded
# to DatasetEnsemble exactly as listed here.
_ensemble_options = {
    "grid": "healpix",
    "level": 10,
    "target": "darts_mllabels",
    "members": ["ArcticDEM"],
    "add_lonlat": True,
    "filter_target": False,
}
ensemble = DatasetEnsemble(**_ensemble_options)

# Load all targets and report how many there are.
targets = ensemble._read_target()
_n_targets = len(targets)
print(f"Total targets: {_n_targets}")

# Take the leading rows as the batch under inspection.
batch_targets = targets.iloc[:_BATCH_SIZE]
_n_batch = len(batch_targets)
print(f"\nBatch targets: {_n_batch}")
_cell_ids = batch_targets["cell_id"].values[:_INDEX_PREVIEW]
print(f"Cell IDs in batch: {_cell_ids}")

# Run the ArcticDEM preparation for the batch, framed by separators so that
# anything it prints itself is easy to spot in the output.
print("\n" + _RULE)
print("Calling _prep_arcticdem...")
print(_RULE)
arcticdem_df = ensemble._prep_arcticdem(batch_targets)

# Summarise the returned frame: shape, leading index labels, leading column
# names, and how many rows carry at least one non-NaN value.
print(f"\nArcticDEM DataFrame shape: {arcticdem_df.shape}")

_index_preview = _preview_labels(arcticdem_df.index, _INDEX_PREVIEW, "EMPTY")
print(f"ArcticDEM DataFrame index: {_index_preview}")

_n_columns = len(arcticdem_df.columns)
_column_preview = _preview_labels(arcticdem_df.columns, _COLUMN_PREVIEW, "NO COLUMNS")
print(f"ArcticDEM DataFrame columns ({_n_columns}): {_column_preview}")

_row_has_value = arcticdem_df.notna().any(axis=1)
print(f"Number of non-NaN rows: {_row_has_value.sum()}")