- Added a section to display test metrics for model performance on the held-out test set. - Implemented confusion matrix visualization to analyze prediction breakdown. - Refactored sidebar settings to streamline metric selection and improve user experience. - Updated cross-validation statistics to compare CV performance with test metrics. - Enhanced DatasetEnsemble methods to handle empty data scenarios gracefully. - Introduced debug scripts to assist in identifying feature mismatches and validating dataset preparation. - Added comprehensive tests for DatasetEnsemble to ensure feature consistency and correct behavior across various scenarios.
58 lines
No EOL
1.8 KiB
Python
58 lines
No EOL
1.8 KiB
Python
import xarray as xr
|
|
import zarr
|
|
from rich import print
|
|
import dask.distributed as dd
|
|
|
|
from entropice.utils.paths import get_era5_stores
|
|
import entropice.utils.codecs
|
|
|
|
def print_info(daily_raw=None, show_vars: bool = True) -> None:
    """Print a summary of the daily dataset: dims, chunk layout and encodings.

    Args:
        daily_raw: Dataset-like object exposing ``sizes``, ``chunksizes``,
            ``encoding`` and ``data_vars``. When ``None``, the store returned
            by ``get_era5_stores("daily")`` is opened with ``xr.open_zarr``.
        show_vars: When true, also print the encoding of every data variable.
    """
    if daily_raw is None:
        daily_store = get_era5_stores("daily")
        daily_raw = xr.open_zarr(daily_store, consolidated=False)

    print("=== Daily INFO ===")
    print(f" Dims: {daily_raw.sizes}")

    numchunks = 1
    chunksizes = {}
    # Size estimate assumes 4 bytes per value (float32); other dtypes are not
    # inspected, so this is only an approximation.
    approxchunksize = 4
    for d, cs in daily_raw.chunksizes.items():
        largest = max(cs)
        numchunks *= len(cs)  # number of chunks along this dimension
        chunksizes[d] = largest
        approxchunksize *= largest
    # Bytes -> megabytes. The divisor must be 1e6; the former ``10e6`` equals
    # 1e7 and under-reported the chunk size by a factor of ten.
    approxchunksize /= 1e6
    print(f" Chunks: {chunksizes} (~{approxchunksize:.2f}MB) => {numchunks} total")
    print(f" Encoding: {daily_raw.encoding}")

    if show_vars:
        print(" Variables:")
        for var in daily_raw.data_vars:
            da = daily_raw[var]
            print(f" {var} Encoding:")
            print(da.encoding)
    print("")
|
|
|
|
def rechunk():
    """Re-chunk the daily ERA5 store and write it to a sibling store.

    Opens the store returned by ``get_era5_stores("daily")``, prints its
    layout, applies the new chunking, prints the layout again and writes the
    result next to the source with ``_rechunked`` appended to the stem.
    """
    source_store = get_era5_stores("daily")
    dataset = xr.open_zarr(source_store, consolidated=False)
    print_info(dataset, False)

    # -1 asks for a single chunk spanning the whole dimension.
    target_chunks = {
        "time": 120,
        "latitude": -1,  # Should be 337
        "longitude": -1,  # Should be 3600
    }
    dataset = dataset.chunk(target_chunks)
    print_info(dataset, False)

    target_store = source_store.with_stem(f"{source_store.stem}_rechunked")
    dataset.to_zarr(
        target_store,
        mode="w",
        encoding=entropice.utils.codecs.from_ds(dataset),
        consolidated=False,
    )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the rechunking on a local single-worker dask cluster; both the
    # cluster and the client are closed when their ``with`` blocks exit.
    with dd.LocalCluster(
        n_workers=1,
        threads_per_worker=10,
        memory_limit="100GB",
    ) as cluster:
        with dd.Client(cluster) as client:
            print(client)
            print(client.dashboard_link)
            rechunk()
            print("Done.")