Enhance training analysis page with test metrics and confusion matrix
- Added a section to display test metrics for model performance on the held-out test set.
- Implemented confusion matrix visualization to analyze prediction breakdown.
- Refactored sidebar settings to streamline metric selection and improve user experience.
- Updated cross-validation statistics to compare CV performance with test metrics.
- Enhanced DatasetEnsemble methods to handle empty data scenarios gracefully.
- Introduced debug scripts to assist in identifying feature mismatches and validating dataset preparation.
- Added comprehensive tests for DatasetEnsemble to ensure feature consistency and correct behavior across various scenarios.
This commit is contained in:
parent
4fecac535c
commit
c92e856c55
23 changed files with 1845 additions and 484 deletions
58
scripts/rechunk_zarr.py
Normal file
58
scripts/rechunk_zarr.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import xarray as xr
|
||||
import zarr
|
||||
from rich import print
|
||||
import dask.distributed as dd
|
||||
|
||||
from entropice.utils.paths import get_era5_stores
|
||||
import entropice.utils.codecs
|
||||
|
||||
def print_info(daily_raw=None, show_vars: bool = True):
    """Print a short summary of the daily dataset's dimensions, chunking and encoding.

    Args:
        daily_raw: Dataset-like object exposing ``sizes``, ``chunksizes``,
            ``encoding``, ``data_vars`` and item access by variable name.
            When ``None``, the store returned by ``get_era5_stores("daily")``
            is opened with ``xr.open_zarr(..., consolidated=False)``.
        show_vars: When true, additionally print the encoding of every data
            variable.
    """
    if daily_raw is None:
        daily_store = get_era5_stores("daily")
        daily_raw = xr.open_zarr(daily_store, consolidated=False)

    print("=== Daily INFO ===")
    print(f" Dims: {daily_raw.sizes}")

    numchunks = 1
    chunksizes = {}
    # Size estimate assumes 4 bytes per element (float32).
    approxchunksize = 4
    for d, cs in daily_raw.chunksizes.items():
        largest = max(cs)
        # The total chunk count is the product of the per-dimension counts.
        numchunks *= len(cs)
        # Report the largest chunk along each dimension (the last one may be smaller).
        chunksizes[d] = largest
        approxchunksize *= largest
    # Bytes -> megabytes. The previous divisor `10e6` equals 1e7, which
    # under-reported the chunk size by a factor of ten.
    approxchunksize /= 1e6

    print(f" Chunks: {chunksizes} (~{approxchunksize:.2f}MB) => {numchunks} total")
    print(f" Encoding: {daily_raw.encoding}")

    if show_vars:
        print(" Variables:")
        for var in daily_raw.data_vars:
            da = daily_raw[var]
            print(f" {var} Encoding:")
            print(da.encoding)
    # NOTE(review): indentation was lost in the reviewed view; the trailing
    # blank line is assumed to be printed once per call — confirm.
    print("")
|
||||
|
||||
def rechunk():
    """Rewrite the daily ERA5 store with new chunking into a sibling ``*_rechunked`` store.

    Opens the store returned by ``get_era5_stores("daily")``, prints its
    layout, rechunks it, prints the new layout, and writes the result next to
    the source store (same name with ``_rechunked`` appended to the stem),
    overwriting any existing target (``mode="w"``).
    """
    source_store = get_era5_stores("daily")
    dataset = xr.open_zarr(source_store, consolidated=False)
    print_info(dataset, False)

    # 120 steps per chunk along time; -1 requests a single chunk spanning the
    # whole dimension. The original author's notes expect the full extents to
    # be 337 (latitude) and 3600 (longitude).
    target_chunks = {"time": 120, "latitude": -1, "longitude": -1}
    dataset = dataset.chunk(target_chunks)
    print_info(dataset, False)

    # Per-variable encoding is derived from the rechunked dataset by the
    # project's codec helper.
    store_encoding = entropice.utils.codecs.from_ds(dataset)
    target_store = source_store.with_stem(f"{source_store.stem}_rechunked")
    dataset.to_zarr(
        target_store,
        mode="w",
        encoding=store_encoding,
        consolidated=False,
    )
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the rechunking on a local Dask cluster: one worker process with
    # ten threads and a 100GB memory limit.
    with dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster:
        with dd.Client(cluster) as client:
            print(client)
            print(client.dashboard_link)
            rechunk()
            # NOTE(review): indentation was lost in the reviewed view; "Done."
            # is assumed to be printed before the client/cluster shut down — confirm.
            print("Done.")
|
||||
Loading…
Add table
Add a link
Reference in a new issue