import xarray as xr
import zarr
from rich import print
import dask.distributed as dd
from entropice.utils.paths import get_era5_stores
import entropice.utils.codecs


def print_info(daily_raw: xr.Dataset | None = None, show_vars: bool = True) -> None:
    """Print dimensions, chunking and encoding of the daily dataset.

    Args:
        daily_raw: Dataset to describe. When ``None``, the store returned by
            ``get_era5_stores("daily")`` is opened with ``xr.open_zarr``.
        show_vars: When true, also print the encoding of every data variable.
    """
    if daily_raw is None:
        daily_store = get_era5_stores("daily")
        daily_raw = xr.open_zarr(daily_store, consolidated=False)
    print("=== Daily INFO ===")
    print(f" Dims: {daily_raw.sizes}")

    numchunks = 1
    chunksizes = {}
    # Size estimate assumes 4-byte (float32) elements; other dtypes are not
    # accounted for.
    approxchunksize = 4  # 4 Bytes = float32
    for d, cs in daily_raw.chunksizes.items():
        # Number of chunks multiplies across dimensions; the largest chunk
        # along each dimension gives an upper bound for the chunk volume.
        largest = max(cs)
        numchunks *= len(cs)
        chunksizes[d] = largest
        approxchunksize *= largest
    # Bytes -> megabytes. The previous divisor (10e6 == 1e7) under-reported
    # the size by a factor of ten.
    approxchunksize /= 1e6  # MB
    print(f" Chunks: {chunksizes} (~{approxchunksize:.2f}MB) => {numchunks} total")
    print(f" Encoding: {daily_raw.encoding}")

    if show_vars:
        print(" Variables:")
        for var in daily_raw.data_vars:
            da = daily_raw[var]
            print(f" {var} Encoding:")
            print(da.encoding)
            print("")


def rechunk() -> None:
    """Rechunk the daily store and write it to a sibling ``*_rechunked`` store.

    Opens the store returned by ``get_era5_stores("daily")``, rechunks it to
    120 steps along ``time`` with a single chunk spanning each of ``latitude``
    and ``longitude``, prints the chunk layout before and after, and writes
    the result with ``mode="w"`` (overwriting an existing target).
    """
    daily_store = get_era5_stores("daily")
    daily_raw = xr.open_zarr(daily_store, consolidated=False)
    print_info(daily_raw, False)

    daily_raw = daily_raw.chunk(
        {
            "time": 120,
            "latitude": -1,  # Should be 337,
            "longitude": -1,  # Should be 3600
        }
    )
    print_info(daily_raw, False)

    # NOTE(review): from_ds presumably derives the per-variable zarr encoding
    # from the dataset - confirm against entropice.utils.codecs.
    encoding = entropice.utils.codecs.from_ds(daily_raw)
    # Target store sits next to the source, with "_rechunked" appended to the
    # stem (relies on daily_store offering .stem / .with_stem).
    daily_store_rechunked = daily_store.with_stem(f"{daily_store.stem}_rechunked")
    daily_raw.to_zarr(daily_store_rechunked, mode="w", encoding=encoding, consolidated=False)


if __name__ == "__main__":
    # Run the rechunking on a local dask cluster: one worker process with
    # ten threads and a 100GB memory limit.
    with (
        dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster,
        dd.Client(cluster) as client,
    ):
        print(client)
        print(client.dashboard_link)
        rechunk()
        print("Done.")