58 lines
1.8 KiB
Python
58 lines
1.8 KiB
Python
|
|
import xarray as xr
|
||
|
|
import zarr
|
||
|
|
from rich import print
|
||
|
|
import dask.distributed as dd
|
||
|
|
|
||
|
|
from entropice.utils.paths import get_era5_stores
|
||
|
|
import entropice.utils.codecs
|
||
|
|
|
||
|
|
def print_info(daily_raw: "xr.Dataset | None" = None, show_vars: bool = True) -> None:
    """Print a short report about the daily dataset.

    The report lists the dimension sizes, the chunk layout (largest chunk
    per dimension, an approximate size of one chunk and the total number of
    chunks), the dataset-level encoding and, optionally, the encoding of
    every data variable.

    Args:
        daily_raw: Dataset to describe. When ``None``, the store returned by
            ``get_era5_stores("daily")`` is opened with ``xr.open_zarr``.
        show_vars: Whether to also print the encoding of each data variable.
    """
    if daily_raw is None:
        daily_store = get_era5_stores("daily")
        daily_raw = xr.open_zarr(daily_store, consolidated=False)

    print("=== Daily INFO ===")
    print(f" Dims: {daily_raw.sizes}")

    # Chunk layout. The size estimate multiplies the largest chunk extent of
    # every dimension and assumes 4-byte (float32) values.
    bytes_per_value = 4
    numchunks = 1
    chunksizes = {}
    approxchunksize = bytes_per_value
    for d, cs in daily_raw.chunksizes.items():
        largest = max(cs)
        numchunks *= len(cs)
        chunksizes[d] = largest
        approxchunksize *= largest
    # Bytes -> MB. The divisor must be 1e6; the literal 10e6 equals 1e7 and
    # under-reported the chunk size by a factor of ten.
    approxchunksize /= 1e6
    print(f" Chunks: {chunksizes} (~{approxchunksize:.2f}MB) => {numchunks} total")
    print(f" Encoding: {daily_raw.encoding}")

    if show_vars:
        print(" Variables:")
        for var in daily_raw.data_vars:
            da = daily_raw[var]
            print(f" {var} Encoding:")
            print(da.encoding)

    # Blank line so consecutive reports do not run together.
    print("")
|
||
|
|
|
||
|
|
def rechunk():
    """Write a rechunked copy of the daily store next to the original.

    Opens the store returned by ``get_era5_stores("daily")``, prints its
    layout, rechunks it, prints the new layout, and writes the result to a
    sibling store whose stem carries a ``_rechunked`` suffix. The target is
    opened with ``mode="w"``.
    """
    source_store = get_era5_stores("daily")
    dataset = xr.open_zarr(source_store, consolidated=False)
    print_info(dataset, False)

    # -1 asks for a single chunk along that dimension.
    target_chunks = {
        "time": 120,
        "latitude": -1,  # NOTE(review): original comment says "Should be 337"
        "longitude": -1,  # NOTE(review): original comment says "Should be 3600"
    }
    dataset = dataset.chunk(target_chunks)
    print_info(dataset, False)

    # Per-variable encoding is derived from the rechunked dataset.
    codec_encoding = entropice.utils.codecs.from_ds(dataset)
    target_store = source_store.with_stem(f"{source_store.stem}_rechunked")
    dataset.to_zarr(
        target_store,
        mode="w",
        encoding=codec_encoding,
        consolidated=False,
    )
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Run the rechunk job against a local Dask cluster (one worker process
    # with ten threads). Cluster and client are used as context managers so
    # both are released when the block exits.
    with dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster:
        with dd.Client(cluster) as client:
            print(client)
            # Print the dashboard URL so progress can be followed in a browser.
            print(client.dashboard_link)
            rechunk()
            print("Done.")
|