Restructure to steps
This commit is contained in:
parent
2af5c011a3
commit
ce4c728e1a
10 changed files with 1377 additions and 640 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -14,3 +14,9 @@ data
|
||||||
|
|
||||||
# Editors
|
# Editors
|
||||||
.vscode/
|
.vscode/
|
||||||
|
|
||||||
|
# Debug / Playground notebooks
|
||||||
|
pg.ipynb
|
||||||
|
playground.ipynb
|
||||||
|
*fix*.ipynb
|
||||||
|
*debug*.ipynb
|
||||||
|
|
|
||||||
616
era5.py
616
era5.py
|
|
@ -1,616 +0,0 @@
|
||||||
"""Download and preprocess ERA5 data.
|
|
||||||
|
|
||||||
Variables of Interest:
|
|
||||||
- 2 metre temperature (t2m) [instant]
|
|
||||||
- Total precipitation (tp) [accum]
|
|
||||||
- Snow Fall (sf) [accum]
|
|
||||||
- Snow cover (snowc) [instant]
|
|
||||||
- Snow depth (sde) [instant]
|
|
||||||
- Surface sensible heat flux (sshf) [accum]
|
|
||||||
- Lake ice bottom temperature (lblt) [instant]
|
|
||||||
|
|
||||||
Daily Variables (downloaded from hourly data):
|
|
||||||
- t2m_daily_max
|
|
||||||
- t2m_daily_min
|
|
||||||
- tp_daily_sum
|
|
||||||
- sf_daily_sum
|
|
||||||
- snowc_daily_mean
|
|
||||||
- sde_daily_mean
|
|
||||||
- sshf_daily_sum
|
|
||||||
- lblt_daily_max
|
|
||||||
|
|
||||||
Derived Daily Variables:
|
|
||||||
- t2m_daily_avg
|
|
||||||
- t2m_daily_range
|
|
||||||
- t2m_daily_skew
|
|
||||||
- thawing_degree_days
|
|
||||||
- freezing_degree_days
|
|
||||||
- thawing_days
|
|
||||||
- freezing_days
|
|
||||||
- precipitation_occurrences
|
|
||||||
- snowfall_occurrences
|
|
||||||
- snow_isolation (snowc * sde)
|
|
||||||
|
|
||||||
Monthly Variables:
|
|
||||||
- t2m_monthly_max
|
|
||||||
- t2m_monthly_min
|
|
||||||
- tp_monthly_sum
|
|
||||||
- sf_monthly_sum
|
|
||||||
- snowc_monthly_mean
|
|
||||||
- sde_monthly_mean
|
|
||||||
- sshf_monthly_sum
|
|
||||||
- lblt_monthly_max
|
|
||||||
- t2m_monthly_avg
|
|
||||||
- t2m_monthly_range_avg
|
|
||||||
- t2m_monthly_skew_avg
|
|
||||||
- thawing_degree_days_monthly
|
|
||||||
- freezing_degree_days_monthly
|
|
||||||
- thawing_days_monthly
|
|
||||||
- freezing_days_monthly
|
|
||||||
- precipitation_occurrences_monthly TODO: Rename to precipitation_days_monthly?
|
|
||||||
- snowfall_occurrences_monthly TODO: Rename to snowfall_days_monthly?
|
|
||||||
- snow_isolation_monthly_mean
|
|
||||||
|
|
||||||
Yearly Variables:
|
|
||||||
- TODO
|
|
||||||
|
|
||||||
# TODO Variables:
|
|
||||||
- Day of first thaw (yearly)
|
|
||||||
- Day of last thaw (yearly)
|
|
||||||
- Thawing period length (yearly)
|
|
||||||
- Freezing period length (yearly)
|
|
||||||
|
|
||||||
Author: Tobias Hölzer
|
|
||||||
Date: 09. June 2025
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Literal
|
|
||||||
|
|
||||||
import cyclopts
|
|
||||||
import dask.distributed as dd
|
|
||||||
import geopandas as gpd
|
|
||||||
import odc.geo
|
|
||||||
import odc.geo.xr
|
|
||||||
import pandas as pd
|
|
||||||
import shapely
|
|
||||||
import shapely.ops
|
|
||||||
import xarray as xr
|
|
||||||
from numcodecs.zarr3 import Blosc
|
|
||||||
from rich import pretty, print, traceback
|
|
||||||
from rich.progress import track
|
|
||||||
from shapely.geometry import LineString, Polygon
|
|
||||||
|
|
||||||
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd])
|
|
||||||
pretty.install()
|
|
||||||
|
|
||||||
cli = cyclopts.App()
|
|
||||||
|
|
||||||
# TODO: Directly handle stuff on a grid level - this is more what the zarr access is indented to do
|
|
||||||
|
|
||||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
|
||||||
# DATA_DIR = Path("data")
|
|
||||||
ERA5_DIR = DATA_DIR / "era5"
|
|
||||||
AGG_PATH = ERA5_DIR / "era5_agg.zarr"
|
|
||||||
ALIGNED_PATH = ERA5_DIR / "era5_spatial_aligned.zarr"
|
|
||||||
MONTHLY_PATH = ERA5_DIR / "era5_monthly.zarr"
|
|
||||||
YEARLY_PATH = ERA5_DIR / "era5_yearly.zarr"
|
|
||||||
|
|
||||||
min_lat = 50
|
|
||||||
max_lat = 83.7 # Ensures the right Chunks Size (90 - 64 / 10 + 0.1)
|
|
||||||
min_time = "1990-01-01"
|
|
||||||
max_time = "2024-12-31"
|
|
||||||
|
|
||||||
|
|
||||||
today = time.strftime("%Y-%m-%d")
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: I think it would be better to aggregate via hours instead of days
|
|
||||||
# Pipeline would be:
|
|
||||||
# Download hourly data -> Spatially match hourly data ->
|
|
||||||
# For {daily, monthly, yearly}:
|
|
||||||
# Enrich -> Aggregate temporally
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: Rethink aggregations by differentiating between "instant" and "accum" variables:
|
|
||||||
# https://consensus.app/search/instantaneous-versus-accumulated-weather/JBaNbhc1R_-BwN5E9Un0Fw/
|
|
||||||
|
|
||||||
# ================
|
|
||||||
# === Download ===
|
|
||||||
# ================
|
|
||||||
|
|
||||||
|
|
||||||
def create_encoding(ds: xr.Dataset) -> dict:
    """Create compression encoding for zarr dataset storage.

    Configures Blosc zstd compression at level 9 for every data variable
    AND every coordinate of the dataset, so the whole store is compressed
    uniformly.

    Args:
        ds (xr.Dataset): The xarray Dataset to create encoding for.

    Returns:
        dict: Encoding dictionary with compression settings for each variable.

    """
    # Removed dead commented-out BloscCodec(zlib) variant; zstd/9 is the one in use.
    compressor = Blosc(cname="zstd", clevel=9)
    return {var: {"compressors": compressor} for var in [*ds.data_vars, *ds.coords]}
|
|
||||||
|
|
||||||
|
|
||||||
def download_daily_aggregated():
    """Download and aggregate ERA5 data to daily resolution.

    Downloads ERA5 reanalysis data from the DESTINE Earth Data Hub and aggregates
    it to daily resolution. Includes temperature extremes, precipitation, snow,
    and surface heat flux variables.

    The function downloads hourly data and creates daily aggregates:
    - Temperature: daily min/max
    - Precipitation and snowfall: daily totals
    - Snow cover and depth: daily means
    - Surface heat flux: daily totals
    - Lake ice temperature: daily max

    The aggregated data is saved to a zarr file with compression.
    """
    era5 = xr.open_dataset(
        "https://data.earthdatahub.destine.eu/era5/reanalysis-era5-land-no-antartica-v0.zarr",
        storage_options={"client_kwargs": {"trust_env": True}},
        chunks={},
        engine="zarr",
    ).rename({"valid_time": "time"})
    subset = {
        "latitude": slice(max_lat, min_lat),
    }

    # Compute the closest chunk-start to min_time, to avoid problems with cropped chunks at the start
    tchunksize = era5.chunksizes["time"][0]
    era5_chunk_starts = pd.date_range(era5.time.min().item(), era5.time.max().item(), freq=f"{tchunksize}h")
    closest_chunk_start = era5_chunk_starts[
        era5_chunk_starts.get_indexer([pd.to_datetime(min_time)], method="ffill")[0]
    ]
    subset["time"] = slice(str(closest_chunk_start), max_time)

    era5 = era5.sel(**subset)

    # (source variable, daily reduction, output name, attrs).
    # "instant" variables get min/max/mean, "accum" variables get daily sums.
    spec = [
        ("t2m", "max", "t2m_daily_max", {"long_name": "Daily maximum 2 metre temperature", "units": "K"}),
        ("t2m", "min", "t2m_daily_min", {"long_name": "Daily minimum 2 metre temperature", "units": "K"}),
        ("tp", "sum", "tp_daily_sum", {"long_name": "Daily total precipitation", "units": "m"}),
        ("sf", "sum", "sf_daily_sum", {"long_name": "Daily total snow fall", "units": "m"}),
        # NOTE(review): ERA5-Land snow cover (snowc) is reported in %, not metres — was "m" before.
        ("snowc", "mean", "snowc_daily_mean", {"long_name": "Daily mean snow cover", "units": "%"}),
        ("sde", "mean", "sde_daily_mean", {"long_name": "Daily mean snow depth", "units": "m"}),
        ("sshf", "sum", "sshf_daily_sum", {"long_name": "Daily total surface sensible heat flux", "units": "J/m²"}),
        ("lblt", "max", "lblt_daily_max", {"long_name": "Daily maximum lake ice bottom temperature", "units": "K"}),
    ]
    era5_agg = xr.merge(
        [getattr(era5[var].resample(time="1D"), reduction)().rename(name) for var, reduction, name, _ in spec]
    )
    for _, _, name, attrs in spec:
        era5_agg[name].attrs = attrs

    era5_agg = era5_agg.odc.assign_crs("epsg:4326")
    # Scalar coordinates inherited from the hourly product are not needed downstream.
    era5_agg = era5_agg.drop_vars(["surface", "number", "depthBelowLandLayer"])
    era5_agg.to_zarr(AGG_PATH, mode="w", encoding=create_encoding(era5_agg), consolidated=False)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command
def download():
    """Download ERA5 data using Dask cluster for parallel processing.

    Spins up a local Dask cluster (one worker, 10 threads, 100GB memory limit)
    and runs the daily-aggregation download inside it.
    """
    with dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster:
        with dd.Client(cluster) as client:
            print(client)
            print(client.dashboard_link)
            download_daily_aggregated()
            print("Downloaded and aggregated ERA5 data.")
|
|
||||||
|
|
||||||
|
|
||||||
# ===========================
|
|
||||||
# === Spatial Aggregation ===
|
|
||||||
# ===========================
|
|
||||||
|
|
||||||
|
|
||||||
def _crosses_antimeridian(geom: Polygon) -> bool:
    """Heuristically decide whether a polygon straddles the 180° meridian.

    A polygon counts as antimeridian-crossing when it has vertices on both
    sides of 0° longitude AND reaches beyond ±90°, which excludes shapes that
    merely straddle the prime meridian.
    """
    lons = shapely.get_coordinates(geom)[:, 0]
    spans_both_signs = (lons > 0).any() and (lons < 0).any()
    return spans_both_signs and abs(lons).max() > 90
|
|
||||||
|
|
||||||
|
|
||||||
def _split_antimeridian_cell(geom: Polygon) -> list[Polygon]:
    """Split an antimeridian-crossing polygon into parts at 180° longitude.

    Assumes the caller already verified the crossing. Western-hemisphere
    vertices are shifted east by 360° so the ring becomes contiguous, then
    the polygon is cut along the 180° line.
    """
    coords = shapely.get_coordinates(geom)
    # Vectorized equivalent of looping over rows and adding 360 to negative longitudes.
    coords[coords[:, 0] < 0, 0] += 360
    shifted = Polygon(coords)
    antimeridian = LineString([[180, -90], [180, 90]])
    return list(shapely.ops.split(shifted, antimeridian).geoms)
|
|
||||||
|
|
||||||
|
|
||||||
def _check_geobox(geobox):
|
|
||||||
x, y = geobox.shape
|
|
||||||
return x > 1 and y > 1
|
|
||||||
|
|
||||||
|
|
||||||
def extract_cell_data(idx: int, geom: Polygon) -> bool:
    """Extract ERA5 data for a specific grid cell geometry.

    Extracts and spatially averages ERA5 data within the bounds of a grid cell.
    Handles antimeridian-crossing cells by splitting them appropriately.
    The extracted data is written into the pre-allocated aligned zarr file
    at the region belonging to this cell.

    Args:
        idx (int): Index of the grid cell (used as the "cell" coordinate value).
        geom (Polygon): Polygon geometry of the grid cell in EPSG:4326.

    Returns:
        bool: True if data was written, False if the cell doesn't overlap
            with the ERA5 data extent.

    """
    # Re-open the store per call: this function runs in worker threads and must
    # not share a dataset handle with other threads.
    era5_agg = (
        xr.open_zarr(AGG_PATH, consolidated=False)
        .set_coords("spatial_ref")
        .drop_vars(["surface", "number", "depthBelowLandLayer"])
    )
    # cell.geometry is a shapely Polygon
    if not _crosses_antimeridian(geom):
        geoms = [geom]
    # Split geometry in case it crossed antimeridian
    else:
        geoms = _split_antimeridian_cell(geom)
    cell_data = []
    for geom in geoms:
        geom = odc.geo.Geometry(geom, crs="epsg:4326")
        # Skip parts that cover at most one ERA5 pixel in some dimension.
        if not _check_geobox(era5_agg.odc.geobox.enclosing(geom)):
            continue
        # Spatial mean over all pixels inside the (cropped) cell geometry.
        cell_data.append(era5_agg.odc.crop(geom).drop_vars("spatial_ref").mean(["latitude", "longitude"]))
    if len(cell_data) == 0:
        return False
    elif len(cell_data) == 1:
        cell_data = cell_data[0]
    else:
        # Antimeridian case: average the split parts.
        # NOTE(review): this is an unweighted mean over parts, not area-weighted — confirm intended.
        cell_data = xr.concat(cell_data, dim="part").mean("part")
    cell_data = cell_data.expand_dims({"cell": [idx]}).compute()
    # region="auto" writes only this cell's slot of the pre-created store.
    cell_data.to_zarr(ALIGNED_PATH, region="auto", consolidated=False)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command
def spatial_agg(grid: Literal["hex", "healpix"], level: int, n_workers: int = 10):
    """Perform spatial aggregation of ERA5 data to grid cells.

    Loads a grid and spatially aggregates ERA5 data to each grid cell using
    parallel processing. Creates an empty zarr file first, then fills it
    with extracted data for each cell.

    Args:
        grid ("hex", "healpix"): Grid type.
        level (int): Grid resolution level.
        n_workers (int, optional): Number of parallel workers to use. Defaults to 10.

    """
    gridname = f"permafrost_{grid}{level}"
    # NOTE(review): the GeoDataFrame shadows the `grid` (type name) parameter from here on.
    grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
    # Create an empty zarr array with the right dimensions
    era5_agg = (
        xr.open_zarr(AGG_PATH, consolidated=False)
        .set_coords("spatial_ref")
        .drop_vars(["surface", "number", "depthBelowLandLayer"])
    )
    assert {"latitude", "longitude", "time"} == set(era5_agg.dims), (
        f"Expected dims ('latitude', 'longitude', 'time'), got {era5_agg.dims}"
    )
    assert era5_agg.odc.crs == "epsg:4326", f"Expected CRS 'epsg:4326', got {era5_agg.odc.crs}"
    # Template dataset: one (cell, time) series per grid cell, chunked with
    # cell-size 1 so each worker can write its cell's region independently.
    empty = (
        xr.zeros_like(era5_agg.isel(latitude=0, longitude=0))
        .expand_dims({"cell": [idx for idx, _ in grid.iterrows()]})
        .chunk({"cell": 1, "time": len(era5_agg.time)})
    )
    empty.to_zarr(ALIGNED_PATH, mode="w", consolidated=False, encoding=create_encoding(empty))

    print(f"Starting spatial matching of {len(grid)} cells with {n_workers} workers...")

    # TODO: Maybe change to process pool executor?
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        # Map each future back to its cell index for progress reporting.
        futures = {
            executor.submit(extract_cell_data, idx, row.geometry): idx
            for idx, row in grid.to_crs("epsg:4326").iterrows()
        }
        for future in track(as_completed(futures), total=len(futures), description="Processing cells"):
            idx = futures[future]
            try:
                # extract_cell_data returns False for cells outside the ERA5 extent.
                flag = future.result()
                if flag:
                    print(f"Successfully written cell {idx}")
                else:
                    print(f"Cell {idx} did not overlap with ERA5 data.")
            except Exception as e:
                # Best-effort: report and continue with the remaining cells.
                print(f"Error processing cell {idx}: {e}")
                print(type(e))
    print("Finished spatial matching.")
|
|
||||||
|
|
||||||
|
|
||||||
# ============================
|
|
||||||
# === Temporal Aggregation ===
|
|
||||||
# ============================
|
|
||||||
|
|
||||||
|
|
||||||
def daily_enrich() -> xr.Dataset:
    """Enrich daily ERA5 data with derived climate variables.

    Loads the spatially aligned ERA5 data and adds derived variables:
    daily average/range/skew temperature, thawing/freezing degree days and
    day indicators, precipitation/snowfall occurrence flags, and a snow
    isolation index (snow cover * snow depth).

    Returns:
        xr.Dataset: Enriched dataset with original and derived variables.

    """
    era5 = xr.open_zarr(ALIGNED_PATH, consolidated=False).set_coords("spatial_ref")
    assert {"cell", "time"} == set(era5.dims), f"Expected dims ('cell', 'time'), got {era5.dims}"

    # Formulas based on Groeke et. al. (2025) Stochastic Weather generation...
    tmax = era5.t2m_daily_max
    tmin = era5.t2m_daily_min
    era5["t2m_daily_avg"] = (tmax + tmin) / 2
    era5["t2m_daily_range"] = tmax - tmin
    era5["t2m_daily_skew"] = (era5.t2m_daily_avg - tmin) / era5.t2m_daily_range

    # Degree days relative to the freezing point (273.15 K).
    era5["thawing_degree_days"] = (era5.t2m_daily_avg - 273.15).clip(min=0)
    era5["freezing_degree_days"] = (273.15 - era5.t2m_daily_avg).clip(min=0)

    # 0/1 indicators, summed later into monthly/yearly day counts.
    era5["thawing_days"] = (era5.t2m_daily_avg > 273.15).astype(int)
    era5["freezing_days"] = (era5.t2m_daily_avg < 273.15).astype(int)
    era5["precipitation_occurrences"] = (era5.tp_daily_sum > 0).astype(int)
    era5["snowfall_occurrences"] = (era5.sf_daily_sum > 0).astype(int)

    era5["snow_isolation"] = era5.snowc_daily_mean * era5.sde_daily_mean

    # Attach metadata for every derived variable in one place.
    derived_attrs = {
        "t2m_daily_avg": {"long_name": "Daily average 2 metre temperature", "units": "K"},
        "t2m_daily_range": {"long_name": "Daily range of 2 metre temperature", "units": "K"},
        "t2m_daily_skew": {"long_name": "Daily skewness of 2 metre temperature"},
        "thawing_degree_days": {"long_name": "Thawing degree days", "units": "K"},
        "freezing_degree_days": {"long_name": "Freezing degree days", "units": "K"},
        "thawing_days": {"long_name": "Thawing days"},
        "freezing_days": {"long_name": "Freezing days"},
        "precipitation_occurrences": {"long_name": "Precipitation occurrences"},
        "snowfall_occurrences": {"long_name": "Snowfall occurrences"},
        "snow_isolation": {"long_name": "Snow isolation"},
    }
    for name, meta in derived_attrs.items():
        era5[name].attrs = meta

    return era5
|
|
||||||
|
|
||||||
|
|
||||||
def monthly_aggregate():
    """Aggregate enriched daily ERA5 data to monthly resolution.

    Takes the enriched daily ERA5 data and creates monthly aggregates using
    appropriate statistical functions for each variable type: min/max for
    temperature extremes, sums for accumulated quantities and day counts,
    means for averaged quantities.

    The aggregated monthly data is saved to a zarr file for further processing.
    """
    era5 = daily_enrich()
    assert {"cell", "time"} == set(era5.dims), f"Expected dims ('cell', 'time'), got {era5.dims}"

    # (source variable, monthly reduction, output name)
    spec = [
        # Original variables
        ("t2m_daily_min", "min", "t2m_monthly_min"),
        ("t2m_daily_max", "max", "t2m_monthly_max"),
        ("tp_daily_sum", "sum", "tp_monthly_sum"),
        ("sf_daily_sum", "sum", "sf_monthly_sum"),
        ("snowc_daily_mean", "mean", "snowc_monthly_mean"),
        ("sde_daily_mean", "mean", "sde_monthly_mean"),
        ("sshf_daily_sum", "sum", "sshf_monthly_sum"),
        ("lblt_daily_max", "max", "lblt_monthly_max"),
        # Enriched variables
        ("t2m_daily_avg", "mean", "t2m_monthly_avg"),
        ("t2m_daily_range", "mean", "t2m_monthly_range_avg"),
        ("t2m_daily_skew", "mean", "t2m_monthly_skew_avg"),
        ("thawing_degree_days", "sum", "thawing_degree_days_monthly"),
        ("freezing_degree_days", "sum", "freezing_degree_days_monthly"),
        ("thawing_days", "sum", "thawing_days_monthly"),
        ("freezing_days", "sum", "freezing_days_monthly"),
        ("precipitation_occurrences", "sum", "precipitation_occurrences_monthly"),
        ("snowfall_occurrences", "sum", "snowfall_occurrences_monthly"),
        ("snow_isolation", "mean", "snow_isolation_monthly_mean"),
    ]
    monthly = xr.merge(
        [getattr(era5[var].resample(time="1ME"), op)().rename(name) for var, op, name in spec]
    )
    monthly.to_zarr(MONTHLY_PATH, mode="w", encoding=create_encoding(monthly), consolidated=False)
|
|
||||||
|
|
||||||
|
|
||||||
def _resample_yearly(ds: xr.Dataset, suffix: str) -> xr.Dataset:
    """Resample shifted-calendar monthly data to one value per shifted year.

    Applies min/max to extremes, sum to accumulated quantities and day counts,
    and mean to averaged quantities. Output variables are named after *suffix*
    ("yearly", "winter" or "summer"), e.g. ``t2m_monthly_min`` ->
    ``t2m_{suffix}_min``.

    Args:
        ds (xr.Dataset): Monthly dataset (possibly pre-filtered to a season).
        suffix (str): Name component for the output variables.

    Returns:
        xr.Dataset: One aggregate per variable and shifted year.

    """
    # (source variable, reduction, output name) — identical pattern for all seasons.
    spec = [
        # Original variables
        ("t2m_monthly_min", "min", f"t2m_{suffix}_min"),
        ("t2m_monthly_max", "max", f"t2m_{suffix}_max"),
        ("tp_monthly_sum", "sum", f"tp_{suffix}_sum"),
        ("sf_monthly_sum", "sum", f"sf_{suffix}_sum"),
        ("snowc_monthly_mean", "mean", f"snowc_{suffix}_mean"),
        ("sde_monthly_mean", "mean", f"sde_{suffix}_mean"),
        ("sshf_monthly_sum", "sum", f"sshf_{suffix}_sum"),
        ("lblt_monthly_max", "max", f"lblt_{suffix}_max"),
        # Enriched variables
        ("t2m_monthly_avg", "mean", f"t2m_{suffix}_avg"),
        # TODO: Check if this is correct -> use daily / hourly data instead for range and skew?
        ("t2m_monthly_range_avg", "mean", f"t2m_daily_range_{suffix}_avg"),
        ("t2m_monthly_skew_avg", "mean", f"t2m_daily_skew_{suffix}_avg"),
        ("thawing_degree_days_monthly", "sum", f"thawing_degree_days_{suffix}"),
        ("freezing_degree_days_monthly", "sum", f"freezing_degree_days_{suffix}"),
        ("thawing_days_monthly", "sum", f"thawing_days_{suffix}"),
        ("freezing_days_monthly", "sum", f"freezing_days_{suffix}"),
        ("precipitation_occurrences_monthly", "sum", f"precipitation_occurrences_{suffix}"),
        ("snowfall_occurrences_monthly", "sum", f"snowfall_occurrences_{suffix}"),
        ("snow_isolation_monthly_mean", "mean", f"snow_isolation_{suffix}_mean"),
    ]
    return xr.merge(
        [getattr(ds[var].resample(time="1YE"), op)().rename(name) for var, op, name in spec]
    )


def yearly_aggregate():
    """Aggregate monthly ERA5 data to yearly resolution with seasonal splits.

    Takes monthly aggregated data and creates yearly aggregates using a shifted
    calendar (October to September) to better capture Arctic seasonal patterns.
    Creates separate aggregates for full year, winter (Oct-Apr), and summer
    (May-Sep) periods.

    The first and last incomplete years are excluded from the analysis.
    Winter months are defined as months 1-7 in the shifted calendar,
    and summer months are 8-12.

    The final dataset includes yearly, winter, and summer aggregates for all
    climate variables, saved to a zarr file.
    """
    monthly = xr.open_zarr(MONTHLY_PATH, consolidated=False).set_coords("spatial_ref")
    assert {"cell", "time"} == set(monthly.dims), f"Expected dims ('cell', 'time'), got {monthly.dims}"

    # Yearly aggregates (shifted by +10 months to start in October, first and last years will be cropped)
    monthly_shifted = monthly.copy()
    monthly_shifted["time"] = monthly_shifted.get_index("time") + pd.DateOffset(months=10)
    incomplete_years = {monthly_shifted.time.dt.year.min().item(), monthly_shifted.time.dt.year.max().item()}
    monthly_shifted = monthly_shifted.sel(time=~monthly_shifted.time.dt.year.isin(incomplete_years))

    yearly = _resample_yearly(monthly_shifted, "yearly")

    # Summer / Winter aggregates
    winter_months = [1, 2, 3, 4, 5, 6, 7]  # These do NOT correspond to calendar months, but to the shifted months
    summer_months = [8, 9, 10, 11, 12]
    winter = _resample_yearly(
        monthly_shifted.sel(time=monthly_shifted.time.dt.month.isin(winter_months)), "winter"
    )
    summer = _resample_yearly(
        monthly_shifted.sel(time=monthly_shifted.time.dt.month.isin(summer_months)), "summer"
    )

    combined = xr.merge([yearly, summer, winter])
    combined.to_zarr(YEARLY_PATH, mode="w", encoding=create_encoding(combined), consolidated=False)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command
def temporal_agg(n_workers: int = 10):
    """Perform temporal aggregation of ERA5 data using Dask cluster.

    Starts a local Dask cluster, then runs the monthly and yearly aggregation
    steps in sequence to produce the temporally aggregated climate datasets.

    Args:
        n_workers (int, optional): Number of Dask workers to use. Defaults to 10.

    """
    with dd.LocalCluster(n_workers=n_workers, threads_per_worker=20, memory_limit="10GB") as cluster:
        with dd.Client(cluster) as client:
            print(client)
            print(client.dashboard_link)
            monthly_aggregate()
            yearly_aggregate()
            print("Enriched ERA5 data with additional features and aggregated it temporally.")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
cli()
|
|
||||||
|
|
@ -10,7 +10,7 @@ dependencies = [
|
||||||
"bokeh>=3.7.3",
|
"bokeh>=3.7.3",
|
||||||
"cartopy>=0.24.1",
|
"cartopy>=0.24.1",
|
||||||
"cdsapi>=0.7.6",
|
"cdsapi>=0.7.6",
|
||||||
"cyclopts>=3.17.0",
|
"cyclopts>=4.0.0",
|
||||||
"dask>=2025.5.1",
|
"dask>=2025.5.1",
|
||||||
"distributed>=2025.5.1",
|
"distributed>=2025.5.1",
|
||||||
"earthengine-api>=1.6.9",
|
"earthengine-api>=1.6.9",
|
||||||
|
|
@ -34,7 +34,8 @@ dependencies = [
|
||||||
"rich>=14.0.0",
|
"rich>=14.0.0",
|
||||||
"rioxarray>=0.19.0",
|
"rioxarray>=0.19.0",
|
||||||
"scipy>=1.15.3",
|
"scipy>=1.15.3",
|
||||||
"smart-geocubes[arcticdem,dask,stac,viz]>=0.0.9",
|
"seaborn>=0.13.2",
|
||||||
|
"smart-geocubes[gee,dask,stac,viz]>=0.0.9",
|
||||||
"stopuhr>=0.0.10",
|
"stopuhr>=0.0.10",
|
||||||
"xanimate",
|
"xanimate",
|
||||||
"xarray>=2025.9.0",
|
"xarray>=2025.9.0",
|
||||||
|
|
@ -43,6 +44,14 @@ dependencies = [
|
||||||
"zarr[remote]>=3.1.3",
|
"zarr[remote]>=3.1.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
create-grid = "steps.s0_0_grids.create_grid:main"
|
||||||
|
alpha-earth = "steps.s1_0_alphaearth.alphaearth:main"
|
||||||
|
era5 = "steps.s1_1_era5.era5:cli"
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
|
package = true
|
||||||
|
|
||||||
[tool.uv.sources]
|
[tool.uv.sources]
|
||||||
entropyc = { git = "ssh://git@github.com/AlbertEMC2Stein/entropyc", branch = "refactor/tobi" }
|
entropyc = { git = "ssh://git@github.com/AlbertEMC2Stein/entropyc", branch = "refactor/tobi" }
|
||||||
xanimate = { git = "https://github.com/davbyr/xAnimate" }
|
xanimate = { git = "https://github.com/davbyr/xAnimate" }
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ Author: Tobias Hölzer
|
||||||
Date: 09. June 2025
|
Date: 09. June 2025
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
import cartopy.crs as ccrs
|
import cartopy.crs as ccrs
|
||||||
|
|
@ -26,6 +28,12 @@ from xdggs.healpix import HealpixInfo
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
||||||
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "../../data")) / "entropyc-rts"
|
||||||
|
GRIDS_DIR = DATA_DIR / "grids"
|
||||||
|
FIGURES_DIR = DATA_DIR / "figures"
|
||||||
|
GRIDS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
@stopwatch("Create a global hex grid")
|
@stopwatch("Create a global hex grid")
|
||||||
def create_global_hex_grid(resolution):
|
def create_global_hex_grid(resolution):
|
||||||
|
|
@ -123,7 +131,7 @@ def filter_permafrost_grid(grid: gpd.GeoDataFrame):
|
||||||
grid = grid.to_crs("EPSG:3413")
|
grid = grid.to_crs("EPSG:3413")
|
||||||
|
|
||||||
# Filter out non-land areas (e.g., oceans)
|
# Filter out non-land areas (e.g., oceans)
|
||||||
water_mask = gpd.read_file("./data/simplified-water-polygons-split-3857/simplified_water_polygons.shp")
|
water_mask = gpd.read_file(DATA_DIR / "simplified-water-polygons-split-3857/simplified_water_polygons.shp")
|
||||||
water_mask = water_mask.to_crs("EPSG:3413")
|
water_mask = water_mask.to_crs("EPSG:3413")
|
||||||
|
|
||||||
ov = gpd.overlay(grid, water_mask, how="intersection")
|
ov = gpd.overlay(grid, water_mask, how="intersection")
|
||||||
|
|
@ -218,14 +226,20 @@ def cli(grid: Literal["hex", "healpix"], level: int):
|
||||||
print("No valid grid cells found.")
|
print("No valid grid cells found.")
|
||||||
return
|
return
|
||||||
|
|
||||||
grid_gdf.to_parquet(f"./data/grids/permafrost_{grid}{level}_grid.parquet")
|
grid_file = GRIDS_DIR / f"permafrost_{grid}{level}_grid.parquet"
|
||||||
print(f"Saved to ./data/grids/permafrost_{grid}{level}_grid.parquet")
|
grid_gdf.to_parquet(grid_file)
|
||||||
|
print(f"Saved to {grid_file.resolve()}")
|
||||||
|
|
||||||
fig = vizualize_grid(grid_gdf, grid, level)
|
fig = vizualize_grid(grid_gdf, grid, level)
|
||||||
fig.savefig(f"./figures/permafrost_{grid}{level}_grid.png", dpi=300)
|
fig_file = FIGURES_DIR / f"permafrost_{grid}{level}_grid.png"
|
||||||
print(f"Saved figure to ./figures/permafrost_{grid}{level}_grid.png")
|
fig.savefig(fig_file, dpi=300)
|
||||||
|
print(f"Saved figure to {fig_file.resolve()}")
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def main(): # noqa: D103
|
||||||
cyclopts.run(cli)
|
cyclopts.run(cli)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
644
steps/s1_0_alphaearth/alphaearth.ipynb
Normal file
644
steps/s1_0_alphaearth/alphaearth.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,7 @@
|
||||||
"""Extract satellite embeddings from Google Earth Engine and map them to a grid."""
|
"""Extract satellite embeddings from Google Earth Engine and map them to a grid."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import warnings
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
|
|
@ -10,14 +11,17 @@ import geemap
|
||||||
import geopandas as gpd
|
import geopandas as gpd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from rich import pretty, traceback
|
from rich import pretty, print, traceback
|
||||||
from rich.progress import track
|
from rich.progress import track
|
||||||
|
|
||||||
|
# Filter out the GeoDataFrame.swapaxes deprecation warning
|
||||||
|
warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning)
|
||||||
|
|
||||||
pretty.install()
|
pretty.install()
|
||||||
traceback.install()
|
traceback.install()
|
||||||
ee.Initialize(project="ee-tobias-hoelzer")
|
ee.Initialize(project="ee-tobias-hoelzer")
|
||||||
|
|
||||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "../../data")) / "entropyc-rts"
|
||||||
EMBEDDINGS_DIR = DATA_DIR / "embeddings"
|
EMBEDDINGS_DIR = DATA_DIR / "embeddings"
|
||||||
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)
|
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
@ -34,10 +38,11 @@ def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool =
|
||||||
gridname = f"permafrost_{grid}{level}"
|
gridname = f"permafrost_{grid}{level}"
|
||||||
grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
|
grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
|
||||||
|
|
||||||
for year in track(range(2022, 2025), total=3, description="Processing years..."):
|
for year in track(range(2017, 2025), total=8, description="Processing years..."):
|
||||||
embedding_collection = ee.ImageCollection("GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL")
|
embedding_collection = ee.ImageCollection("GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL")
|
||||||
embedding_collection = embedding_collection.filterDate(f"{year}-01-01", f"{year}-12-31")
|
embedding_collection = embedding_collection.filterDate(f"{year}-01-01", f"{year}-12-31")
|
||||||
bands = [f"A{str(i).zfill(2)}" for i in range(64)]
|
aggs = ["median", "stdDev", "min", "max", "mean", "p1", "p5", "p25", "p75", "p95", "p99"]
|
||||||
|
bands = [f"A{str(i).zfill(2)}_{agg}" for i in range(64) for agg in aggs]
|
||||||
|
|
||||||
def extract_embedding(feature):
|
def extract_embedding(feature):
|
||||||
# Filter collection by geometry
|
# Filter collection by geometry
|
||||||
|
|
@ -45,7 +50,11 @@ def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool =
|
||||||
embedding = embedding_collection.filterBounds(geom).mosaic()
|
embedding = embedding_collection.filterBounds(geom).mosaic()
|
||||||
# Get mean embedding value for the geometry
|
# Get mean embedding value for the geometry
|
||||||
mean_dict = embedding.reduceRegion(
|
mean_dict = embedding.reduceRegion(
|
||||||
reducer=ee.Reducer.median(),
|
reducer=ee.Reducer.median()
|
||||||
|
.combine(ee.Reducer.stdDev(), sharedInputs=True)
|
||||||
|
.combine(ee.Reducer.minMax(), sharedInputs=True)
|
||||||
|
.combine(ee.Reducer.mean(), sharedInputs=True)
|
||||||
|
.combine(ee.Reducer.percentile([1, 5, 25, 75, 95, 99]), sharedInputs=True),
|
||||||
geometry=geom,
|
geometry=geom,
|
||||||
)
|
)
|
||||||
# Add mean embedding values as properties to the feature
|
# Add mean embedding values as properties to the feature
|
||||||
|
|
@ -79,8 +88,14 @@ def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool =
|
||||||
# Combine all batch results
|
# Combine all batch results
|
||||||
df = pd.concat(all_results, ignore_index=True)
|
df = pd.concat(all_results, ignore_index=True)
|
||||||
embeddings_on_grid = grid.merge(df[[*bands, "cell_id"]], on="cell_id", how="left")
|
embeddings_on_grid = grid.merge(df[[*bands, "cell_id"]], on="cell_id", how="left")
|
||||||
embeddings_on_grid.to_parquet(EMBEDDINGS_DIR / f"{gridname}_embeddings-{year}.parquet")
|
embeddings_file = EMBEDDINGS_DIR / f"{gridname}_embeddings-{year}.parquet"
|
||||||
|
embeddings_on_grid.to_parquet(embeddings_file)
|
||||||
|
print(f"Saved embeddings for year {year} to {embeddings_file.resolve()}.")
|
||||||
|
|
||||||
|
|
||||||
|
def main(): # noqa: D103
|
||||||
|
cyclopts.run(cli)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cyclopts.run(cli)
|
main()
|
||||||
9
steps/s1_0_alphaearth/alphaearth.sh
Normal file
9
steps/s1_0_alphaearth/alphaearth.sh
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# uv run alpha-earth --grid hex --level 3
|
||||||
|
uv run alpha-earth --grid hex --level 4
|
||||||
|
uv run alpha-earth --grid hex --level 5
|
||||||
|
uv run alpha-earth --grid healpix --level 6
|
||||||
|
uv run alpha-earth --grid healpix --level 7
|
||||||
|
uv run alpha-earth --grid healpix --level 8
|
||||||
|
uv run alpha-earth --grid healpix --level 9
|
||||||
|
|
@ -14,7 +14,7 @@ from rich import pretty, print, traceback
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
||||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "../../data")) / "entropyc-rts"
|
||||||
|
|
||||||
|
|
||||||
def hourly(years: str):
|
def hourly(years: str):
|
||||||
576
steps/s1_1_era5/era5.py
Normal file
576
steps/s1_1_era5/era5.py
Normal file
|
|
@ -0,0 +1,576 @@
|
||||||
|
"""Download and preprocess ERA5 data.
|
||||||
|
|
||||||
|
Variables of Interest:
|
||||||
|
- 2 metre temperature (t2m) [instant]
|
||||||
|
- Total precipitation (tp) [accum]
|
||||||
|
- Snow Fall (sf) [accum]
|
||||||
|
- Snow cover (snowc) [instant]
|
||||||
|
- Snow depth (sde) [instant]
|
||||||
|
- Surface sensible heat flux (sshf) [accum]
|
||||||
|
- Lake ice bottom temperature (lblt) [instant]
|
||||||
|
|
||||||
|
Naming patterns:
|
||||||
|
- Instant Variables are downloaded already as statistically aggregated (lossy),
|
||||||
|
therefore their names get the aggregation as suffix
|
||||||
|
- Accumulation Variables are downloaded as totals, their names stay the same
|
||||||
|
|
||||||
|
Daily Variables (downloaded from hourly data):
|
||||||
|
- t2m_max
|
||||||
|
- t2m_min
|
||||||
|
- snowc_mean
|
||||||
|
- sde_mean
|
||||||
|
- lblt_max
|
||||||
|
- tp
|
||||||
|
- sf
|
||||||
|
- sshf
|
||||||
|
|
||||||
|
Derived Daily Variables:
|
||||||
|
- t2m_daily_avg
|
||||||
|
- t2m_daily_range
|
||||||
|
- t2m_daily_skew
|
||||||
|
- thawing_degree_days
|
||||||
|
- freezing_degree_days
|
||||||
|
- thawing_days
|
||||||
|
- freezing_days
|
||||||
|
- precipitation_occurrences
|
||||||
|
- snowfall_occurrences
|
||||||
|
- snow_isolation (snowc * sde)
|
||||||
|
|
||||||
|
Monthly Variables:
|
||||||
|
- t2m_monthly_max
|
||||||
|
- t2m_monthly_min
|
||||||
|
- tp_monthly_sum
|
||||||
|
- sf_monthly_sum
|
||||||
|
- snowc_monthly_mean
|
||||||
|
- sde_monthly_mean
|
||||||
|
- sshf_monthly_sum
|
||||||
|
- lblt_monthly_max
|
||||||
|
- t2m_monthly_avg
|
||||||
|
- t2m_monthly_range_avg
|
||||||
|
- t2m_monthly_skew_avg
|
||||||
|
- thawing_degree_days_monthly
|
||||||
|
- freezing_degree_days_monthly
|
||||||
|
- thawing_days_monthly
|
||||||
|
- freezing_days_monthly
|
||||||
|
- precipitation_occurrences_monthly TODO: Rename to precipitation_days_monthly?
|
||||||
|
- snowfall_occurrences_monthly TODO: Rename to snowfall_days_monthly?
|
||||||
|
- snow_isolation_monthly_mean
|
||||||
|
|
||||||
|
Yearly Variables:
|
||||||
|
- TODO
|
||||||
|
|
||||||
|
# TODO Variables:
|
||||||
|
- Day of first thaw (yearly)
|
||||||
|
- Day of last thaw (yearly)
|
||||||
|
- Thawing period length (yearly)
|
||||||
|
- Freezing period length (yearly)
|
||||||
|
|
||||||
|
Author: Tobias Hölzer
|
||||||
|
Date: 09. June 2025
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
import cyclopts
|
||||||
|
import dask.distributed as dd
|
||||||
|
import geopandas as gpd
|
||||||
|
import odc.geo
|
||||||
|
import odc.geo.xr
|
||||||
|
import pandas as pd
|
||||||
|
import shapely
|
||||||
|
import shapely.ops
|
||||||
|
import xarray as xr
|
||||||
|
from numcodecs.zarr3 import Blosc
|
||||||
|
from rich import pretty, print, traceback
|
||||||
|
from rich.progress import track
|
||||||
|
from shapely.geometry import LineString, Polygon
|
||||||
|
|
||||||
|
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd])
|
||||||
|
pretty.install()
|
||||||
|
|
||||||
|
cli = cyclopts.App()
|
||||||
|
|
||||||
|
# TODO: Directly handle download on a grid level - this is more what the zarr access is indented to do
|
||||||
|
|
||||||
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
||||||
|
ERA5_DIR = DATA_DIR / "era5"
|
||||||
|
DAILY_RAW_PATH = ERA5_DIR / "daily_raw.zarr"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_grid_paths(
|
||||||
|
agg: Literal["daily", "monthly", "summer", "winter", "yearly"],
|
||||||
|
grid: Literal["hex", "healpix"],
|
||||||
|
level: int,
|
||||||
|
):
|
||||||
|
gridname = f"permafrost_{grid}{level}"
|
||||||
|
aligned_path = ERA5_DIR / f"{agg}_{gridname}.zarr"
|
||||||
|
return aligned_path
|
||||||
|
|
||||||
|
|
||||||
|
min_lat = 50
|
||||||
|
max_lat = 83.7 # Ensures the right Chunks Size (90 - 64 / 10 + 0.1)
|
||||||
|
min_time = "1990-01-01"
|
||||||
|
max_time = "2024-12-31"
|
||||||
|
|
||||||
|
|
||||||
|
today = time.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
# ================
|
||||||
|
# === Download ===
|
||||||
|
# ================
|
||||||
|
|
||||||
|
|
||||||
|
def create_encoding(ds: xr.Dataset):
|
||||||
|
"""Create compression encoding for zarr dataset storage.
|
||||||
|
|
||||||
|
Creates Blosc compression configuration for all data variables and coordinates
|
||||||
|
in the dataset using zstd compression with level 9.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ds (xr.Dataset): The xarray Dataset to create encoding for.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Encoding dictionary with compression settings for each variable.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# encoding = {var: {"compressors": BloscCodec(cname="zlib", clevel=9)} for var in ds.data_vars}
|
||||||
|
encoding = {var: {"compressors": Blosc(cname="zstd", clevel=9)} for var in [*ds.data_vars, *ds.coords]}
|
||||||
|
return encoding
|
||||||
|
|
||||||
|
|
||||||
|
def download_daily_aggregated():
|
||||||
|
"""Download and aggregate ERA5 data to daily resolution.
|
||||||
|
|
||||||
|
Downloads ERA5 reanalysis data from the DESTINE Earth Data Hub and aggregates
|
||||||
|
it to daily resolution. Includes temperature extremes, precipitation, snow,
|
||||||
|
and surface heat flux variables.
|
||||||
|
|
||||||
|
The function downloads hourly data and creates daily aggregates:
|
||||||
|
- Temperature: daily min/max
|
||||||
|
- Precipitation and snowfall: daily totals
|
||||||
|
- Snow cover and depth: daily means
|
||||||
|
- Surface heat flux: daily totals
|
||||||
|
- Lake ice temperature: daily max
|
||||||
|
|
||||||
|
The aggregated data is saved to a zarr file with compression.
|
||||||
|
"""
|
||||||
|
era5 = xr.open_dataset(
|
||||||
|
"https://data.earthdatahub.destine.eu/era5/reanalysis-era5-land-no-antartica-v0.zarr",
|
||||||
|
storage_options={"client_kwargs": {"trust_env": True}},
|
||||||
|
chunks={},
|
||||||
|
# chunks={},
|
||||||
|
engine="zarr",
|
||||||
|
).rename({"valid_time": "time"})
|
||||||
|
subset = {
|
||||||
|
"latitude": slice(max_lat, min_lat),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute the clostest chunk-start to min_time, to avoid problems with cropped chunks at the start
|
||||||
|
tchunksize = era5.chunksizes["time"][0]
|
||||||
|
era5_chunk_starts = pd.date_range(era5.time.min().item(), era5.time.max().item(), freq=f"{tchunksize}h")
|
||||||
|
closest_chunk_start = era5_chunk_starts[
|
||||||
|
era5_chunk_starts.get_indexer([pd.to_datetime(min_time)], method="ffill")[0]
|
||||||
|
]
|
||||||
|
subset["time"] = slice(str(closest_chunk_start), max_time)
|
||||||
|
|
||||||
|
era5 = era5.sel(**subset)
|
||||||
|
|
||||||
|
daily_raw = xr.merge(
|
||||||
|
[
|
||||||
|
# Instant
|
||||||
|
era5.t2m.resample(time="1D").max().rename("t2m_max"),
|
||||||
|
era5.t2m.resample(time="1D").min().rename("t2m_min"),
|
||||||
|
era5.snowc.resample(time="1D").mean().rename("snowc_mean"),
|
||||||
|
era5.sde.resample(time="1D").mean().rename("sde_mean"),
|
||||||
|
era5.lblt.resample(time="1D").max().rename("lblt_max"),
|
||||||
|
# Accum
|
||||||
|
era5.tp.resample(time="1D").sum().rename("tp"),
|
||||||
|
era5.sf.resample(time="1D").sum().rename("sf"),
|
||||||
|
era5.sshf.resample(time="1D").sum().rename("sshf"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assign attributes
|
||||||
|
daily_raw["t2m_max"].attrs = {"long_name": "Daily maximum 2 metre temperature", "units": "K"}
|
||||||
|
daily_raw["t2m_min"].attrs = {"long_name": "Daily minimum 2 metre temperature", "units": "K"}
|
||||||
|
daily_raw["tp"].attrs = {"long_name": "Daily total precipitation", "units": "m"}
|
||||||
|
daily_raw["sf"].attrs = {"long_name": "Daily total snow fall", "units": "m"}
|
||||||
|
daily_raw["snowc_mean"].attrs = {"long_name": "Daily mean snow cover", "units": "m"}
|
||||||
|
daily_raw["sde_mean"].attrs = {"long_name": "Daily mean snow depth", "units": "m"}
|
||||||
|
daily_raw["sshf"].attrs = {"long_name": "Daily total surface sensible heat flux", "units": "J/m²"}
|
||||||
|
daily_raw["lblt_max"].attrs = {"long_name": "Daily maximum lake ice bottom temperature", "units": "K"}
|
||||||
|
|
||||||
|
daily_raw = daily_raw.odc.assign_crs("epsg:4326")
|
||||||
|
daily_raw = daily_raw.drop_vars(["surface", "number", "depthBelowLandLayer"])
|
||||||
|
daily_raw.to_zarr(DAILY_RAW_PATH, mode="w", encoding=create_encoding(daily_raw), consolidated=False)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command
|
||||||
|
def download():
|
||||||
|
"""Download ERA5 data using Dask cluster for parallel processing.
|
||||||
|
|
||||||
|
Creates a local Dask cluster and downloads daily aggregated ERA5 data.
|
||||||
|
The cluster is configured with a single worker with 10 threads and 100GB
|
||||||
|
memory limit for optimal performance.
|
||||||
|
"""
|
||||||
|
with (
|
||||||
|
dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster,
|
||||||
|
dd.Client(cluster) as client,
|
||||||
|
):
|
||||||
|
print(client)
|
||||||
|
print(client.dashboard_link)
|
||||||
|
download_daily_aggregated()
|
||||||
|
print(f"Downloaded and aggregated ERA5 data to {DAILY_RAW_PATH.resolve()}.")
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================
|
||||||
|
# === Spatial Aggregation ===
|
||||||
|
# ===========================
|
||||||
|
|
||||||
|
|
||||||
|
def _crosses_antimeridian(geom: Polygon) -> bool:
|
||||||
|
coords = shapely.get_coordinates(geom)
|
||||||
|
crosses_any_meridian = (coords[:, 0] > 0).any() and (coords[:, 0] < 0).any()
|
||||||
|
return crosses_any_meridian and abs(coords[:, 0]).max() > 90
|
||||||
|
|
||||||
|
|
||||||
|
def _split_antimeridian_cell(geom: Polygon) -> list[Polygon]:
|
||||||
|
# Assumes that it is a antimeridian hex
|
||||||
|
coords = shapely.get_coordinates(geom)
|
||||||
|
for i in range(coords.shape[0]):
|
||||||
|
if coords[i, 0] < 0:
|
||||||
|
coords[i, 0] += 360
|
||||||
|
geom = Polygon(coords)
|
||||||
|
antimeridian = LineString([[180, -90], [180, 90]])
|
||||||
|
polys = shapely.ops.split(geom, antimeridian)
|
||||||
|
return list(polys.geoms)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_geobox(geobox):
|
||||||
|
x, y = geobox.shape
|
||||||
|
return x > 1 and y > 1
|
||||||
|
|
||||||
|
|
||||||
|
def extract_cell_data(idx: int, geom: Polygon) -> xr.Dataset:
|
||||||
|
"""Extract ERA5 data for a specific grid cell geometry.
|
||||||
|
|
||||||
|
Extracts and spatially averages ERA5 data within the bounds of a grid cell.
|
||||||
|
Handles antimeridian-crossing cells by splitting them appropriately.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
idx (int): Index of the grid cell.
|
||||||
|
geom (Polygon): Polygon geometry of the grid cell.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
xr.Dataset: The computed cell dataset
|
||||||
|
|
||||||
|
"""
|
||||||
|
daily_raw = xr.open_zarr(DAILY_RAW_PATH, consolidated=False).set_coords("spatial_ref")
|
||||||
|
# cell.geometry is a shapely Polygon
|
||||||
|
if not _crosses_antimeridian(geom):
|
||||||
|
geoms = [geom]
|
||||||
|
# Split geometry in case it crossed antimeridian
|
||||||
|
else:
|
||||||
|
geoms = _split_antimeridian_cell(geom)
|
||||||
|
cell_data = []
|
||||||
|
for geom in geoms:
|
||||||
|
geom = odc.geo.Geometry(geom, crs="epsg:4326")
|
||||||
|
if not _check_geobox(daily_raw.odc.geobox.enclosing(geom)):
|
||||||
|
continue
|
||||||
|
# TODO: use mean for instant variables, sum for accum variables
|
||||||
|
cell_data.append(daily_raw.odc.crop(geom).drop_vars("spatial_ref").mean(["latitude", "longitude"]))
|
||||||
|
if len(cell_data) == 0:
|
||||||
|
return False
|
||||||
|
elif len(cell_data) == 1:
|
||||||
|
cell_data = cell_data[0]
|
||||||
|
else:
|
||||||
|
cell_data = xr.concat(cell_data, dim="part").mean("part")
|
||||||
|
cell_data = cell_data.expand_dims({"cell": [idx]}).compute()
|
||||||
|
return cell_data
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command
|
||||||
|
def spatial_agg(
|
||||||
|
grid: Literal["hex", "healpix"],
|
||||||
|
level: int,
|
||||||
|
n_workers: int = 10,
|
||||||
|
executor: Literal["threads", "processes"] = "threads",
|
||||||
|
):
|
||||||
|
"""Perform spatial aggregation of ERA5 data to grid cells.
|
||||||
|
|
||||||
|
Loads a grid and spatially aggregates ERA5 data to each grid cell using
|
||||||
|
parallel processing. Creates an empty zarr file first, then fills it
|
||||||
|
with extracted data for each cell.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
grid ("hex" | "healpix"): Grid type.
|
||||||
|
level (int): Grid resolution level.
|
||||||
|
n_workers (int, optional): Number of parallel workers to use. Defaults to 10.
|
||||||
|
executor ("threads" | "processes"): The type of parallel executor pool to use. Defaults to threads.
|
||||||
|
|
||||||
|
"""
|
||||||
|
gridname = f"permafrost_{grid}{level}"
|
||||||
|
daily_grid_path = _get_grid_paths("daily", grid, level)
|
||||||
|
grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
|
||||||
|
# Create an empty zarr array with the right dimensions
|
||||||
|
daily_raw = xr.open_zarr(DAILY_RAW_PATH, consolidated=False).set_coords("spatial_ref")
|
||||||
|
assert {"latitude", "longitude", "time"} == set(daily_raw.dims), (
|
||||||
|
f"Expected dims ('latitude', 'longitude', 'time'), got {daily_raw.dims}"
|
||||||
|
)
|
||||||
|
assert daily_raw.odc.crs == "epsg:4326", f"Expected CRS 'epsg:4326', got {daily_raw.odc.crs}"
|
||||||
|
daily = (
|
||||||
|
xr.zeros_like(daily_raw.isel(latitude=0, longitude=0))
|
||||||
|
.expand_dims({"cell": [idx for idx, _ in grid.iterrows()]})
|
||||||
|
.chunk({"cell": min(len(grid), 1000), "time": len(daily_raw.time)}) # ~50MB chunks
|
||||||
|
)
|
||||||
|
daily.to_zarr(daily_grid_path, mode="w", consolidated=False, encoding=create_encoding(daily))
|
||||||
|
print(f"Created empty zarr at {daily_grid_path.resolve()} with shape {daily.sizes}.")
|
||||||
|
|
||||||
|
print(f"Starting spatial matching of {len(grid)} cells with {n_workers} workers...")
|
||||||
|
ExecutorCls = ThreadPoolExecutor if executor == "threads" else ProcessPoolExecutor
|
||||||
|
with ExecutorCls(max_workers=n_workers) as executor:
|
||||||
|
futures = {
|
||||||
|
executor.submit(extract_cell_data, idx, row.geometry): idx
|
||||||
|
for idx, row in grid.to_crs("epsg:4326").iterrows()
|
||||||
|
}
|
||||||
|
for future in track(as_completed(futures), total=len(futures), description="Processing cells"):
|
||||||
|
idx = futures[future]
|
||||||
|
try:
|
||||||
|
cell_data = future.result()
|
||||||
|
if not cell_data:
|
||||||
|
print(f"Cell {idx} did not overlap with ERA5 data.")
|
||||||
|
cell_data.to_zarr(daily_grid_path, region="auto", consolidated=False)
|
||||||
|
print(f"Successfully written cell {idx}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"{type(e)} processing cell {idx}: {e}")
|
||||||
|
print("Finished spatial matching.")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================
|
||||||
|
# === Temporal Aggregation ===
|
||||||
|
# ============================
|
||||||
|
|
||||||
|
|
||||||
|
def daily_enrich(grid: Literal["hex", "healpix"], level: int) -> xr.Dataset:
|
||||||
|
"""Enrich daily ERA5 data with derived climate variables.
|
||||||
|
|
||||||
|
Loads spatially aligned ERA5 data and computes additional climate variables.
|
||||||
|
Creates derived variables including temperature statistics, degree days, and occurrence indicators.
|
||||||
|
|
||||||
|
Derived variables include:
|
||||||
|
- Daily average and range temperature
|
||||||
|
- Temperature skewness
|
||||||
|
- Thawing and freezing degree days
|
||||||
|
- Thawing and freezing day counts
|
||||||
|
- Precipitation and snowfall occurrences
|
||||||
|
- Snow isolation index
|
||||||
|
|
||||||
|
Args:
|
||||||
|
grid ("hex", "healpix"): Grid type.
|
||||||
|
level (int): Grid resolution level.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
xr.Dataset: Enriched dataset with original and derived variables.
|
||||||
|
|
||||||
|
"""
|
||||||
|
daily_grid_path = _get_grid_paths("daily", grid, level)
|
||||||
|
daily = xr.open_zarr(daily_grid_path, consolidated=False).set_coords("spatial_ref")
|
||||||
|
assert {"cell", "time"} == set(daily.dims), f"Expected dims ('cell', 'time'), got {daily.dims}"
|
||||||
|
|
||||||
|
# Formulas based on Groeke et. al. (2025) Stochastic Weather generation...
|
||||||
|
daily["t2m_avg"] = (daily.t2m_max + daily.t2m_min) / 2
|
||||||
|
daily.t2m_avg.attrs = {"long_name": "Daily average 2 metre temperature", "units": "K"}
|
||||||
|
daily["t2m_range"] = daily.t2m_max - daily.t2m_min
|
||||||
|
daily.t2m_range.attrs = {"long_name": "Daily range of 2 metre temperature", "units": "K"}
|
||||||
|
daily["t2m_skew"] = (daily.t2m_avg - daily.t2m_min) / daily.t2m_range
|
||||||
|
daily.t2m_skew.attrs = {"long_name": "Daily skewness of 2 metre temperature"}
|
||||||
|
|
||||||
|
daily["thawing_degree_days"] = (daily.t2m_avg - 273.15).clip(min=0)
|
||||||
|
daily.thawing_degree_days.attrs = {"long_name": "Thawing degree days", "units": "K"}
|
||||||
|
daily["freezing_degree_days"] = (273.15 - daily.t2m_avg).clip(min=0)
|
||||||
|
daily.freezing_degree_days.attrs = {"long_name": "Freezing degree days", "units": "K"}
|
||||||
|
|
||||||
|
daily["thawing_days"] = (daily.t2m_avg > 273.15).astype(int)
|
||||||
|
daily.thawing_days.attrs = {"long_name": "Thawing days"}
|
||||||
|
daily["freezing_days"] = (daily.t2m_avg < 273.15).astype(int)
|
||||||
|
daily.freezing_days.attrs = {"long_name": "Freezing days"}
|
||||||
|
|
||||||
|
daily["precipitation_occurrences"] = (daily.tp > 0).astype(int)
|
||||||
|
daily.precipitation_occurrences.attrs = {"long_name": "Precipitation occurrences"}
|
||||||
|
daily["snowfall_occurrences"] = (daily.sf > 0).astype(int)
|
||||||
|
daily.snowfall_occurrences.attrs = {"long_name": "Snowfall occurrences"}
|
||||||
|
|
||||||
|
daily["snow_isolation"] = daily.snowc_mean * daily.sde_mean
|
||||||
|
daily.snow_isolation.attrs = {"long_name": "Snow isolation"}
|
||||||
|
|
||||||
|
return daily
|
||||||
|
|
||||||
|
|
||||||
|
def monthly_aggregate(grid: Literal["hex", "healpix"], level: int):
|
||||||
|
"""Aggregate enriched daily ERA5 data to monthly resolution.
|
||||||
|
|
||||||
|
Takes the enriched daily ERA5 data and creates monthly aggregates using
|
||||||
|
appropriate statistical functions for each variable type. Temperature
|
||||||
|
variables use min/max/mean, accumulation variables use sums, and derived
|
||||||
|
variables use appropriate aggregations.
|
||||||
|
|
||||||
|
The aggregated monthly data is saved to a zarr file for further processing.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
grid ("hex", "healpix"): Grid type.
|
||||||
|
level (int): Grid resolution level.
|
||||||
|
|
||||||
|
"""
|
||||||
|
daily = daily_enrich(grid, level)
|
||||||
|
assert {"cell", "time"} == set(daily.dims), f"Expected dims ('cell', 'time'), got {daily.dims}"
|
||||||
|
|
||||||
|
# Monthly aggregates
|
||||||
|
monthly = xr.merge(
|
||||||
|
[
|
||||||
|
# Original variables
|
||||||
|
daily.t2m_min.resample(time="1ME").min().rename("t2m_min"),
|
||||||
|
daily.t2m_max.resample(time="1ME").max().rename("t2m_max"),
|
||||||
|
daily.snowc_mean.resample(time="1ME").mean().rename("snowc_mean"),
|
||||||
|
daily.sde_mean.resample(time="1ME").mean().rename("sde_mean"),
|
||||||
|
daily.lblt_max.resample(time="1ME").max().rename("lblt_max"),
|
||||||
|
daily.tp.resample(time="1ME").sum().rename("tp"),
|
||||||
|
daily.sf.resample(time="1ME").sum().rename("sf"),
|
||||||
|
daily.sshf.resample(time="1ME").sum().rename("sshf"),
|
||||||
|
# Enriched variables
|
||||||
|
daily.t2m_avg.resample(time="1ME").mean().rename("t2m_avg"),
|
||||||
|
daily.t2m_range.resample(time="1ME").mean().rename("t2m_mean_range"),
|
||||||
|
daily.t2m_skew.resample(time="1ME").mean().rename("t2m_mean_skew"),
|
||||||
|
daily.thawing_degree_days.resample(time="1ME").sum().rename("thawing_degree_days"),
|
||||||
|
daily.freezing_degree_days.resample(time="1ME").sum().rename("freezing_degree_days"),
|
||||||
|
daily.thawing_days.resample(time="1ME").sum().rename("thawing_days"),
|
||||||
|
daily.freezing_days.resample(time="1ME").sum().rename("freezing_days"),
|
||||||
|
daily.precipitation_occurrences.resample(time="1ME").sum().rename("precipitation_occurrences"),
|
||||||
|
daily.snowfall_occurrences.resample(time="1ME").sum().rename("snowfall_occurrences"),
|
||||||
|
daily.snow_isolation.resample(time="1ME").mean().rename("snow_mean_isolation"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
monthly_grid_path = _get_grid_paths("monthly", grid, level)
|
||||||
|
monthly.to_zarr(monthly_grid_path, mode="w", encoding=create_encoding(monthly), consolidated=False)
|
||||||
|
|
||||||
|
|
||||||
|
def yearly_aggregate(monthly: xr.Dataset) -> xr.Dataset:
|
||||||
|
"""Aggregate monthly ERA5 data to yearly resolution.
|
||||||
|
|
||||||
|
Takes monthly aggregated data and creates yearly aggregates using a shifted
|
||||||
|
calendar (October to September) to better capture Arctic seasonal patterns.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
monthly (xr.Dataset): The monthly aggregates
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
xr.Dataset: The aggregated dataset
|
||||||
|
|
||||||
|
"""
|
||||||
|
return xr.merge(
|
||||||
|
[
|
||||||
|
# Original variables
|
||||||
|
monthly.t2m_min.resample(time="1YE").min().rename("t2m_min"),
|
||||||
|
monthly.t2m_max.resample(time="1YE").max().rename("t2m_max"),
|
||||||
|
monthly.snowc_mean.resample(time="1YE").mean().rename("snowc_mean"),
|
||||||
|
monthly.sde_mean.resample(time="1YE").mean().rename("sde_mean"),
|
||||||
|
monthly.lblt_max.resample(time="1YE").max().rename("lblt_max"),
|
||||||
|
monthly.tp.resample(time="1YE").sum().rename("tp"),
|
||||||
|
monthly.sf.resample(time="1YE").sum().rename("sf"),
|
||||||
|
monthly.sshf.resample(time="1YE").sum().rename("sshf"),
|
||||||
|
# Enriched variables
|
||||||
|
monthly.t2m_avg.resample(time="1YE").mean().rename("t2m_avg"),
|
||||||
|
# TODO: Check if this is correct -> use daily / hourly data instead for range and skew?
|
||||||
|
monthly.t2m_mean_range.resample(time="1YE").mean().rename("t2m_mean_range"),
|
||||||
|
monthly.t2m_mean_skew.resample(time="1YE").mean().rename("t2m_mean_skew"),
|
||||||
|
monthly.thawing_degree_days.resample(time="1YE").sum().rename("thawing_degree_days"),
|
||||||
|
monthly.freezing_degree_days.resample(time="1YE").sum().rename("freezing_degree_days"),
|
||||||
|
monthly.thawing_days.resample(time="1YE").sum().rename("thawing_days"),
|
||||||
|
monthly.freezing_days.resample(time="1YE").sum().rename("freezing_days"),
|
||||||
|
monthly.precipitation_occurrences.resample(time="1YE").sum().rename("precipitation_occurrences"),
|
||||||
|
monthly.snowfall_occurrences.resample(time="1YE").sum().rename("snowfall_occurrences"),
|
||||||
|
monthly.snow_mean_isolation.resample(time="1YE").mean().rename("snow_mean_isolation"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def yearly_and_seasonal_aggregate(grid: Literal["hex", "healpix"], level: int):
    """Aggregate monthly ERA5 data to yearly resolution with seasonal splits.

    Takes monthly aggregated data and creates yearly aggregates using a shifted
    calendar (October to September) to better capture Arctic seasonal patterns.
    Creates separate aggregates for full year, winter (Oct-Apr), and summer
    (May-Sep) periods.

    The first incomplete year is excluded from the analysis.
    Winter months are defined as months 1-7 in the shifted calendar,
    and summer months are 8-12.

    The final dataset includes yearly, winter, and summer aggregates for all
    climate variables, each saved to its own zarr store.

    Args:
        grid ("hex", "healpix"): Grid type.
        level (int): Grid resolution level.

    """
    monthly_grid_path = _get_grid_paths("monthly", grid, level)
    monthly = xr.open_zarr(monthly_grid_path, consolidated=False).set_coords("spatial_ref")
    assert {"cell", "time"} == set(monthly.dims), f"Expected dims ('cell', 'time'), got {monthly.dims}"

    # Crop the first (always incomplete) year; keep everything up to the last timestamp.
    valid_years = slice(str(monthly.time.min().dt.year.item() + 1), str(monthly.time.max().dt.year.item()))

    def _shift_and_crop(ds: xr.Dataset) -> xr.Dataset:
        """Map the Oct-Sep year onto a Jan-Dec calendar year and crop incomplete years.

        Shifting timestamps by +3 months sends October -> January and
        September -> December, so a plain calendar-year resample ("1YE" in
        ``yearly_aggregate``) groups October..September together. It also
        realizes the shifted-calendar month numbering documented above:
        winter (Oct-Apr) becomes months 1-7 and summer (May-Sep) becomes 8-12.

        BUGFIX: this shift was previously +8 months, which grouped May..April
        into one label year instead of the documented October..September.
        """
        shifted = ds.copy()
        shifted["time"] = shifted.get_index("time") + pd.DateOffset(months=3)
        return shifted.sel(time=valid_years)

    # Summer aggregates (May-Sep lies entirely within one calendar year, so no shift is needed).
    summer = yearly_aggregate(monthly.sel(time=monthly.time.dt.month.isin([5, 6, 7, 8, 9])).sel(time=valid_years))

    # Yearly aggregates over the shifted (October to September) year.
    yearly = yearly_aggregate(_shift_and_crop(monthly))

    # Winter aggregates (Oct-Apr) over the shifted year.
    winter = yearly_aggregate(_shift_and_crop(monthly.sel(time=monthly.time.dt.month.isin([1, 2, 3, 4, 10, 11, 12]))))

    yearly_grid_path = _get_grid_paths("yearly", grid, level)
    yearly.to_zarr(yearly_grid_path, mode="w", encoding=create_encoding(yearly), consolidated=False)

    winter_grid_path = _get_grid_paths("winter", grid, level)
    winter.to_zarr(winter_grid_path, mode="w", encoding=create_encoding(winter), consolidated=False)

    summer_grid_path = _get_grid_paths("summer", grid, level)
    summer.to_zarr(summer_grid_path, mode="w", encoding=create_encoding(summer), consolidated=False)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command
def temporal_agg(n_workers: int = 10):
    """Perform temporal aggregation of ERA5 data using a Dask cluster.

    Creates a local Dask cluster and runs both monthly and yearly aggregation
    functions to generate temporally aggregated climate datasets. The
    processing uses parallel workers for efficient computation.

    Args:
        n_workers (int, optional): Number of Dask workers to use. Defaults to 10.

    """
    # Local Dask cluster: n_workers worker processes x 20 threads each,
    # with a 10 GB memory limit per worker. Client/cluster are closed on exit.
    with (
        dd.LocalCluster(n_workers=n_workers, threads_per_worker=20, memory_limit="10GB") as cluster,
        dd.Client(cluster) as client,
    ):
        # Print the client summary and dashboard URL so progress can be monitored.
        print(client)
        print(client.dashboard_link)
        monthly_aggregate()
        # NOTE(review): yearly_and_seasonal_aggregate is defined with required
        # (grid, level) parameters, so this zero-argument call would raise a
        # TypeError unless defaults are supplied elsewhere — confirm the
        # intended arguments for both aggregation calls.
        yearly_and_seasonal_aggregate()
        print("Enriched ERA5 data with additional features and aggregated it temporally.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: dispatch to the command-line app (subcommands are
    # registered above via @cli.command).
    cli()
|
||||||
96
uv.lock
generated
96
uv.lock
generated
|
|
@ -850,17 +850,17 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cyclopts"
|
name = "cyclopts"
|
||||||
version = "3.24.0"
|
version = "4.0.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "attrs" },
|
{ name = "attrs" },
|
||||||
{ name = "docstring-parser", marker = "python_full_version < '4'" },
|
{ name = "docstring-parser" },
|
||||||
{ name = "rich" },
|
{ name = "rich" },
|
||||||
{ name = "rich-rst" },
|
{ name = "rich-rst" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/30/ca/7782da3b03242d5f0a16c20371dff99d4bd1fedafe26bc48ff82e42be8c9/cyclopts-3.24.0.tar.gz", hash = "sha256:de6964a041dfb3c57bf043b41e68c43548227a17de1bad246e3a0bfc5c4b7417", size = 76131 }
|
sdist = { url = "https://files.pythonhosted.org/packages/9a/d1/2f2b99ec5ea54ac18baadfc4a011e2a1743c1eaae1e39838ca520dcf4811/cyclopts-4.0.0.tar.gz", hash = "sha256:0dae712085e91d32cc099ea3d78f305b0100a3998b1dec693be9feb0b1be101f", size = 143546 }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/f0/8b/2c95f0645c6f40211896375e6fa51f504b8ccb29c21f6ae661fe87ab044e/cyclopts-3.24.0-py3-none-any.whl", hash = "sha256:809d04cde9108617106091140c3964ee6fceb33cecdd537f7ffa360bde13ed71", size = 86154 },
|
{ url = "https://files.pythonhosted.org/packages/44/0e/0a22e076944600aeb06f40b7e03bbd762a42d56d43a2f5f4ab954aed9005/cyclopts-4.0.0-py3-none-any.whl", hash = "sha256:e64801a2c86b681f08323fd50110444ee961236a0bae402a66d2cc3feda33da7", size = 178837 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -885,6 +885,15 @@ wheels = [
|
||||||
array = [
|
array = [
|
||||||
{ name = "numpy" },
|
{ name = "numpy" },
|
||||||
]
|
]
|
||||||
|
complete = [
|
||||||
|
{ name = "bokeh" },
|
||||||
|
{ name = "distributed" },
|
||||||
|
{ name = "jinja2" },
|
||||||
|
{ name = "lz4" },
|
||||||
|
{ name = "numpy" },
|
||||||
|
{ name = "pandas" },
|
||||||
|
{ name = "pyarrow" },
|
||||||
|
]
|
||||||
distributed = [
|
distributed = [
|
||||||
{ name = "distributed" },
|
{ name = "distributed" },
|
||||||
]
|
]
|
||||||
|
|
@ -1064,7 +1073,7 @@ wheels = [
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "entropic-perma-risk"
|
name = "entropic-perma-risk"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { virtual = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aiohttp" },
|
{ name = "aiohttp" },
|
||||||
{ name = "bokeh" },
|
{ name = "bokeh" },
|
||||||
|
|
@ -1094,7 +1103,8 @@ dependencies = [
|
||||||
{ name = "rich" },
|
{ name = "rich" },
|
||||||
{ name = "rioxarray" },
|
{ name = "rioxarray" },
|
||||||
{ name = "scipy" },
|
{ name = "scipy" },
|
||||||
{ name = "smart-geocubes", extra = ["dask", "stac", "viz"] },
|
{ name = "seaborn" },
|
||||||
|
{ name = "smart-geocubes", extra = ["dask", "gee", "stac", "viz"] },
|
||||||
{ name = "stopuhr" },
|
{ name = "stopuhr" },
|
||||||
{ name = "xanimate" },
|
{ name = "xanimate" },
|
||||||
{ name = "xarray" },
|
{ name = "xarray" },
|
||||||
|
|
@ -1109,7 +1119,7 @@ requires-dist = [
|
||||||
{ name = "bokeh", specifier = ">=3.7.3" },
|
{ name = "bokeh", specifier = ">=3.7.3" },
|
||||||
{ name = "cartopy", specifier = ">=0.24.1" },
|
{ name = "cartopy", specifier = ">=0.24.1" },
|
||||||
{ name = "cdsapi", specifier = ">=0.7.6" },
|
{ name = "cdsapi", specifier = ">=0.7.6" },
|
||||||
{ name = "cyclopts", specifier = ">=3.17.0" },
|
{ name = "cyclopts", specifier = ">=4.0.0" },
|
||||||
{ name = "dask", specifier = ">=2025.5.1" },
|
{ name = "dask", specifier = ">=2025.5.1" },
|
||||||
{ name = "distributed", specifier = ">=2025.5.1" },
|
{ name = "distributed", specifier = ">=2025.5.1" },
|
||||||
{ name = "earthengine-api", specifier = ">=1.6.9" },
|
{ name = "earthengine-api", specifier = ">=1.6.9" },
|
||||||
|
|
@ -1133,7 +1143,8 @@ requires-dist = [
|
||||||
{ name = "rich", specifier = ">=14.0.0" },
|
{ name = "rich", specifier = ">=14.0.0" },
|
||||||
{ name = "rioxarray", specifier = ">=0.19.0" },
|
{ name = "rioxarray", specifier = ">=0.19.0" },
|
||||||
{ name = "scipy", specifier = ">=1.15.3" },
|
{ name = "scipy", specifier = ">=1.15.3" },
|
||||||
{ name = "smart-geocubes", extras = ["arcticdem", "dask", "stac", "viz"], specifier = ">=0.0.9" },
|
{ name = "seaborn", specifier = ">=0.13.2" },
|
||||||
|
{ name = "smart-geocubes", extras = ["gee", "dask", "stac", "viz"], specifier = ">=0.0.9" },
|
||||||
{ name = "stopuhr", specifier = ">=0.0.10" },
|
{ name = "stopuhr", specifier = ">=0.0.10" },
|
||||||
{ name = "xanimate", git = "https://github.com/davbyr/xAnimate" },
|
{ name = "xanimate", git = "https://github.com/davbyr/xAnimate" },
|
||||||
{ name = "xarray", specifier = ">=2025.9.0" },
|
{ name = "xarray", specifier = ">=2025.9.0" },
|
||||||
|
|
@ -2120,6 +2131,30 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/2a/86/30142987e19a0f9bb464ada0dad647fc39b81bac5eacefdcdfff0d14c41e/lonboard-0.12.1-py3-none-any.whl", hash = "sha256:74fd1a904443d38018aa142214367d183a89fc41d16a36f5b3cde17b2e293773", size = 928199 },
|
{ url = "https://files.pythonhosted.org/packages/2a/86/30142987e19a0f9bb464ada0dad647fc39b81bac5eacefdcdfff0d14c41e/lonboard-0.12.1-py3-none-any.whl", hash = "sha256:74fd1a904443d38018aa142214367d183a89fc41d16a36f5b3cde17b2e293773", size = 928199 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lz4"
|
||||||
|
version = "4.4.4"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/c6/5a/945f5086326d569f14c84ac6f7fcc3229f0b9b1e8cc536b951fd53dfb9e1/lz4-4.4.4.tar.gz", hash = "sha256:070fd0627ec4393011251a094e08ed9fdcc78cb4e7ab28f507638eee4e39abda", size = 171884 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f7/2d/5523b4fabe11cd98f040f715728d1932eb7e696bfe94391872a823332b94/lz4-4.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:23ae267494fdd80f0d2a131beff890cf857f1b812ee72dbb96c3204aab725553", size = 220669 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/91/06/1a5bbcacbfb48d8ee5b6eb3fca6aa84143a81d92946bdb5cd6b005f1863e/lz4-4.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fff9f3a1ed63d45cb6514bfb8293005dc4141341ce3500abdfeb76124c0b9b2e", size = 189661 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fa/08/39eb7ac907f73e11a69a11576a75a9e36406b3241c0ba41453a7eb842abb/lz4-4.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea7f07329f85a8eda4d8cf937b87f27f0ac392c6400f18bea2c667c8b7f8ecc", size = 1238775 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e9/26/05840fbd4233e8d23e88411a066ab19f1e9de332edddb8df2b6a95c7fddc/lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ccab8f7f7b82f9fa9fc3b0ba584d353bd5aa818d5821d77d5b9447faad2aaad", size = 1265143 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b7/5d/5f2db18c298a419932f3ab2023deb689863cf8fd7ed875b1c43492479af2/lz4-4.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e43e9d48b2daf80e486213128b0763deed35bbb7a59b66d1681e205e1702d735", size = 1185032 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c4/e6/736ab5f128694b0f6aac58343bcf37163437ac95997276cd0be3ea4c3342/lz4-4.4.4-cp312-cp312-win32.whl", hash = "sha256:33e01e18e4561b0381b2c33d58e77ceee850a5067f0ece945064cbaac2176962", size = 88284 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/40/b8/243430cb62319175070e06e3a94c4c7bd186a812e474e22148ae1290d47d/lz4-4.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d21d1a2892a2dcc193163dd13eaadabb2c1b803807a5117d8f8588b22eaf9f12", size = 99918 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6c/e1/0686c91738f3e6c2e1a243e0fdd4371667c4d2e5009b0a3605806c2aa020/lz4-4.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:2f4f2965c98ab254feddf6b5072854a6935adab7bc81412ec4fe238f07b85f62", size = 89736 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/3b/3c/d1d1b926d3688263893461e7c47ed7382a969a0976fc121fc678ec325fc6/lz4-4.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed6eb9f8deaf25ee4f6fad9625d0955183fdc90c52b6f79a76b7f209af1b6e54", size = 220678 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/26/89/8783d98deb058800dabe07e6cdc90f5a2a8502a9bad8c5343c641120ace2/lz4-4.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:18ae4fe3bafb344dbd09f976d45cbf49c05c34416f2462828f9572c1fa6d5af7", size = 189670 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/22/ab/a491ace69a83a8914a49f7391e92ca0698f11b28d5ce7b2ececa2be28e9a/lz4-4.4.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57fd20c5fc1a49d1bbd170836fccf9a338847e73664f8e313dce6ac91b8c1e02", size = 1238746 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/97/12/a1f2f4fdc6b7159c0d12249456f9fe454665b6126e98dbee9f2bd3cf735c/lz4-4.4.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9cb387c33f014dae4db8cb4ba789c8d2a0a6d045ddff6be13f6c8d9def1d2a6", size = 1265119 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/50/6e/e22e50f5207649db6ea83cd31b79049118305be67e96bec60becf317afc6/lz4-4.4.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0be9f68240231e1e44118a4ebfecd8a5d4184f0bdf5c591c98dd6ade9720afd", size = 1184954 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4c/c4/2a458039645fcc6324ece731d4d1361c5daf960b553d1fcb4261ba07d51c/lz4-4.4.4-cp313-cp313-win32.whl", hash = "sha256:e9ec5d45ea43684f87c316542af061ef5febc6a6b322928f059ce1fb289c298a", size = 88289 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/00/96/b8e24ea7537ab418074c226279acfcaa470e1ea8271003e24909b6db942b/lz4-4.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:a760a175b46325b2bb33b1f2bbfb8aa21b48e1b9653e29c10b6834f9bb44ead4", size = 99925 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a5/a5/f9838fe6aa132cfd22733ed2729d0592259fff074cefb80f19aa0607367b/lz4-4.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:f4c21648d81e0dda38b4720dccc9006ae33b0e9e7ffe88af6bf7d4ec124e2fba", size = 89743 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mapclassify"
|
name = "mapclassify"
|
||||||
version = "2.10.0"
|
version = "2.10.0"
|
||||||
|
|
@ -2406,6 +2441,11 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/71/96/d5d8859a6dac29f8ebc815ff8e75770bd513db9f08d7a711e21ae562a948/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30d20e56b9ba2c48884eb89c91b63e6c0612b4927881707e34402719153ef17f", size = 9378149 },
|
{ url = "https://files.pythonhosted.org/packages/71/96/d5d8859a6dac29f8ebc815ff8e75770bd513db9f08d7a711e21ae562a948/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30d20e56b9ba2c48884eb89c91b63e6c0612b4927881707e34402719153ef17f", size = 9378149 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d1/80/b9c19f1bb4ac6c5fa6f94a4f278bc68a778473d1814a86a375d7cffa193a/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d6bfd38ba0bde04d56f06c1554714a2ea9dab75811c89450dc3ec57a9d36b80", size = 9254471 },
|
{ url = "https://files.pythonhosted.org/packages/d1/80/b9c19f1bb4ac6c5fa6f94a4f278bc68a778473d1814a86a375d7cffa193a/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d6bfd38ba0bde04d56f06c1554714a2ea9dab75811c89450dc3ec57a9d36b80", size = 9254471 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/66/b5/e04550fd53de57001dbd5a87242da7ff784c80790adc48897977b6ccf891/netCDF4-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:5c5fbee6134ee1246c397e1508e5297d825aa19221fdf3fa8dc9727ad824d7a5", size = 6990521 },
|
{ url = "https://files.pythonhosted.org/packages/66/b5/e04550fd53de57001dbd5a87242da7ff784c80790adc48897977b6ccf891/netCDF4-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:5c5fbee6134ee1246c397e1508e5297d825aa19221fdf3fa8dc9727ad824d7a5", size = 6990521 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/84/0a/182bb4fe5639699ba39d558b553b8e6f04fbfea6cf78404c0f21ef149bf7/netcdf4-1.7.2-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:7e81c3c47f2772eab0b93fba8bb05b17b58dce17720e1bed25e9d76551deecd0", size = 2751391 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2d/1f/54ac27c791360f7452ca27ed1cb2917946bbe1ea4337c590a5abcef6332d/netcdf4-1.7.2-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:cb2791dba37fc98fd1ac4e236c97822909f54efbcdf7f1415c9777810e0a28f4", size = 2387513 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5c/5e/9bf3008a9e45c08f4c9fedce4d6f722ef5d970f56a9c5eb375a200dd2b66/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf11480f6b8a5b246818ffff6b4d90481e51f8b9555b41af0c372eb0aaf8b65f", size = 9621674 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a1/75/46871e85f2bbfb1efe229623d25d7c9daa17e2e968d5235572b2c8bb53e8/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ccc05328a8ff31921b539821791aeb20b054879f3fdf6d1d505bf6422824fec", size = 9453759 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cd/10/c52f12297965938d9b9be666ea1f9d8340c2aea31d6909d90aa650847248/netcdf4-1.7.2-cp311-abi3-win_amd64.whl", hash = "sha256:999bfc4acebf400ed724d5e7329e2e768accc7ee1fa1d82d505da782f730301b", size = 7148514 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -3622,6 +3662,20 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/3b/88/d32b4b52dcdc5c9c5d6475ccbdc5272588c23523c2164b6717de0ff0801d/scooby-0.10.2-py3-none-any.whl", hash = "sha256:8aec2f3f7fb541bf2c9795cad43a88c976869248a4c16523f07f366388ffcfff", size = 18668 },
|
{ url = "https://files.pythonhosted.org/packages/3b/88/d32b4b52dcdc5c9c5d6475ccbdc5272588c23523c2164b6717de0ff0801d/scooby-0.10.2-py3-none-any.whl", hash = "sha256:8aec2f3f7fb541bf2c9795cad43a88c976869248a4c16523f07f366388ffcfff", size = 18668 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "seaborn"
|
||||||
|
version = "0.13.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "matplotlib" },
|
||||||
|
{ name = "numpy" },
|
||||||
|
{ name = "pandas" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "shapely"
|
name = "shapely"
|
||||||
version = "2.1.2"
|
version = "2.1.2"
|
||||||
|
|
@ -3707,6 +3761,12 @@ dask = [
|
||||||
{ name = "dask" },
|
{ name = "dask" },
|
||||||
{ name = "distributed" },
|
{ name = "distributed" },
|
||||||
]
|
]
|
||||||
|
gee = [
|
||||||
|
{ name = "earthengine-api" },
|
||||||
|
{ name = "rasterio" },
|
||||||
|
{ name = "rioxarray" },
|
||||||
|
{ name = "xee" },
|
||||||
|
]
|
||||||
stac = [
|
stac = [
|
||||||
{ name = "odc-stac" },
|
{ name = "odc-stac" },
|
||||||
{ name = "pyarrow" },
|
{ name = "pyarrow" },
|
||||||
|
|
@ -3981,6 +4041,11 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/8d/f0/73c24457c941b8b08f7d090853e40f4b2cdde88b5da721f3f28e98df77c9/xarray-2025.9.0-py3-none-any.whl", hash = "sha256:79f0e25fb39571f612526ee998ee5404d8725a1db3951aabffdb287388885df0", size = 1349595 },
|
{ url = "https://files.pythonhosted.org/packages/8d/f0/73c24457c941b8b08f7d090853e40f4b2cdde88b5da721f3f28e98df77c9/xarray-2025.9.0-py3-none-any.whl", hash = "sha256:79f0e25fb39571f612526ee998ee5404d8725a1db3951aabffdb287388885df0", size = 1349595 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.optional-dependencies]
|
||||||
|
parallel = [
|
||||||
|
{ name = "dask", extra = ["complete"] },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xdggs"
|
name = "xdggs"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
|
|
@ -4001,6 +4066,21 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/49/89/d62e492218806d2d40e9d7baa1fbf8e355fc3d5b8acbeaeac0d9f27d4eca/xdggs-0.2.2-py3-none-any.whl", hash = "sha256:66b9a5f84383e24aea00e986e964a7f77ae23b08ebe179f5c2e74b747006d470", size = 35393 },
|
{ url = "https://files.pythonhosted.org/packages/49/89/d62e492218806d2d40e9d7baa1fbf8e355fc3d5b8acbeaeac0d9f27d4eca/xdggs-0.2.2-py3-none-any.whl", hash = "sha256:66b9a5f84383e24aea00e986e964a7f77ae23b08ebe179f5c2e74b747006d470", size = 35393 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xee"
|
||||||
|
version = "0.0.22"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "affine" },
|
||||||
|
{ name = "earthengine-api" },
|
||||||
|
{ name = "pyproj" },
|
||||||
|
{ name = "xarray", extra = ["parallel"] },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/fe/9c/722cf70efef355e476f218ba35f60ef4c64cea335ef6c9f772c259132856/xee-0.0.22.tar.gz", hash = "sha256:c3acf2e82ce36caddf84f43b442131cc3707300533143e19f70e31bc8652d4ac", size = 421940 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/76/86/659f5b3c692d3813edcb373b9260e79303bc4a81234b80e5438194e0c5f2/xee-0.0.22-py3-none-any.whl", hash = "sha256:9d05f2fff1d24e5d39ab0621c51486f511851334f1f2fc4e0779aec2beba322a", size = 31204 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xproj"
|
name = "xproj"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue