Restructure to steps
This commit is contained in:
parent
2af5c011a3
commit
ce4c728e1a
10 changed files with 1377 additions and 640 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -14,3 +14,9 @@ data
|
||||||
|
|
||||||
# Editors
|
# Editors
|
||||||
.vscode/
|
.vscode/
|
||||||
|
|
||||||
|
# Debug / Playground notebooks
|
||||||
|
pg.ipynb
|
||||||
|
playground.ipynb
|
||||||
|
*fix*.ipynb
|
||||||
|
*debug*.ipynb
|
||||||
|
|
|
||||||
616
era5.py
616
era5.py
|
|
@ -1,616 +0,0 @@
|
||||||
"""Download and preprocess ERA5 data.
|
|
||||||
|
|
||||||
Variables of Interest:
|
|
||||||
- 2 metre temperature (t2m) [instant]
|
|
||||||
- Total precipitation (tp) [accum]
|
|
||||||
- Snow Fall (sf) [accum]
|
|
||||||
- Snow cover (snowc) [instant]
|
|
||||||
- Snow depth (sde) [instant]
|
|
||||||
- Surface sensible heat flux (sshf) [accum]
|
|
||||||
- Lake ice bottom temperature (lblt) [instant]
|
|
||||||
|
|
||||||
Daily Variables (downloaded from hourly data):
|
|
||||||
- t2m_daily_max
|
|
||||||
- t2m_daily_min
|
|
||||||
- tp_daily_sum
|
|
||||||
- sf_daily_sum
|
|
||||||
- snowc_daily_mean
|
|
||||||
- sde_daily_mean
|
|
||||||
- sshf_daily_sum
|
|
||||||
- lblt_daily_max
|
|
||||||
|
|
||||||
Derived Daily Variables:
|
|
||||||
- t2m_daily_avg
|
|
||||||
- t2m_daily_range
|
|
||||||
- t2m_daily_skew
|
|
||||||
- thawing_degree_days
|
|
||||||
- freezing_degree_days
|
|
||||||
- thawing_days
|
|
||||||
- freezing_days
|
|
||||||
- precipitation_occurrences
|
|
||||||
- snowfall_occurrences
|
|
||||||
- snow_isolation (snowc * sde)
|
|
||||||
|
|
||||||
Monthly Variables:
|
|
||||||
- t2m_monthly_max
|
|
||||||
- t2m_monthly_min
|
|
||||||
- tp_monthly_sum
|
|
||||||
- sf_monthly_sum
|
|
||||||
- snowc_monthly_mean
|
|
||||||
- sde_monthly_mean
|
|
||||||
- sshf_monthly_sum
|
|
||||||
- lblt_monthly_max
|
|
||||||
- t2m_monthly_avg
|
|
||||||
- t2m_monthly_range_avg
|
|
||||||
- t2m_monthly_skew_avg
|
|
||||||
- thawing_degree_days_monthly
|
|
||||||
- freezing_degree_days_monthly
|
|
||||||
- thawing_days_monthly
|
|
||||||
- freezing_days_monthly
|
|
||||||
- precipitation_occurrences_monthly TODO: Rename to precipitation_days_monthly?
|
|
||||||
- snowfall_occurrences_monthly TODO: Rename to snowfall_days_monthly?
|
|
||||||
- snow_isolation_monthly_mean
|
|
||||||
|
|
||||||
Yearly Variables:
|
|
||||||
- TODO
|
|
||||||
|
|
||||||
# TODO Variables:
|
|
||||||
- Day of first thaw (yearly)
|
|
||||||
- Day of last thaw (yearly)
|
|
||||||
- Thawing period length (yearly)
|
|
||||||
- Freezing period length (yearly)
|
|
||||||
|
|
||||||
Author: Tobias Hölzer
|
|
||||||
Date: 09. June 2025
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Literal
|
|
||||||
|
|
||||||
import cyclopts
|
|
||||||
import dask.distributed as dd
|
|
||||||
import geopandas as gpd
|
|
||||||
import odc.geo
|
|
||||||
import odc.geo.xr
|
|
||||||
import pandas as pd
|
|
||||||
import shapely
|
|
||||||
import shapely.ops
|
|
||||||
import xarray as xr
|
|
||||||
from numcodecs.zarr3 import Blosc
|
|
||||||
from rich import pretty, print, traceback
|
|
||||||
from rich.progress import track
|
|
||||||
from shapely.geometry import LineString, Polygon
|
|
||||||
|
|
||||||
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd])
|
|
||||||
pretty.install()
|
|
||||||
|
|
||||||
cli = cyclopts.App()
|
|
||||||
|
|
||||||
# TODO: Directly handle stuff on a grid level - this is more what the zarr access is indented to do
|
|
||||||
|
|
||||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
|
||||||
# DATA_DIR = Path("data")
|
|
||||||
ERA5_DIR = DATA_DIR / "era5"
|
|
||||||
AGG_PATH = ERA5_DIR / "era5_agg.zarr"
|
|
||||||
ALIGNED_PATH = ERA5_DIR / "era5_spatial_aligned.zarr"
|
|
||||||
MONTHLY_PATH = ERA5_DIR / "era5_monthly.zarr"
|
|
||||||
YEARLY_PATH = ERA5_DIR / "era5_yearly.zarr"
|
|
||||||
|
|
||||||
min_lat = 50
|
|
||||||
max_lat = 83.7 # Ensures the right Chunks Size (90 - 64 / 10 + 0.1)
|
|
||||||
min_time = "1990-01-01"
|
|
||||||
max_time = "2024-12-31"
|
|
||||||
|
|
||||||
|
|
||||||
today = time.strftime("%Y-%m-%d")
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: I think it would be better to aggregate via hours instead of days
|
|
||||||
# Pipeline would be:
|
|
||||||
# Download hourly data -> Spatially match hourly data ->
|
|
||||||
# For {daily, monthly, yearly}:
|
|
||||||
# Enrich -> Aggregate temporally
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: Rethink aggregations by differentiating between "instant" and "accum" variables:
|
|
||||||
# https://consensus.app/search/instantaneous-versus-accumulated-weather/JBaNbhc1R_-BwN5E9Un0Fw/
|
|
||||||
|
|
||||||
# ================
|
|
||||||
# === Download ===
|
|
||||||
# ================
|
|
||||||
|
|
||||||
|
|
||||||
def create_encoding(ds: xr.Dataset) -> dict:
    """Create compression encoding for zarr dataset storage.

    Configures Blosc zstd compression at level 9 for every data variable
    AND every coordinate of the dataset, so the whole store is compressed
    uniformly.

    Args:
        ds (xr.Dataset): The xarray Dataset to create encoding for.

    Returns:
        dict: Encoding dictionary with compression settings for each variable.

    """
    # Removed dead commented-out BloscCodec(zlib) variant; zstd/9 is the one in use.
    compressor = Blosc(cname="zstd", clevel=9)
    return {var: {"compressors": compressor} for var in [*ds.data_vars, *ds.coords]}
|
|
||||||
|
|
||||||
|
|
||||||
def download_daily_aggregated():
    """Download and aggregate ERA5 data to daily resolution.

    Downloads ERA5 reanalysis data from the DESTINE Earth Data Hub and aggregates
    it to daily resolution. Includes temperature extremes, precipitation, snow,
    and surface heat flux variables.

    The function downloads hourly data and creates daily aggregates:
    - Temperature: daily min/max
    - Precipitation and snowfall: daily totals
    - Snow cover and depth: daily means
    - Surface heat flux: daily totals
    - Lake ice temperature: daily max

    The aggregated data is saved to a zarr file with compression.
    """
    era5 = xr.open_dataset(
        "https://data.earthdatahub.destine.eu/era5/reanalysis-era5-land-no-antartica-v0.zarr",
        storage_options={"client_kwargs": {"trust_env": True}},
        chunks={},
        engine="zarr",
    ).rename({"valid_time": "time"})
    subset = {
        "latitude": slice(max_lat, min_lat),
    }

    # Compute the closest chunk-start to min_time, to avoid problems with cropped chunks at the start
    tchunksize = era5.chunksizes["time"][0]
    era5_chunk_starts = pd.date_range(era5.time.min().item(), era5.time.max().item(), freq=f"{tchunksize}h")
    closest_chunk_start = era5_chunk_starts[
        era5_chunk_starts.get_indexer([pd.to_datetime(min_time)], method="ffill")[0]
    ]
    subset["time"] = slice(str(closest_chunk_start), max_time)

    era5 = era5.sel(**subset)

    # (source variable, daily reduction, output name, attrs).
    # "instant" variables get min/max/mean, "accum" variables get daily sums.
    spec = [
        ("t2m", "max", "t2m_daily_max", {"long_name": "Daily maximum 2 metre temperature", "units": "K"}),
        ("t2m", "min", "t2m_daily_min", {"long_name": "Daily minimum 2 metre temperature", "units": "K"}),
        ("tp", "sum", "tp_daily_sum", {"long_name": "Daily total precipitation", "units": "m"}),
        ("sf", "sum", "sf_daily_sum", {"long_name": "Daily total snow fall", "units": "m"}),
        # NOTE(review): ERA5-Land snow cover (snowc) is reported in %, not metres — was "m" before.
        ("snowc", "mean", "snowc_daily_mean", {"long_name": "Daily mean snow cover", "units": "%"}),
        ("sde", "mean", "sde_daily_mean", {"long_name": "Daily mean snow depth", "units": "m"}),
        ("sshf", "sum", "sshf_daily_sum", {"long_name": "Daily total surface sensible heat flux", "units": "J/m²"}),
        ("lblt", "max", "lblt_daily_max", {"long_name": "Daily maximum lake ice bottom temperature", "units": "K"}),
    ]
    era5_agg = xr.merge(
        [getattr(era5[var].resample(time="1D"), reduction)().rename(name) for var, reduction, name, _ in spec]
    )
    for _, _, name, attrs in spec:
        era5_agg[name].attrs = attrs

    era5_agg = era5_agg.odc.assign_crs("epsg:4326")
    # Scalar coordinates inherited from the hourly product are not needed downstream.
    era5_agg = era5_agg.drop_vars(["surface", "number", "depthBelowLandLayer"])
    era5_agg.to_zarr(AGG_PATH, mode="w", encoding=create_encoding(era5_agg), consolidated=False)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command
def download():
    """Download ERA5 data using Dask cluster for parallel processing.

    Spins up a local Dask cluster (one worker, 10 threads, 100GB memory limit)
    and runs the daily-aggregation download inside it.
    """
    with dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster:
        with dd.Client(cluster) as client:
            print(client)
            print(client.dashboard_link)
            download_daily_aggregated()
            print("Downloaded and aggregated ERA5 data.")
|
|
||||||
|
|
||||||
|
|
||||||
# ===========================
|
|
||||||
# === Spatial Aggregation ===
|
|
||||||
# ===========================
|
|
||||||
|
|
||||||
|
|
||||||
def _crosses_antimeridian(geom: Polygon) -> bool:
    """Heuristically decide whether a polygon straddles the 180° meridian.

    A polygon counts as antimeridian-crossing when it has vertices on both
    sides of 0° longitude AND reaches beyond ±90°, which excludes shapes that
    merely straddle the prime meridian.
    """
    lons = shapely.get_coordinates(geom)[:, 0]
    spans_both_signs = (lons > 0).any() and (lons < 0).any()
    return spans_both_signs and abs(lons).max() > 90
|
|
||||||
|
|
||||||
|
|
||||||
def _split_antimeridian_cell(geom: Polygon) -> list[Polygon]:
    """Split an antimeridian-crossing polygon into parts at 180° longitude.

    Assumes the caller already verified the crossing. Western-hemisphere
    vertices are shifted east by 360° so the ring becomes contiguous, then
    the polygon is cut along the 180° line.
    """
    coords = shapely.get_coordinates(geom)
    # Vectorized equivalent of looping over rows and adding 360 to negative longitudes.
    coords[coords[:, 0] < 0, 0] += 360
    shifted = Polygon(coords)
    antimeridian = LineString([[180, -90], [180, 90]])
    return list(shapely.ops.split(shifted, antimeridian).geoms)
|
|
||||||
|
|
||||||
|
|
||||||
def _check_geobox(geobox):
|
|
||||||
x, y = geobox.shape
|
|
||||||
return x > 1 and y > 1
|
|
||||||
|
|
||||||
|
|
||||||
def extract_cell_data(idx: int, geom: Polygon) -> bool:
    """Extract ERA5 data for a specific grid cell geometry.

    Extracts and spatially averages ERA5 data within the bounds of a grid cell.
    Handles antimeridian-crossing cells by splitting them appropriately.
    The extracted data is written into the pre-allocated aligned zarr file
    at the region belonging to this cell.

    Args:
        idx (int): Index of the grid cell (used as the "cell" coordinate value).
        geom (Polygon): Polygon geometry of the grid cell in EPSG:4326.

    Returns:
        bool: True if data was written, False if the cell doesn't overlap
            with the ERA5 data extent.

    """
    # Re-open the store per call: this function runs in worker threads and must
    # not share a dataset handle with other threads.
    era5_agg = (
        xr.open_zarr(AGG_PATH, consolidated=False)
        .set_coords("spatial_ref")
        .drop_vars(["surface", "number", "depthBelowLandLayer"])
    )
    # cell.geometry is a shapely Polygon
    if not _crosses_antimeridian(geom):
        geoms = [geom]
    # Split geometry in case it crossed antimeridian
    else:
        geoms = _split_antimeridian_cell(geom)
    cell_data = []
    for geom in geoms:
        geom = odc.geo.Geometry(geom, crs="epsg:4326")
        # Skip parts that cover at most one ERA5 pixel in some dimension.
        if not _check_geobox(era5_agg.odc.geobox.enclosing(geom)):
            continue
        # Spatial mean over all pixels inside the (cropped) cell geometry.
        cell_data.append(era5_agg.odc.crop(geom).drop_vars("spatial_ref").mean(["latitude", "longitude"]))
    if len(cell_data) == 0:
        return False
    elif len(cell_data) == 1:
        cell_data = cell_data[0]
    else:
        # Antimeridian case: average the split parts.
        # NOTE(review): this is an unweighted mean over parts, not area-weighted — confirm intended.
        cell_data = xr.concat(cell_data, dim="part").mean("part")
    cell_data = cell_data.expand_dims({"cell": [idx]}).compute()
    # region="auto" writes only this cell's slot of the pre-created store.
    cell_data.to_zarr(ALIGNED_PATH, region="auto", consolidated=False)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command
def spatial_agg(grid: Literal["hex", "healpix"], level: int, n_workers: int = 10):
    """Perform spatial aggregation of ERA5 data to grid cells.

    Loads a grid and spatially aggregates ERA5 data to each grid cell using
    parallel processing. Creates an empty zarr file first, then fills it
    with extracted data for each cell.

    Args:
        grid ("hex", "healpix"): Grid type.
        level (int): Grid resolution level.
        n_workers (int, optional): Number of parallel workers to use. Defaults to 10.

    """
    gridname = f"permafrost_{grid}{level}"
    # NOTE(review): the GeoDataFrame shadows the `grid` (type name) parameter from here on.
    grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
    # Create an empty zarr array with the right dimensions
    era5_agg = (
        xr.open_zarr(AGG_PATH, consolidated=False)
        .set_coords("spatial_ref")
        .drop_vars(["surface", "number", "depthBelowLandLayer"])
    )
    assert {"latitude", "longitude", "time"} == set(era5_agg.dims), (
        f"Expected dims ('latitude', 'longitude', 'time'), got {era5_agg.dims}"
    )
    assert era5_agg.odc.crs == "epsg:4326", f"Expected CRS 'epsg:4326', got {era5_agg.odc.crs}"
    # Template dataset: one (cell, time) series per grid cell, chunked with
    # cell-size 1 so each worker can write its cell's region independently.
    empty = (
        xr.zeros_like(era5_agg.isel(latitude=0, longitude=0))
        .expand_dims({"cell": [idx for idx, _ in grid.iterrows()]})
        .chunk({"cell": 1, "time": len(era5_agg.time)})
    )
    empty.to_zarr(ALIGNED_PATH, mode="w", consolidated=False, encoding=create_encoding(empty))

    print(f"Starting spatial matching of {len(grid)} cells with {n_workers} workers...")

    # TODO: Maybe change to process pool executor?
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        # Map each future back to its cell index for progress reporting.
        futures = {
            executor.submit(extract_cell_data, idx, row.geometry): idx
            for idx, row in grid.to_crs("epsg:4326").iterrows()
        }
        for future in track(as_completed(futures), total=len(futures), description="Processing cells"):
            idx = futures[future]
            try:
                # extract_cell_data returns False for cells outside the ERA5 extent.
                flag = future.result()
                if flag:
                    print(f"Successfully written cell {idx}")
                else:
                    print(f"Cell {idx} did not overlap with ERA5 data.")
            except Exception as e:
                # Best-effort: report and continue with the remaining cells.
                print(f"Error processing cell {idx}: {e}")
                print(type(e))
    print("Finished spatial matching.")
|
|
||||||
|
|
||||||
|
|
||||||
# ============================
|
|
||||||
# === Temporal Aggregation ===
|
|
||||||
# ============================
|
|
||||||
|
|
||||||
|
|
||||||
def daily_enrich() -> xr.Dataset:
    """Enrich daily ERA5 data with derived climate variables.

    Loads the spatially aligned ERA5 data and adds derived variables:
    daily average/range/skew temperature, thawing/freezing degree days and
    day indicators, precipitation/snowfall occurrence flags, and a snow
    isolation index (snow cover * snow depth).

    Returns:
        xr.Dataset: Enriched dataset with original and derived variables.

    """
    era5 = xr.open_zarr(ALIGNED_PATH, consolidated=False).set_coords("spatial_ref")
    assert {"cell", "time"} == set(era5.dims), f"Expected dims ('cell', 'time'), got {era5.dims}"

    # Formulas based on Groeke et. al. (2025) Stochastic Weather generation...
    tmax = era5.t2m_daily_max
    tmin = era5.t2m_daily_min
    era5["t2m_daily_avg"] = (tmax + tmin) / 2
    era5["t2m_daily_range"] = tmax - tmin
    era5["t2m_daily_skew"] = (era5.t2m_daily_avg - tmin) / era5.t2m_daily_range

    # Degree days relative to the freezing point (273.15 K).
    era5["thawing_degree_days"] = (era5.t2m_daily_avg - 273.15).clip(min=0)
    era5["freezing_degree_days"] = (273.15 - era5.t2m_daily_avg).clip(min=0)

    # 0/1 indicators, summed later into monthly/yearly day counts.
    era5["thawing_days"] = (era5.t2m_daily_avg > 273.15).astype(int)
    era5["freezing_days"] = (era5.t2m_daily_avg < 273.15).astype(int)
    era5["precipitation_occurrences"] = (era5.tp_daily_sum > 0).astype(int)
    era5["snowfall_occurrences"] = (era5.sf_daily_sum > 0).astype(int)

    era5["snow_isolation"] = era5.snowc_daily_mean * era5.sde_daily_mean

    # Attach metadata for every derived variable in one place.
    derived_attrs = {
        "t2m_daily_avg": {"long_name": "Daily average 2 metre temperature", "units": "K"},
        "t2m_daily_range": {"long_name": "Daily range of 2 metre temperature", "units": "K"},
        "t2m_daily_skew": {"long_name": "Daily skewness of 2 metre temperature"},
        "thawing_degree_days": {"long_name": "Thawing degree days", "units": "K"},
        "freezing_degree_days": {"long_name": "Freezing degree days", "units": "K"},
        "thawing_days": {"long_name": "Thawing days"},
        "freezing_days": {"long_name": "Freezing days"},
        "precipitation_occurrences": {"long_name": "Precipitation occurrences"},
        "snowfall_occurrences": {"long_name": "Snowfall occurrences"},
        "snow_isolation": {"long_name": "Snow isolation"},
    }
    for name, meta in derived_attrs.items():
        era5[name].attrs = meta

    return era5
|
|
||||||
|
|
||||||
|
|
||||||
def monthly_aggregate():
    """Aggregate enriched daily ERA5 data to monthly resolution.

    Takes the enriched daily ERA5 data and creates monthly aggregates using
    appropriate statistical functions for each variable type: min/max for
    temperature extremes, sums for accumulated quantities and day counts,
    means for averaged quantities.

    The aggregated monthly data is saved to a zarr file for further processing.
    """
    era5 = daily_enrich()
    assert {"cell", "time"} == set(era5.dims), f"Expected dims ('cell', 'time'), got {era5.dims}"

    # (source variable, monthly reduction, output name)
    spec = [
        # Original variables
        ("t2m_daily_min", "min", "t2m_monthly_min"),
        ("t2m_daily_max", "max", "t2m_monthly_max"),
        ("tp_daily_sum", "sum", "tp_monthly_sum"),
        ("sf_daily_sum", "sum", "sf_monthly_sum"),
        ("snowc_daily_mean", "mean", "snowc_monthly_mean"),
        ("sde_daily_mean", "mean", "sde_monthly_mean"),
        ("sshf_daily_sum", "sum", "sshf_monthly_sum"),
        ("lblt_daily_max", "max", "lblt_monthly_max"),
        # Enriched variables
        ("t2m_daily_avg", "mean", "t2m_monthly_avg"),
        ("t2m_daily_range", "mean", "t2m_monthly_range_avg"),
        ("t2m_daily_skew", "mean", "t2m_monthly_skew_avg"),
        ("thawing_degree_days", "sum", "thawing_degree_days_monthly"),
        ("freezing_degree_days", "sum", "freezing_degree_days_monthly"),
        ("thawing_days", "sum", "thawing_days_monthly"),
        ("freezing_days", "sum", "freezing_days_monthly"),
        ("precipitation_occurrences", "sum", "precipitation_occurrences_monthly"),
        ("snowfall_occurrences", "sum", "snowfall_occurrences_monthly"),
        ("snow_isolation", "mean", "snow_isolation_monthly_mean"),
    ]
    monthly = xr.merge(
        [getattr(era5[var].resample(time="1ME"), op)().rename(name) for var, op, name in spec]
    )
    monthly.to_zarr(MONTHLY_PATH, mode="w", encoding=create_encoding(monthly), consolidated=False)
|
|
||||||
|
|
||||||
|
|
||||||
def _resample_yearly(ds: xr.Dataset, suffix: str) -> xr.Dataset:
    """Resample shifted-calendar monthly data to one value per shifted year.

    Applies min/max to extremes, sum to accumulated quantities and day counts,
    and mean to averaged quantities. Output variables are named after *suffix*
    ("yearly", "winter" or "summer"), e.g. ``t2m_monthly_min`` ->
    ``t2m_{suffix}_min``.

    Args:
        ds (xr.Dataset): Monthly dataset (possibly pre-filtered to a season).
        suffix (str): Name component for the output variables.

    Returns:
        xr.Dataset: One aggregate per variable and shifted year.

    """
    # (source variable, reduction, output name) — identical pattern for all seasons.
    spec = [
        # Original variables
        ("t2m_monthly_min", "min", f"t2m_{suffix}_min"),
        ("t2m_monthly_max", "max", f"t2m_{suffix}_max"),
        ("tp_monthly_sum", "sum", f"tp_{suffix}_sum"),
        ("sf_monthly_sum", "sum", f"sf_{suffix}_sum"),
        ("snowc_monthly_mean", "mean", f"snowc_{suffix}_mean"),
        ("sde_monthly_mean", "mean", f"sde_{suffix}_mean"),
        ("sshf_monthly_sum", "sum", f"sshf_{suffix}_sum"),
        ("lblt_monthly_max", "max", f"lblt_{suffix}_max"),
        # Enriched variables
        ("t2m_monthly_avg", "mean", f"t2m_{suffix}_avg"),
        # TODO: Check if this is correct -> use daily / hourly data instead for range and skew?
        ("t2m_monthly_range_avg", "mean", f"t2m_daily_range_{suffix}_avg"),
        ("t2m_monthly_skew_avg", "mean", f"t2m_daily_skew_{suffix}_avg"),
        ("thawing_degree_days_monthly", "sum", f"thawing_degree_days_{suffix}"),
        ("freezing_degree_days_monthly", "sum", f"freezing_degree_days_{suffix}"),
        ("thawing_days_monthly", "sum", f"thawing_days_{suffix}"),
        ("freezing_days_monthly", "sum", f"freezing_days_{suffix}"),
        ("precipitation_occurrences_monthly", "sum", f"precipitation_occurrences_{suffix}"),
        ("snowfall_occurrences_monthly", "sum", f"snowfall_occurrences_{suffix}"),
        ("snow_isolation_monthly_mean", "mean", f"snow_isolation_{suffix}_mean"),
    ]
    return xr.merge(
        [getattr(ds[var].resample(time="1YE"), op)().rename(name) for var, op, name in spec]
    )


def yearly_aggregate():
    """Aggregate monthly ERA5 data to yearly resolution with seasonal splits.

    Takes monthly aggregated data and creates yearly aggregates using a shifted
    calendar (October to September) to better capture Arctic seasonal patterns.
    Creates separate aggregates for full year, winter (Oct-Apr), and summer
    (May-Sep) periods.

    The first and last incomplete years are excluded from the analysis.
    Winter months are defined as months 1-7 in the shifted calendar,
    and summer months are 8-12.

    The final dataset includes yearly, winter, and summer aggregates for all
    climate variables, saved to a zarr file.
    """
    monthly = xr.open_zarr(MONTHLY_PATH, consolidated=False).set_coords("spatial_ref")
    assert {"cell", "time"} == set(monthly.dims), f"Expected dims ('cell', 'time'), got {monthly.dims}"

    # Yearly aggregates (shifted by +10 months to start in October, first and last years will be cropped)
    monthly_shifted = monthly.copy()
    monthly_shifted["time"] = monthly_shifted.get_index("time") + pd.DateOffset(months=10)
    incomplete_years = {monthly_shifted.time.dt.year.min().item(), monthly_shifted.time.dt.year.max().item()}
    monthly_shifted = monthly_shifted.sel(time=~monthly_shifted.time.dt.year.isin(incomplete_years))

    yearly = _resample_yearly(monthly_shifted, "yearly")

    # Summer / Winter aggregates
    winter_months = [1, 2, 3, 4, 5, 6, 7]  # These do NOT correspond to calendar months, but to the shifted months
    summer_months = [8, 9, 10, 11, 12]
    winter = _resample_yearly(
        monthly_shifted.sel(time=monthly_shifted.time.dt.month.isin(winter_months)), "winter"
    )
    summer = _resample_yearly(
        monthly_shifted.sel(time=monthly_shifted.time.dt.month.isin(summer_months)), "summer"
    )

    combined = xr.merge([yearly, summer, winter])
    combined.to_zarr(YEARLY_PATH, mode="w", encoding=create_encoding(combined), consolidated=False)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command
def temporal_agg(n_workers: int = 10):
    """Perform temporal aggregation of ERA5 data using Dask cluster.

    Starts a local Dask cluster, then runs the monthly and yearly aggregation
    steps in sequence to produce the temporally aggregated climate datasets.

    Args:
        n_workers (int, optional): Number of Dask workers to use. Defaults to 10.

    """
    with dd.LocalCluster(n_workers=n_workers, threads_per_worker=20, memory_limit="10GB") as cluster:
        with dd.Client(cluster) as client:
            print(client)
            print(client.dashboard_link)
            monthly_aggregate()
            yearly_aggregate()
            print("Enriched ERA5 data with additional features and aggregated it temporally.")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
cli()
|
|
||||||
|
|
@ -10,7 +10,7 @@ dependencies = [
|
||||||
"bokeh>=3.7.3",
|
"bokeh>=3.7.3",
|
||||||
"cartopy>=0.24.1",
|
"cartopy>=0.24.1",
|
||||||
"cdsapi>=0.7.6",
|
"cdsapi>=0.7.6",
|
||||||
"cyclopts>=3.17.0",
|
"cyclopts>=4.0.0",
|
||||||
"dask>=2025.5.1",
|
"dask>=2025.5.1",
|
||||||
"distributed>=2025.5.1",
|
"distributed>=2025.5.1",
|
||||||
"earthengine-api>=1.6.9",
|
"earthengine-api>=1.6.9",
|
||||||
|
|
@ -34,7 +34,8 @@ dependencies = [
|
||||||
"rich>=14.0.0",
|
"rich>=14.0.0",
|
||||||
"rioxarray>=0.19.0",
|
"rioxarray>=0.19.0",
|
||||||
"scipy>=1.15.3",
|
"scipy>=1.15.3",
|
||||||
"smart-geocubes[arcticdem,dask,stac,viz]>=0.0.9",
|
"seaborn>=0.13.2",
|
||||||
|
"smart-geocubes[gee,dask,stac,viz]>=0.0.9",
|
||||||
"stopuhr>=0.0.10",
|
"stopuhr>=0.0.10",
|
||||||
"xanimate",
|
"xanimate",
|
||||||
"xarray>=2025.9.0",
|
"xarray>=2025.9.0",
|
||||||
|
|
@ -43,6 +44,14 @@ dependencies = [
|
||||||
"zarr[remote]>=3.1.3",
|
"zarr[remote]>=3.1.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
create-grid = "steps.s0_0_grids.create_grid:main"
|
||||||
|
alpha-earth = "steps.s1_0_alphaearth.alphaearth:main"
|
||||||
|
era5 = "steps.s1_1_era5.era5:cli"
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
|
package = true
|
||||||
|
|
||||||
[tool.uv.sources]
|
[tool.uv.sources]
|
||||||
entropyc = { git = "ssh://git@github.com/AlbertEMC2Stein/entropyc", branch = "refactor/tobi" }
|
entropyc = { git = "ssh://git@github.com/AlbertEMC2Stein/entropyc", branch = "refactor/tobi" }
|
||||||
xanimate = { git = "https://github.com/davbyr/xAnimate" }
|
xanimate = { git = "https://github.com/davbyr/xAnimate" }
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ Author: Tobias Hölzer
|
||||||
Date: 09. June 2025
|
Date: 09. June 2025
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
import cartopy.crs as ccrs
|
import cartopy.crs as ccrs
|
||||||
|
|
@ -26,6 +28,12 @@ from xdggs.healpix import HealpixInfo
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
||||||
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "../../data")) / "entropyc-rts"
|
||||||
|
GRIDS_DIR = DATA_DIR / "grids"
|
||||||
|
FIGURES_DIR = DATA_DIR / "figures"
|
||||||
|
GRIDS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
FIGURES_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
@stopwatch("Create a global hex grid")
|
@stopwatch("Create a global hex grid")
|
||||||
def create_global_hex_grid(resolution):
|
def create_global_hex_grid(resolution):
|
||||||
|
|
@ -123,7 +131,7 @@ def filter_permafrost_grid(grid: gpd.GeoDataFrame):
|
||||||
grid = grid.to_crs("EPSG:3413")
|
grid = grid.to_crs("EPSG:3413")
|
||||||
|
|
||||||
# Filter out non-land areas (e.g., oceans)
|
# Filter out non-land areas (e.g., oceans)
|
||||||
water_mask = gpd.read_file("./data/simplified-water-polygons-split-3857/simplified_water_polygons.shp")
|
water_mask = gpd.read_file(DATA_DIR / "simplified-water-polygons-split-3857/simplified_water_polygons.shp")
|
||||||
water_mask = water_mask.to_crs("EPSG:3413")
|
water_mask = water_mask.to_crs("EPSG:3413")
|
||||||
|
|
||||||
ov = gpd.overlay(grid, water_mask, how="intersection")
|
ov = gpd.overlay(grid, water_mask, how="intersection")
|
||||||
|
|
@ -218,14 +226,20 @@ def cli(grid: Literal["hex", "healpix"], level: int):
|
||||||
print("No valid grid cells found.")
|
print("No valid grid cells found.")
|
||||||
return
|
return
|
||||||
|
|
||||||
grid_gdf.to_parquet(f"./data/grids/permafrost_{grid}{level}_grid.parquet")
|
grid_file = GRIDS_DIR / f"permafrost_{grid}{level}_grid.parquet"
|
||||||
print(f"Saved to ./data/grids/permafrost_{grid}{level}_grid.parquet")
|
grid_gdf.to_parquet(grid_file)
|
||||||
|
print(f"Saved to {grid_file.resolve()}")
|
||||||
|
|
||||||
fig = vizualize_grid(grid_gdf, grid, level)
|
fig = vizualize_grid(grid_gdf, grid, level)
|
||||||
fig.savefig(f"./figures/permafrost_{grid}{level}_grid.png", dpi=300)
|
fig_file = FIGURES_DIR / f"permafrost_{grid}{level}_grid.png"
|
||||||
print(f"Saved figure to ./figures/permafrost_{grid}{level}_grid.png")
|
fig.savefig(fig_file, dpi=300)
|
||||||
|
print(f"Saved figure to {fig_file.resolve()}")
|
||||||
plt.close(fig)
|
plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def main(): # noqa: D103
|
||||||
cyclopts.run(cli)
|
cyclopts.run(cli)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
644
steps/s1_0_alphaearth/alphaearth.ipynb
Normal file
644
steps/s1_0_alphaearth/alphaearth.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,7 @@
|
||||||
"""Extract satellite embeddings from Google Earth Engine and map them to a grid."""
|
"""Extract satellite embeddings from Google Earth Engine and map them to a grid."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import warnings
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
|
|
@ -10,14 +11,17 @@ import geemap
|
||||||
import geopandas as gpd
|
import geopandas as gpd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from rich import pretty, traceback
|
from rich import pretty, print, traceback
|
||||||
from rich.progress import track
|
from rich.progress import track
|
||||||
|
|
||||||
|
# Filter out the GeoDataFrame.swapaxes deprecation warning
|
||||||
|
warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning)
|
||||||
|
|
||||||
pretty.install()
|
pretty.install()
|
||||||
traceback.install()
|
traceback.install()
|
||||||
ee.Initialize(project="ee-tobias-hoelzer")
|
ee.Initialize(project="ee-tobias-hoelzer")
|
||||||
|
|
||||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "../../data")) / "entropyc-rts"
|
||||||
EMBEDDINGS_DIR = DATA_DIR / "embeddings"
|
EMBEDDINGS_DIR = DATA_DIR / "embeddings"
|
||||||
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)
|
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
@ -34,10 +38,11 @@ def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool =
|
||||||
gridname = f"permafrost_{grid}{level}"
|
gridname = f"permafrost_{grid}{level}"
|
||||||
grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
|
grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
|
||||||
|
|
||||||
for year in track(range(2022, 2025), total=3, description="Processing years..."):
|
for year in track(range(2017, 2025), total=8, description="Processing years..."):
|
||||||
embedding_collection = ee.ImageCollection("GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL")
|
embedding_collection = ee.ImageCollection("GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL")
|
||||||
embedding_collection = embedding_collection.filterDate(f"{year}-01-01", f"{year}-12-31")
|
embedding_collection = embedding_collection.filterDate(f"{year}-01-01", f"{year}-12-31")
|
||||||
bands = [f"A{str(i).zfill(2)}" for i in range(64)]
|
aggs = ["median", "stdDev", "min", "max", "mean", "p1", "p5", "p25", "p75", "p95", "p99"]
|
||||||
|
bands = [f"A{str(i).zfill(2)}_{agg}" for i in range(64) for agg in aggs]
|
||||||
|
|
||||||
def extract_embedding(feature):
|
def extract_embedding(feature):
|
||||||
# Filter collection by geometry
|
# Filter collection by geometry
|
||||||
|
|
@ -45,7 +50,11 @@ def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool =
|
||||||
embedding = embedding_collection.filterBounds(geom).mosaic()
|
embedding = embedding_collection.filterBounds(geom).mosaic()
|
||||||
# Get mean embedding value for the geometry
|
# Get mean embedding value for the geometry
|
||||||
mean_dict = embedding.reduceRegion(
|
mean_dict = embedding.reduceRegion(
|
||||||
reducer=ee.Reducer.median(),
|
reducer=ee.Reducer.median()
|
||||||
|
.combine(ee.Reducer.stdDev(), sharedInputs=True)
|
||||||
|
.combine(ee.Reducer.minMax(), sharedInputs=True)
|
||||||
|
.combine(ee.Reducer.mean(), sharedInputs=True)
|
||||||
|
.combine(ee.Reducer.percentile([1, 5, 25, 75, 95, 99]), sharedInputs=True),
|
||||||
geometry=geom,
|
geometry=geom,
|
||||||
)
|
)
|
||||||
# Add mean embedding values as properties to the feature
|
# Add mean embedding values as properties to the feature
|
||||||
|
|
@ -79,8 +88,14 @@ def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool =
|
||||||
# Combine all batch results
|
# Combine all batch results
|
||||||
df = pd.concat(all_results, ignore_index=True)
|
df = pd.concat(all_results, ignore_index=True)
|
||||||
embeddings_on_grid = grid.merge(df[[*bands, "cell_id"]], on="cell_id", how="left")
|
embeddings_on_grid = grid.merge(df[[*bands, "cell_id"]], on="cell_id", how="left")
|
||||||
embeddings_on_grid.to_parquet(EMBEDDINGS_DIR / f"{gridname}_embeddings-{year}.parquet")
|
embeddings_file = EMBEDDINGS_DIR / f"{gridname}_embeddings-{year}.parquet"
|
||||||
|
embeddings_on_grid.to_parquet(embeddings_file)
|
||||||
|
print(f"Saved embeddings for year {year} to {embeddings_file.resolve()}.")
|
||||||
|
|
||||||
|
|
||||||
|
def main(): # noqa: D103
|
||||||
|
cyclopts.run(cli)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cyclopts.run(cli)
|
main()
|
||||||
9
steps/s1_0_alphaearth/alphaearth.sh
Normal file
9
steps/s1_0_alphaearth/alphaearth.sh
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# uv run alpha-earth --grid hex --level 3
|
||||||
|
uv run alpha-earth --grid hex --level 4
|
||||||
|
uv run alpha-earth --grid hex --level 5
|
||||||
|
uv run alpha-earth --grid healpix --level 6
|
||||||
|
uv run alpha-earth --grid healpix --level 7
|
||||||
|
uv run alpha-earth --grid healpix --level 8
|
||||||
|
uv run alpha-earth --grid healpix --level 9
|
||||||
|
|
@ -14,7 +14,7 @@ from rich import pretty, print, traceback
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
||||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "../../data")) / "entropyc-rts"
|
||||||
|
|
||||||
|
|
||||||
def hourly(years: str):
|
def hourly(years: str):
|
||||||
576
steps/s1_1_era5/era5.py
Normal file
576
steps/s1_1_era5/era5.py
Normal file
|
|
@ -0,0 +1,576 @@
|
||||||
|
"""Download and preprocess ERA5 data.
|
||||||
|
|
||||||
|
Variables of Interest:
|
||||||
|
- 2 metre temperature (t2m) [instant]
|
||||||
|
- Total precipitation (tp) [accum]
|
||||||
|
- Snow Fall (sf) [accum]
|
||||||
|
- Snow cover (snowc) [instant]
|
||||||
|
- Snow depth (sde) [instant]
|
||||||
|
- Surface sensible heat flux (sshf) [accum]
|
||||||
|
- Lake ice bottom temperature (lblt) [instant]
|
||||||
|
|
||||||
|
Naming patterns:
|
||||||
|
- Instant Variables are downloaded already as statistically aggregated (lossy),
|
||||||
|
therefore their names get the aggregation as suffix
|
||||||
|
- Accumulation Variables are downloaded as totals, their names stay the same
|
||||||
|
|
||||||
|
Daily Variables (downloaded from hourly data):
|
||||||
|
- t2m_max
|
||||||
|
- t2m_min
|
||||||
|
- snowc_mean
|
||||||
|
- sde_mean
|
||||||
|
- lblt_max
|
||||||
|
- tp
|
||||||
|
- sf
|
||||||
|
- sshf
|
||||||
|
|
||||||
|
Derived Daily Variables:
|
||||||
|
- t2m_daily_avg
|
||||||
|
- t2m_daily_range
|
||||||
|
- t2m_daily_skew
|
||||||
|
- thawing_degree_days
|
||||||
|
- freezing_degree_days
|
||||||
|
- thawing_days
|
||||||
|
- freezing_days
|
||||||
|
- precipitation_occurrences
|
||||||
|
- snowfall_occurrences
|
||||||
|
- snow_isolation (snowc * sde)
|
||||||
|
|
||||||
|
Monthly Variables:
|
||||||
|
- t2m_monthly_max
|
||||||
|
- t2m_monthly_min
|
||||||
|
- tp_monthly_sum
|
||||||
|
- sf_monthly_sum
|
||||||
|
- snowc_monthly_mean
|
||||||
|
- sde_monthly_mean
|
||||||
|
- sshf_monthly_sum
|
||||||
|
- lblt_monthly_max
|
||||||
|
- t2m_monthly_avg
|
||||||
|
- t2m_monthly_range_avg
|
||||||
|
- t2m_monthly_skew_avg
|
||||||
|
- thawing_degree_days_monthly
|
||||||
|
- freezing_degree_days_monthly
|
||||||
|
- thawing_days_monthly
|
||||||
|
- freezing_days_monthly
|
||||||
|
- precipitation_occurrences_monthly TODO: Rename to precipitation_days_monthly?
|
||||||
|
- snowfall_occurrences_monthly TODO: Rename to snowfall_days_monthly?
|
||||||
|
- snow_isolation_monthly_mean
|
||||||
|
|
||||||
|
Yearly Variables:
|
||||||
|
- TODO
|
||||||
|
|
||||||
|
# TODO Variables:
|
||||||
|
- Day of first thaw (yearly)
|
||||||
|
- Day of last thaw (yearly)
|
||||||
|
- Thawing period length (yearly)
|
||||||
|
- Freezing period length (yearly)
|
||||||
|
|
||||||
|
Author: Tobias Hölzer
|
||||||
|
Date: 09. June 2025
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
import cyclopts
|
||||||
|
import dask.distributed as dd
|
||||||
|
import geopandas as gpd
|
||||||
|
import odc.geo
|
||||||
|
import odc.geo.xr
|
||||||
|
import pandas as pd
|
||||||
|
import shapely
|
||||||
|
import shapely.ops
|
||||||
|
import xarray as xr
|
||||||
|
from numcodecs.zarr3 import Blosc
|
||||||
|
from rich import pretty, print, traceback
|
||||||
|
from rich.progress import track
|
||||||
|
from shapely.geometry import LineString, Polygon
|
||||||
|
|
||||||
|
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd])
|
||||||
|
pretty.install()
|
||||||
|
|
||||||
|
cli = cyclopts.App()
|
||||||
|
|
||||||
|
# TODO: Directly handle download on a grid level - this is more what the zarr access is indented to do
|
||||||
|
|
||||||
|
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
||||||
|
ERA5_DIR = DATA_DIR / "era5"
|
||||||
|
DAILY_RAW_PATH = ERA5_DIR / "daily_raw.zarr"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_grid_paths(
|
||||||
|
agg: Literal["daily", "monthly", "summer", "winter", "yearly"],
|
||||||
|
grid: Literal["hex", "healpix"],
|
||||||
|
level: int,
|
||||||
|
):
|
||||||
|
gridname = f"permafrost_{grid}{level}"
|
||||||
|
aligned_path = ERA5_DIR / f"{agg}_{gridname}.zarr"
|
||||||
|
return aligned_path
|
||||||
|
|
||||||
|
|
||||||
|
min_lat = 50
|
||||||
|
max_lat = 83.7 # Ensures the right Chunks Size (90 - 64 / 10 + 0.1)
|
||||||
|
min_time = "1990-01-01"
|
||||||
|
max_time = "2024-12-31"
|
||||||
|
|
||||||
|
|
||||||
|
today = time.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
# ================
|
||||||
|
# === Download ===
|
||||||
|
# ================
|
||||||
|
|
||||||
|
|
||||||
|
def create_encoding(ds: xr.Dataset):
|
||||||
|
"""Create compression encoding for zarr dataset storage.
|
||||||
|
|
||||||
|
Creates Blosc compression configuration for all data variables and coordinates
|
||||||
|
in the dataset using zstd compression with level 9.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ds (xr.Dataset): The xarray Dataset to create encoding for.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Encoding dictionary with compression settings for each variable.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# encoding = {var: {"compressors": BloscCodec(cname="zlib", clevel=9)} for var in ds.data_vars}
|
||||||
|
encoding = {var: {"compressors": Blosc(cname="zstd", clevel=9)} for var in [*ds.data_vars, *ds.coords]}
|
||||||
|
return encoding
|
||||||
|
|
||||||
|
|
||||||
|
def download_daily_aggregated():
|
||||||
|
"""Download and aggregate ERA5 data to daily resolution.
|
||||||
|
|
||||||
|
Downloads ERA5 reanalysis data from the DESTINE Earth Data Hub and aggregates
|
||||||
|
it to daily resolution. Includes temperature extremes, precipitation, snow,
|
||||||
|
and surface heat flux variables.
|
||||||
|
|
||||||
|
The function downloads hourly data and creates daily aggregates:
|
||||||
|
- Temperature: daily min/max
|
||||||
|
- Precipitation and snowfall: daily totals
|
||||||
|
- Snow cover and depth: daily means
|
||||||
|
- Surface heat flux: daily totals
|
||||||
|
- Lake ice temperature: daily max
|
||||||
|
|
||||||
|
The aggregated data is saved to a zarr file with compression.
|
||||||
|
"""
|
||||||
|
era5 = xr.open_dataset(
|
||||||
|
"https://data.earthdatahub.destine.eu/era5/reanalysis-era5-land-no-antartica-v0.zarr",
|
||||||
|
storage_options={"client_kwargs": {"trust_env": True}},
|
||||||
|
chunks={},
|
||||||
|
# chunks={},
|
||||||
|
engine="zarr",
|
||||||
|
).rename({"valid_time": "time"})
|
||||||
|
subset = {
|
||||||
|
"latitude": slice(max_lat, min_lat),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute the clostest chunk-start to min_time, to avoid problems with cropped chunks at the start
|
||||||
|
tchunksize = era5.chunksizes["time"][0]
|
||||||
|
era5_chunk_starts = pd.date_range(era5.time.min().item(), era5.time.max().item(), freq=f"{tchunksize}h")
|
||||||
|
closest_chunk_start = era5_chunk_starts[
|
||||||
|
era5_chunk_starts.get_indexer([pd.to_datetime(min_time)], method="ffill")[0]
|
||||||
|
]
|
||||||
|
subset["time"] = slice(str(closest_chunk_start), max_time)
|
||||||
|
|
||||||
|
era5 = era5.sel(**subset)
|
||||||
|
|
||||||
|
daily_raw = xr.merge(
|
||||||
|
[
|
||||||
|
# Instant
|
||||||
|
era5.t2m.resample(time="1D").max().rename("t2m_max"),
|
||||||
|
era5.t2m.resample(time="1D").min().rename("t2m_min"),
|
||||||
|
era5.snowc.resample(time="1D").mean().rename("snowc_mean"),
|
||||||
|
era5.sde.resample(time="1D").mean().rename("sde_mean"),
|
||||||
|
era5.lblt.resample(time="1D").max().rename("lblt_max"),
|
||||||
|
# Accum
|
||||||
|
era5.tp.resample(time="1D").sum().rename("tp"),
|
||||||
|
era5.sf.resample(time="1D").sum().rename("sf"),
|
||||||
|
era5.sshf.resample(time="1D").sum().rename("sshf"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Assign attributes
|
||||||
|
daily_raw["t2m_max"].attrs = {"long_name": "Daily maximum 2 metre temperature", "units": "K"}
|
||||||
|
daily_raw["t2m_min"].attrs = {"long_name": "Daily minimum 2 metre temperature", "units": "K"}
|
||||||
|
daily_raw["tp"].attrs = {"long_name": "Daily total precipitation", "units": "m"}
|
||||||
|
daily_raw["sf"].attrs = {"long_name": "Daily total snow fall", "units": "m"}
|
||||||
|
daily_raw["snowc_mean"].attrs = {"long_name": "Daily mean snow cover", "units": "m"}
|
||||||
|
daily_raw["sde_mean"].attrs = {"long_name": "Daily mean snow depth", "units": "m"}
|
||||||
|
daily_raw["sshf"].attrs = {"long_name": "Daily total surface sensible heat flux", "units": "J/m²"}
|
||||||
|
daily_raw["lblt_max"].attrs = {"long_name": "Daily maximum lake ice bottom temperature", "units": "K"}
|
||||||
|
|
||||||
|
daily_raw = daily_raw.odc.assign_crs("epsg:4326")
|
||||||
|
daily_raw = daily_raw.drop_vars(["surface", "number", "depthBelowLandLayer"])
|
||||||
|
daily_raw.to_zarr(DAILY_RAW_PATH, mode="w", encoding=create_encoding(daily_raw), consolidated=False)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command
|
||||||
|
def download():
|
||||||
|
"""Download ERA5 data using Dask cluster for parallel processing.
|
||||||
|
|
||||||
|
Creates a local Dask cluster and downloads daily aggregated ERA5 data.
|
||||||
|
The cluster is configured with a single worker with 10 threads and 100GB
|
||||||
|
memory limit for optimal performance.
|
||||||
|
"""
|
||||||
|
with (
|
||||||
|
dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster,
|
||||||
|
dd.Client(cluster) as client,
|
||||||
|
):
|
||||||
|
print(client)
|
||||||
|
print(client.dashboard_link)
|
||||||
|
download_daily_aggregated()
|
||||||
|
print(f"Downloaded and aggregated ERA5 data to {DAILY_RAW_PATH.resolve()}.")
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================
|
||||||
|
# === Spatial Aggregation ===
|
||||||
|
# ===========================
|
||||||
|
|
||||||
|
|
||||||
|
def _crosses_antimeridian(geom: Polygon) -> bool:
|
||||||
|
coords = shapely.get_coordinates(geom)
|
||||||
|
crosses_any_meridian = (coords[:, 0] > 0).any() and (coords[:, 0] < 0).any()
|
||||||
|
return crosses_any_meridian and abs(coords[:, 0]).max() > 90
|
||||||
|
|
||||||
|
|
||||||
|
def _split_antimeridian_cell(geom: Polygon) -> list[Polygon]:
|
||||||
|
# Assumes that it is a antimeridian hex
|
||||||
|
coords = shapely.get_coordinates(geom)
|
||||||
|
for i in range(coords.shape[0]):
|
||||||
|
if coords[i, 0] < 0:
|
||||||
|
coords[i, 0] += 360
|
||||||
|
geom = Polygon(coords)
|
||||||
|
antimeridian = LineString([[180, -90], [180, 90]])
|
||||||
|
polys = shapely.ops.split(geom, antimeridian)
|
||||||
|
return list(polys.geoms)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_geobox(geobox):
|
||||||
|
x, y = geobox.shape
|
||||||
|
return x > 1 and y > 1
|
||||||
|
|
||||||
|
|
||||||
|
def extract_cell_data(idx: int, geom: Polygon) -> xr.Dataset:
|
||||||
|
"""Extract ERA5 data for a specific grid cell geometry.
|
||||||
|
|
||||||
|
Extracts and spatially averages ERA5 data within the bounds of a grid cell.
|
||||||
|
Handles antimeridian-crossing cells by splitting them appropriately.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
idx (int): Index of the grid cell.
|
||||||
|
geom (Polygon): Polygon geometry of the grid cell.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
xr.Dataset: The computed cell dataset
|
||||||
|
|
||||||
|
"""
|
||||||
|
daily_raw = xr.open_zarr(DAILY_RAW_PATH, consolidated=False).set_coords("spatial_ref")
|
||||||
|
# cell.geometry is a shapely Polygon
|
||||||
|
if not _crosses_antimeridian(geom):
|
||||||
|
geoms = [geom]
|
||||||
|
# Split geometry in case it crossed antimeridian
|
||||||
|
else:
|
||||||
|
geoms = _split_antimeridian_cell(geom)
|
||||||
|
cell_data = []
|
||||||
|
for geom in geoms:
|
||||||
|
geom = odc.geo.Geometry(geom, crs="epsg:4326")
|
||||||
|
if not _check_geobox(daily_raw.odc.geobox.enclosing(geom)):
|
||||||
|
continue
|
||||||
|
# TODO: use mean for instant variables, sum for accum variables
|
||||||
|
cell_data.append(daily_raw.odc.crop(geom).drop_vars("spatial_ref").mean(["latitude", "longitude"]))
|
||||||
|
if len(cell_data) == 0:
|
||||||
|
return False
|
||||||
|
elif len(cell_data) == 1:
|
||||||
|
cell_data = cell_data[0]
|
||||||
|
else:
|
||||||
|
cell_data = xr.concat(cell_data, dim="part").mean("part")
|
||||||
|
cell_data = cell_data.expand_dims({"cell": [idx]}).compute()
|
||||||
|
return cell_data
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command
|
||||||
|
def spatial_agg(
|
||||||
|
grid: Literal["hex", "healpix"],
|
||||||
|
level: int,
|
||||||
|
n_workers: int = 10,
|
||||||
|
executor: Literal["threads", "processes"] = "threads",
|
||||||
|
):
|
||||||
|
"""Perform spatial aggregation of ERA5 data to grid cells.
|
||||||
|
|
||||||
|
Loads a grid and spatially aggregates ERA5 data to each grid cell using
|
||||||
|
parallel processing. Creates an empty zarr file first, then fills it
|
||||||
|
with extracted data for each cell.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
grid ("hex" | "healpix"): Grid type.
|
||||||
|
level (int): Grid resolution level.
|
||||||
|
n_workers (int, optional): Number of parallel workers to use. Defaults to 10.
|
||||||
|
executor ("threads" | "processes"): The type of parallel executor pool to use. Defaults to threads.
|
||||||
|
|
||||||
|
"""
|
||||||
|
gridname = f"permafrost_{grid}{level}"
|
||||||
|
daily_grid_path = _get_grid_paths("daily", grid, level)
|
||||||
|
grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
|
||||||
|
# Create an empty zarr array with the right dimensions
|
||||||
|
daily_raw = xr.open_zarr(DAILY_RAW_PATH, consolidated=False).set_coords("spatial_ref")
|
||||||
|
assert {"latitude", "longitude", "time"} == set(daily_raw.dims), (
|
||||||
|
f"Expected dims ('latitude', 'longitude', 'time'), got {daily_raw.dims}"
|
||||||
|
)
|
||||||
|
assert daily_raw.odc.crs == "epsg:4326", f"Expected CRS 'epsg:4326', got {daily_raw.odc.crs}"
|
||||||
|
daily = (
|
||||||
|
xr.zeros_like(daily_raw.isel(latitude=0, longitude=0))
|
||||||
|
.expand_dims({"cell": [idx for idx, _ in grid.iterrows()]})
|
||||||
|
.chunk({"cell": min(len(grid), 1000), "time": len(daily_raw.time)}) # ~50MB chunks
|
||||||
|
)
|
||||||
|
daily.to_zarr(daily_grid_path, mode="w", consolidated=False, encoding=create_encoding(daily))
|
||||||
|
print(f"Created empty zarr at {daily_grid_path.resolve()} with shape {daily.sizes}.")
|
||||||
|
|
||||||
|
print(f"Starting spatial matching of {len(grid)} cells with {n_workers} workers...")
|
||||||
|
ExecutorCls = ThreadPoolExecutor if executor == "threads" else ProcessPoolExecutor
|
||||||
|
with ExecutorCls(max_workers=n_workers) as executor:
|
||||||
|
futures = {
|
||||||
|
executor.submit(extract_cell_data, idx, row.geometry): idx
|
||||||
|
for idx, row in grid.to_crs("epsg:4326").iterrows()
|
||||||
|
}
|
||||||
|
for future in track(as_completed(futures), total=len(futures), description="Processing cells"):
|
||||||
|
idx = futures[future]
|
||||||
|
try:
|
||||||
|
cell_data = future.result()
|
||||||
|
if not cell_data:
|
||||||
|
print(f"Cell {idx} did not overlap with ERA5 data.")
|
||||||
|
cell_data.to_zarr(daily_grid_path, region="auto", consolidated=False)
|
||||||
|
print(f"Successfully written cell {idx}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"{type(e)} processing cell {idx}: {e}")
|
||||||
|
print("Finished spatial matching.")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================
|
||||||
|
# === Temporal Aggregation ===
|
||||||
|
# ============================
|
||||||
|
|
||||||
|
|
||||||
|
def daily_enrich(grid: Literal["hex", "healpix"], level: int) -> xr.Dataset:
|
||||||
|
"""Enrich daily ERA5 data with derived climate variables.
|
||||||
|
|
||||||
|
Loads spatially aligned ERA5 data and computes additional climate variables.
|
||||||
|
Creates derived variables including temperature statistics, degree days, and occurrence indicators.
|
||||||
|
|
||||||
|
Derived variables include:
|
||||||
|
- Daily average and range temperature
|
||||||
|
- Temperature skewness
|
||||||
|
- Thawing and freezing degree days
|
||||||
|
- Thawing and freezing day counts
|
||||||
|
- Precipitation and snowfall occurrences
|
||||||
|
- Snow isolation index
|
||||||
|
|
||||||
|
Args:
|
||||||
|
grid ("hex", "healpix"): Grid type.
|
||||||
|
level (int): Grid resolution level.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
xr.Dataset: Enriched dataset with original and derived variables.
|
||||||
|
|
||||||
|
"""
|
||||||
|
daily_grid_path = _get_grid_paths("daily", grid, level)
|
||||||
|
daily = xr.open_zarr(daily_grid_path, consolidated=False).set_coords("spatial_ref")
|
||||||
|
assert {"cell", "time"} == set(daily.dims), f"Expected dims ('cell', 'time'), got {daily.dims}"
|
||||||
|
|
||||||
|
# Formulas based on Groeke et. al. (2025) Stochastic Weather generation...
|
||||||
|
daily["t2m_avg"] = (daily.t2m_max + daily.t2m_min) / 2
|
||||||
|
daily.t2m_avg.attrs = {"long_name": "Daily average 2 metre temperature", "units": "K"}
|
||||||
|
daily["t2m_range"] = daily.t2m_max - daily.t2m_min
|
||||||
|
daily.t2m_range.attrs = {"long_name": "Daily range of 2 metre temperature", "units": "K"}
|
||||||
|
daily["t2m_skew"] = (daily.t2m_avg - daily.t2m_min) / daily.t2m_range
|
||||||
|
daily.t2m_skew.attrs = {"long_name": "Daily skewness of 2 metre temperature"}
|
||||||
|
|
||||||
|
daily["thawing_degree_days"] = (daily.t2m_avg - 273.15).clip(min=0)
|
||||||
|
daily.thawing_degree_days.attrs = {"long_name": "Thawing degree days", "units": "K"}
|
||||||
|
daily["freezing_degree_days"] = (273.15 - daily.t2m_avg).clip(min=0)
|
||||||
|
daily.freezing_degree_days.attrs = {"long_name": "Freezing degree days", "units": "K"}
|
||||||
|
|
||||||
|
daily["thawing_days"] = (daily.t2m_avg > 273.15).astype(int)
|
||||||
|
daily.thawing_days.attrs = {"long_name": "Thawing days"}
|
||||||
|
daily["freezing_days"] = (daily.t2m_avg < 273.15).astype(int)
|
||||||
|
daily.freezing_days.attrs = {"long_name": "Freezing days"}
|
||||||
|
|
||||||
|
daily["precipitation_occurrences"] = (daily.tp > 0).astype(int)
|
||||||
|
daily.precipitation_occurrences.attrs = {"long_name": "Precipitation occurrences"}
|
||||||
|
daily["snowfall_occurrences"] = (daily.sf > 0).astype(int)
|
||||||
|
daily.snowfall_occurrences.attrs = {"long_name": "Snowfall occurrences"}
|
||||||
|
|
||||||
|
daily["snow_isolation"] = daily.snowc_mean * daily.sde_mean
|
||||||
|
daily.snow_isolation.attrs = {"long_name": "Snow isolation"}
|
||||||
|
|
||||||
|
return daily
|
||||||
|
|
||||||
|
|
||||||
|
def monthly_aggregate(grid: Literal["hex", "healpix"], level: int):
|
||||||
|
"""Aggregate enriched daily ERA5 data to monthly resolution.
|
||||||
|
|
||||||
|
Takes the enriched daily ERA5 data and creates monthly aggregates using
|
||||||
|
appropriate statistical functions for each variable type. Temperature
|
||||||
|
variables use min/max/mean, accumulation variables use sums, and derived
|
||||||
|
variables use appropriate aggregations.
|
||||||
|
|
||||||
|
The aggregated monthly data is saved to a zarr file for further processing.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
grid ("hex", "healpix"): Grid type.
|
||||||
|
level (int): Grid resolution level.
|
||||||
|
|
||||||
|
"""
|
||||||
|
daily = daily_enrich(grid, level)
|
||||||
|
assert {"cell", "time"} == set(daily.dims), f"Expected dims ('cell', 'time'), got {daily.dims}"
|
||||||
|
|
||||||
|
# Monthly aggregates
|
||||||
|
monthly = xr.merge(
|
||||||
|
[
|
||||||
|
# Original variables
|
||||||
|
daily.t2m_min.resample(time="1ME").min().rename("t2m_min"),
|
||||||
|
daily.t2m_max.resample(time="1ME").max().rename("t2m_max"),
|
||||||
|
daily.snowc_mean.resample(time="1ME").mean().rename("snowc_mean"),
|
||||||
|
daily.sde_mean.resample(time="1ME").mean().rename("sde_mean"),
|
||||||
|
daily.lblt_max.resample(time="1ME").max().rename("lblt_max"),
|
||||||
|
daily.tp.resample(time="1ME").sum().rename("tp"),
|
||||||
|
daily.sf.resample(time="1ME").sum().rename("sf"),
|
||||||
|
daily.sshf.resample(time="1ME").sum().rename("sshf"),
|
||||||
|
# Enriched variables
|
||||||
|
daily.t2m_avg.resample(time="1ME").mean().rename("t2m_avg"),
|
||||||
|
daily.t2m_range.resample(time="1ME").mean().rename("t2m_mean_range"),
|
||||||
|
daily.t2m_skew.resample(time="1ME").mean().rename("t2m_mean_skew"),
|
||||||
|
daily.thawing_degree_days.resample(time="1ME").sum().rename("thawing_degree_days"),
|
||||||
|
daily.freezing_degree_days.resample(time="1ME").sum().rename("freezing_degree_days"),
|
||||||
|
daily.thawing_days.resample(time="1ME").sum().rename("thawing_days"),
|
||||||
|
daily.freezing_days.resample(time="1ME").sum().rename("freezing_days"),
|
||||||
|
daily.precipitation_occurrences.resample(time="1ME").sum().rename("precipitation_occurrences"),
|
||||||
|
daily.snowfall_occurrences.resample(time="1ME").sum().rename("snowfall_occurrences"),
|
||||||
|
daily.snow_isolation.resample(time="1ME").mean().rename("snow_mean_isolation"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
monthly_grid_path = _get_grid_paths("monthly", grid, level)
|
||||||
|
monthly.to_zarr(monthly_grid_path, mode="w", encoding=create_encoding(monthly), consolidated=False)
|
||||||
|
|
||||||
|
|
||||||
|
def yearly_aggregate(monthly: xr.Dataset) -> xr.Dataset:
|
||||||
|
"""Aggregate monthly ERA5 data to yearly resolution.
|
||||||
|
|
||||||
|
Takes monthly aggregated data and creates yearly aggregates using a shifted
|
||||||
|
calendar (October to September) to better capture Arctic seasonal patterns.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
monthly (xr.Dataset): The monthly aggregates
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
xr.Dataset: The aggregated dataset
|
||||||
|
|
||||||
|
"""
|
||||||
|
return xr.merge(
|
||||||
|
[
|
||||||
|
# Original variables
|
||||||
|
monthly.t2m_min.resample(time="1YE").min().rename("t2m_min"),
|
||||||
|
monthly.t2m_max.resample(time="1YE").max().rename("t2m_max"),
|
||||||
|
monthly.snowc_mean.resample(time="1YE").mean().rename("snowc_mean"),
|
||||||
|
monthly.sde_mean.resample(time="1YE").mean().rename("sde_mean"),
|
||||||
|
monthly.lblt_max.resample(time="1YE").max().rename("lblt_max"),
|
||||||
|
monthly.tp.resample(time="1YE").sum().rename("tp"),
|
||||||
|
monthly.sf.resample(time="1YE").sum().rename("sf"),
|
||||||
|
monthly.sshf.resample(time="1YE").sum().rename("sshf"),
|
||||||
|
# Enriched variables
|
||||||
|
monthly.t2m_avg.resample(time="1YE").mean().rename("t2m_avg"),
|
||||||
|
# TODO: Check if this is correct -> use daily / hourly data instead for range and skew?
|
||||||
|
monthly.t2m_mean_range.resample(time="1YE").mean().rename("t2m_mean_range"),
|
||||||
|
monthly.t2m_mean_skew.resample(time="1YE").mean().rename("t2m_mean_skew"),
|
||||||
|
monthly.thawing_degree_days.resample(time="1YE").sum().rename("thawing_degree_days"),
|
||||||
|
monthly.freezing_degree_days.resample(time="1YE").sum().rename("freezing_degree_days"),
|
||||||
|
monthly.thawing_days.resample(time="1YE").sum().rename("thawing_days"),
|
||||||
|
monthly.freezing_days.resample(time="1YE").sum().rename("freezing_days"),
|
||||||
|
monthly.precipitation_occurrences.resample(time="1YE").sum().rename("precipitation_occurrences"),
|
||||||
|
monthly.snowfall_occurrences.resample(time="1YE").sum().rename("snowfall_occurrences"),
|
||||||
|
monthly.snow_mean_isolation.resample(time="1YE").mean().rename("snow_mean_isolation"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def yearly_and_seasonal_aggregate(grid: Literal["hex", "healpix"], level: int):
    """Aggregate monthly ERA5 data to yearly resolution with seasonal splits.

    Takes monthly aggregated data and creates yearly aggregates using a shifted
    calendar (October to September) to better capture Arctic seasonal patterns.
    Creates separate aggregates for full year, winter (Oct-Apr), and summer
    (May-Sep) periods.

    The first incomplete year is excluded from the analysis.
    Winter months are defined as months 1-7 in the shifted calendar,
    and summer months are 8-12.

    The final dataset includes yearly, winter, and summer aggregates for all
    climate variables, each saved to its own zarr store.

    Args:
        grid ("hex", "healpix"): Grid type.
        level (int): Grid resolution level.

    """
    monthly_grid_path = _get_grid_paths("monthly", grid, level)
    monthly = xr.open_zarr(monthly_grid_path, consolidated=False).set_coords("spatial_ref")
    assert {"cell", "time"} == set(monthly.dims), f"Expected dims ('cell', 'time'), got {monthly.dims}"

    # Crop the first (always incomplete) year; keep everything up to the last timestamp.
    valid_years = slice(str(monthly.time.min().dt.year.item() + 1), str(monthly.time.max().dt.year.item()))

    def _shift_and_crop(ds: xr.Dataset) -> xr.Dataset:
        """Map the Oct-Sep year onto a Jan-Dec calendar year and crop incomplete years.

        Shifting timestamps by +3 months sends October -> January and
        September -> December, so a plain calendar-year resample ("1YE" in
        ``yearly_aggregate``) groups October..September together. It also
        realizes the shifted-calendar month numbering documented above:
        winter (Oct-Apr) becomes months 1-7 and summer (May-Sep) becomes 8-12.

        BUGFIX: this shift was previously +8 months, which grouped May..April
        into one label year instead of the documented October..September.
        """
        shifted = ds.copy()
        shifted["time"] = shifted.get_index("time") + pd.DateOffset(months=3)
        return shifted.sel(time=valid_years)

    # Summer aggregates (May-Sep lies entirely within one calendar year, so no shift is needed).
    summer = yearly_aggregate(monthly.sel(time=monthly.time.dt.month.isin([5, 6, 7, 8, 9])).sel(time=valid_years))

    # Yearly aggregates over the shifted (October to September) year.
    yearly = yearly_aggregate(_shift_and_crop(monthly))

    # Winter aggregates (Oct-Apr) over the shifted year.
    winter = yearly_aggregate(_shift_and_crop(monthly.sel(time=monthly.time.dt.month.isin([1, 2, 3, 4, 10, 11, 12]))))

    yearly_grid_path = _get_grid_paths("yearly", grid, level)
    yearly.to_zarr(yearly_grid_path, mode="w", encoding=create_encoding(yearly), consolidated=False)

    winter_grid_path = _get_grid_paths("winter", grid, level)
    winter.to_zarr(winter_grid_path, mode="w", encoding=create_encoding(winter), consolidated=False)

    summer_grid_path = _get_grid_paths("summer", grid, level)
    summer.to_zarr(summer_grid_path, mode="w", encoding=create_encoding(summer), consolidated=False)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command
def temporal_agg(n_workers: int = 10):
    """Perform temporal aggregation of ERA5 data using a Dask cluster.

    Creates a local Dask cluster and runs both monthly and yearly aggregation
    functions to generate temporally aggregated climate datasets. The
    processing uses parallel workers for efficient computation.

    Args:
        n_workers (int, optional): Number of Dask workers to use. Defaults to 10.

    """
    # Local Dask cluster: n_workers worker processes x 20 threads each,
    # with a 10 GB memory limit per worker. Client/cluster are closed on exit.
    with (
        dd.LocalCluster(n_workers=n_workers, threads_per_worker=20, memory_limit="10GB") as cluster,
        dd.Client(cluster) as client,
    ):
        # Print the client summary and dashboard URL so progress can be monitored.
        print(client)
        print(client.dashboard_link)
        monthly_aggregate()
        # NOTE(review): yearly_and_seasonal_aggregate is defined with required
        # (grid, level) parameters, so this zero-argument call would raise a
        # TypeError unless defaults are supplied elsewhere — confirm the
        # intended arguments for both aggregation calls.
        yearly_and_seasonal_aggregate()
        print("Enriched ERA5 data with additional features and aggregated it temporally.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: dispatch to the command-line app (subcommands are
    # registered above via @cli.command).
    cli()
|
||||||
96
uv.lock
generated
96
uv.lock
generated
|
|
@ -850,17 +850,17 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cyclopts"
|
name = "cyclopts"
|
||||||
version = "3.24.0"
|
version = "4.0.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "attrs" },
|
{ name = "attrs" },
|
||||||
{ name = "docstring-parser", marker = "python_full_version < '4'" },
|
{ name = "docstring-parser" },
|
||||||
{ name = "rich" },
|
{ name = "rich" },
|
||||||
{ name = "rich-rst" },
|
{ name = "rich-rst" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/30/ca/7782da3b03242d5f0a16c20371dff99d4bd1fedafe26bc48ff82e42be8c9/cyclopts-3.24.0.tar.gz", hash = "sha256:de6964a041dfb3c57bf043b41e68c43548227a17de1bad246e3a0bfc5c4b7417", size = 76131 }
|
sdist = { url = "https://files.pythonhosted.org/packages/9a/d1/2f2b99ec5ea54ac18baadfc4a011e2a1743c1eaae1e39838ca520dcf4811/cyclopts-4.0.0.tar.gz", hash = "sha256:0dae712085e91d32cc099ea3d78f305b0100a3998b1dec693be9feb0b1be101f", size = 143546 }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/f0/8b/2c95f0645c6f40211896375e6fa51f504b8ccb29c21f6ae661fe87ab044e/cyclopts-3.24.0-py3-none-any.whl", hash = "sha256:809d04cde9108617106091140c3964ee6fceb33cecdd537f7ffa360bde13ed71", size = 86154 },
|
{ url = "https://files.pythonhosted.org/packages/44/0e/0a22e076944600aeb06f40b7e03bbd762a42d56d43a2f5f4ab954aed9005/cyclopts-4.0.0-py3-none-any.whl", hash = "sha256:e64801a2c86b681f08323fd50110444ee961236a0bae402a66d2cc3feda33da7", size = 178837 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -885,6 +885,15 @@ wheels = [
|
||||||
array = [
|
array = [
|
||||||
{ name = "numpy" },
|
{ name = "numpy" },
|
||||||
]
|
]
|
||||||
|
complete = [
|
||||||
|
{ name = "bokeh" },
|
||||||
|
{ name = "distributed" },
|
||||||
|
{ name = "jinja2" },
|
||||||
|
{ name = "lz4" },
|
||||||
|
{ name = "numpy" },
|
||||||
|
{ name = "pandas" },
|
||||||
|
{ name = "pyarrow" },
|
||||||
|
]
|
||||||
distributed = [
|
distributed = [
|
||||||
{ name = "distributed" },
|
{ name = "distributed" },
|
||||||
]
|
]
|
||||||
|
|
@ -1064,7 +1073,7 @@ wheels = [
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "entropic-perma-risk"
|
name = "entropic-perma-risk"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { virtual = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aiohttp" },
|
{ name = "aiohttp" },
|
||||||
{ name = "bokeh" },
|
{ name = "bokeh" },
|
||||||
|
|
@ -1094,7 +1103,8 @@ dependencies = [
|
||||||
{ name = "rich" },
|
{ name = "rich" },
|
||||||
{ name = "rioxarray" },
|
{ name = "rioxarray" },
|
||||||
{ name = "scipy" },
|
{ name = "scipy" },
|
||||||
{ name = "smart-geocubes", extra = ["dask", "stac", "viz"] },
|
{ name = "seaborn" },
|
||||||
|
{ name = "smart-geocubes", extra = ["dask", "gee", "stac", "viz"] },
|
||||||
{ name = "stopuhr" },
|
{ name = "stopuhr" },
|
||||||
{ name = "xanimate" },
|
{ name = "xanimate" },
|
||||||
{ name = "xarray" },
|
{ name = "xarray" },
|
||||||
|
|
@ -1109,7 +1119,7 @@ requires-dist = [
|
||||||
{ name = "bokeh", specifier = ">=3.7.3" },
|
{ name = "bokeh", specifier = ">=3.7.3" },
|
||||||
{ name = "cartopy", specifier = ">=0.24.1" },
|
{ name = "cartopy", specifier = ">=0.24.1" },
|
||||||
{ name = "cdsapi", specifier = ">=0.7.6" },
|
{ name = "cdsapi", specifier = ">=0.7.6" },
|
||||||
{ name = "cyclopts", specifier = ">=3.17.0" },
|
{ name = "cyclopts", specifier = ">=4.0.0" },
|
||||||
{ name = "dask", specifier = ">=2025.5.1" },
|
{ name = "dask", specifier = ">=2025.5.1" },
|
||||||
{ name = "distributed", specifier = ">=2025.5.1" },
|
{ name = "distributed", specifier = ">=2025.5.1" },
|
||||||
{ name = "earthengine-api", specifier = ">=1.6.9" },
|
{ name = "earthengine-api", specifier = ">=1.6.9" },
|
||||||
|
|
@ -1133,7 +1143,8 @@ requires-dist = [
|
||||||
{ name = "rich", specifier = ">=14.0.0" },
|
{ name = "rich", specifier = ">=14.0.0" },
|
||||||
{ name = "rioxarray", specifier = ">=0.19.0" },
|
{ name = "rioxarray", specifier = ">=0.19.0" },
|
||||||
{ name = "scipy", specifier = ">=1.15.3" },
|
{ name = "scipy", specifier = ">=1.15.3" },
|
||||||
{ name = "smart-geocubes", extras = ["arcticdem", "dask", "stac", "viz"], specifier = ">=0.0.9" },
|
{ name = "seaborn", specifier = ">=0.13.2" },
|
||||||
|
{ name = "smart-geocubes", extras = ["gee", "dask", "stac", "viz"], specifier = ">=0.0.9" },
|
||||||
{ name = "stopuhr", specifier = ">=0.0.10" },
|
{ name = "stopuhr", specifier = ">=0.0.10" },
|
||||||
{ name = "xanimate", git = "https://github.com/davbyr/xAnimate" },
|
{ name = "xanimate", git = "https://github.com/davbyr/xAnimate" },
|
||||||
{ name = "xarray", specifier = ">=2025.9.0" },
|
{ name = "xarray", specifier = ">=2025.9.0" },
|
||||||
|
|
@ -2120,6 +2131,30 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/2a/86/30142987e19a0f9bb464ada0dad647fc39b81bac5eacefdcdfff0d14c41e/lonboard-0.12.1-py3-none-any.whl", hash = "sha256:74fd1a904443d38018aa142214367d183a89fc41d16a36f5b3cde17b2e293773", size = 928199 },
|
{ url = "https://files.pythonhosted.org/packages/2a/86/30142987e19a0f9bb464ada0dad647fc39b81bac5eacefdcdfff0d14c41e/lonboard-0.12.1-py3-none-any.whl", hash = "sha256:74fd1a904443d38018aa142214367d183a89fc41d16a36f5b3cde17b2e293773", size = 928199 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lz4"
|
||||||
|
version = "4.4.4"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/c6/5a/945f5086326d569f14c84ac6f7fcc3229f0b9b1e8cc536b951fd53dfb9e1/lz4-4.4.4.tar.gz", hash = "sha256:070fd0627ec4393011251a094e08ed9fdcc78cb4e7ab28f507638eee4e39abda", size = 171884 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f7/2d/5523b4fabe11cd98f040f715728d1932eb7e696bfe94391872a823332b94/lz4-4.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:23ae267494fdd80f0d2a131beff890cf857f1b812ee72dbb96c3204aab725553", size = 220669 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/91/06/1a5bbcacbfb48d8ee5b6eb3fca6aa84143a81d92946bdb5cd6b005f1863e/lz4-4.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fff9f3a1ed63d45cb6514bfb8293005dc4141341ce3500abdfeb76124c0b9b2e", size = 189661 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fa/08/39eb7ac907f73e11a69a11576a75a9e36406b3241c0ba41453a7eb842abb/lz4-4.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea7f07329f85a8eda4d8cf937b87f27f0ac392c6400f18bea2c667c8b7f8ecc", size = 1238775 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e9/26/05840fbd4233e8d23e88411a066ab19f1e9de332edddb8df2b6a95c7fddc/lz4-4.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ccab8f7f7b82f9fa9fc3b0ba584d353bd5aa818d5821d77d5b9447faad2aaad", size = 1265143 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b7/5d/5f2db18c298a419932f3ab2023deb689863cf8fd7ed875b1c43492479af2/lz4-4.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e43e9d48b2daf80e486213128b0763deed35bbb7a59b66d1681e205e1702d735", size = 1185032 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c4/e6/736ab5f128694b0f6aac58343bcf37163437ac95997276cd0be3ea4c3342/lz4-4.4.4-cp312-cp312-win32.whl", hash = "sha256:33e01e18e4561b0381b2c33d58e77ceee850a5067f0ece945064cbaac2176962", size = 88284 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/40/b8/243430cb62319175070e06e3a94c4c7bd186a812e474e22148ae1290d47d/lz4-4.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d21d1a2892a2dcc193163dd13eaadabb2c1b803807a5117d8f8588b22eaf9f12", size = 99918 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6c/e1/0686c91738f3e6c2e1a243e0fdd4371667c4d2e5009b0a3605806c2aa020/lz4-4.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:2f4f2965c98ab254feddf6b5072854a6935adab7bc81412ec4fe238f07b85f62", size = 89736 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/3b/3c/d1d1b926d3688263893461e7c47ed7382a969a0976fc121fc678ec325fc6/lz4-4.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ed6eb9f8deaf25ee4f6fad9625d0955183fdc90c52b6f79a76b7f209af1b6e54", size = 220678 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/26/89/8783d98deb058800dabe07e6cdc90f5a2a8502a9bad8c5343c641120ace2/lz4-4.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:18ae4fe3bafb344dbd09f976d45cbf49c05c34416f2462828f9572c1fa6d5af7", size = 189670 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/22/ab/a491ace69a83a8914a49f7391e92ca0698f11b28d5ce7b2ececa2be28e9a/lz4-4.4.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57fd20c5fc1a49d1bbd170836fccf9a338847e73664f8e313dce6ac91b8c1e02", size = 1238746 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/97/12/a1f2f4fdc6b7159c0d12249456f9fe454665b6126e98dbee9f2bd3cf735c/lz4-4.4.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9cb387c33f014dae4db8cb4ba789c8d2a0a6d045ddff6be13f6c8d9def1d2a6", size = 1265119 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/50/6e/e22e50f5207649db6ea83cd31b79049118305be67e96bec60becf317afc6/lz4-4.4.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0be9f68240231e1e44118a4ebfecd8a5d4184f0bdf5c591c98dd6ade9720afd", size = 1184954 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4c/c4/2a458039645fcc6324ece731d4d1361c5daf960b553d1fcb4261ba07d51c/lz4-4.4.4-cp313-cp313-win32.whl", hash = "sha256:e9ec5d45ea43684f87c316542af061ef5febc6a6b322928f059ce1fb289c298a", size = 88289 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/00/96/b8e24ea7537ab418074c226279acfcaa470e1ea8271003e24909b6db942b/lz4-4.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:a760a175b46325b2bb33b1f2bbfb8aa21b48e1b9653e29c10b6834f9bb44ead4", size = 99925 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a5/a5/f9838fe6aa132cfd22733ed2729d0592259fff074cefb80f19aa0607367b/lz4-4.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:f4c21648d81e0dda38b4720dccc9006ae33b0e9e7ffe88af6bf7d4ec124e2fba", size = 89743 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mapclassify"
|
name = "mapclassify"
|
||||||
version = "2.10.0"
|
version = "2.10.0"
|
||||||
|
|
@ -2406,6 +2441,11 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/71/96/d5d8859a6dac29f8ebc815ff8e75770bd513db9f08d7a711e21ae562a948/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30d20e56b9ba2c48884eb89c91b63e6c0612b4927881707e34402719153ef17f", size = 9378149 },
|
{ url = "https://files.pythonhosted.org/packages/71/96/d5d8859a6dac29f8ebc815ff8e75770bd513db9f08d7a711e21ae562a948/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30d20e56b9ba2c48884eb89c91b63e6c0612b4927881707e34402719153ef17f", size = 9378149 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d1/80/b9c19f1bb4ac6c5fa6f94a4f278bc68a778473d1814a86a375d7cffa193a/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d6bfd38ba0bde04d56f06c1554714a2ea9dab75811c89450dc3ec57a9d36b80", size = 9254471 },
|
{ url = "https://files.pythonhosted.org/packages/d1/80/b9c19f1bb4ac6c5fa6f94a4f278bc68a778473d1814a86a375d7cffa193a/netCDF4-1.7.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d6bfd38ba0bde04d56f06c1554714a2ea9dab75811c89450dc3ec57a9d36b80", size = 9254471 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/66/b5/e04550fd53de57001dbd5a87242da7ff784c80790adc48897977b6ccf891/netCDF4-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:5c5fbee6134ee1246c397e1508e5297d825aa19221fdf3fa8dc9727ad824d7a5", size = 6990521 },
|
{ url = "https://files.pythonhosted.org/packages/66/b5/e04550fd53de57001dbd5a87242da7ff784c80790adc48897977b6ccf891/netCDF4-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:5c5fbee6134ee1246c397e1508e5297d825aa19221fdf3fa8dc9727ad824d7a5", size = 6990521 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/84/0a/182bb4fe5639699ba39d558b553b8e6f04fbfea6cf78404c0f21ef149bf7/netcdf4-1.7.2-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:7e81c3c47f2772eab0b93fba8bb05b17b58dce17720e1bed25e9d76551deecd0", size = 2751391 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2d/1f/54ac27c791360f7452ca27ed1cb2917946bbe1ea4337c590a5abcef6332d/netcdf4-1.7.2-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:cb2791dba37fc98fd1ac4e236c97822909f54efbcdf7f1415c9777810e0a28f4", size = 2387513 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5c/5e/9bf3008a9e45c08f4c9fedce4d6f722ef5d970f56a9c5eb375a200dd2b66/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf11480f6b8a5b246818ffff6b4d90481e51f8b9555b41af0c372eb0aaf8b65f", size = 9621674 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a1/75/46871e85f2bbfb1efe229623d25d7c9daa17e2e968d5235572b2c8bb53e8/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ccc05328a8ff31921b539821791aeb20b054879f3fdf6d1d505bf6422824fec", size = 9453759 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cd/10/c52f12297965938d9b9be666ea1f9d8340c2aea31d6909d90aa650847248/netcdf4-1.7.2-cp311-abi3-win_amd64.whl", hash = "sha256:999bfc4acebf400ed724d5e7329e2e768accc7ee1fa1d82d505da782f730301b", size = 7148514 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -3622,6 +3662,20 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/3b/88/d32b4b52dcdc5c9c5d6475ccbdc5272588c23523c2164b6717de0ff0801d/scooby-0.10.2-py3-none-any.whl", hash = "sha256:8aec2f3f7fb541bf2c9795cad43a88c976869248a4c16523f07f366388ffcfff", size = 18668 },
|
{ url = "https://files.pythonhosted.org/packages/3b/88/d32b4b52dcdc5c9c5d6475ccbdc5272588c23523c2164b6717de0ff0801d/scooby-0.10.2-py3-none-any.whl", hash = "sha256:8aec2f3f7fb541bf2c9795cad43a88c976869248a4c16523f07f366388ffcfff", size = 18668 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "seaborn"
|
||||||
|
version = "0.13.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "matplotlib" },
|
||||||
|
{ name = "numpy" },
|
||||||
|
{ name = "pandas" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "shapely"
|
name = "shapely"
|
||||||
version = "2.1.2"
|
version = "2.1.2"
|
||||||
|
|
@ -3707,6 +3761,12 @@ dask = [
|
||||||
{ name = "dask" },
|
{ name = "dask" },
|
||||||
{ name = "distributed" },
|
{ name = "distributed" },
|
||||||
]
|
]
|
||||||
|
gee = [
|
||||||
|
{ name = "earthengine-api" },
|
||||||
|
{ name = "rasterio" },
|
||||||
|
{ name = "rioxarray" },
|
||||||
|
{ name = "xee" },
|
||||||
|
]
|
||||||
stac = [
|
stac = [
|
||||||
{ name = "odc-stac" },
|
{ name = "odc-stac" },
|
||||||
{ name = "pyarrow" },
|
{ name = "pyarrow" },
|
||||||
|
|
@ -3981,6 +4041,11 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/8d/f0/73c24457c941b8b08f7d090853e40f4b2cdde88b5da721f3f28e98df77c9/xarray-2025.9.0-py3-none-any.whl", hash = "sha256:79f0e25fb39571f612526ee998ee5404d8725a1db3951aabffdb287388885df0", size = 1349595 },
|
{ url = "https://files.pythonhosted.org/packages/8d/f0/73c24457c941b8b08f7d090853e40f4b2cdde88b5da721f3f28e98df77c9/xarray-2025.9.0-py3-none-any.whl", hash = "sha256:79f0e25fb39571f612526ee998ee5404d8725a1db3951aabffdb287388885df0", size = 1349595 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[package.optional-dependencies]
|
||||||
|
parallel = [
|
||||||
|
{ name = "dask", extra = ["complete"] },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xdggs"
|
name = "xdggs"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
|
|
@ -4001,6 +4066,21 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/49/89/d62e492218806d2d40e9d7baa1fbf8e355fc3d5b8acbeaeac0d9f27d4eca/xdggs-0.2.2-py3-none-any.whl", hash = "sha256:66b9a5f84383e24aea00e986e964a7f77ae23b08ebe179f5c2e74b747006d470", size = 35393 },
|
{ url = "https://files.pythonhosted.org/packages/49/89/d62e492218806d2d40e9d7baa1fbf8e355fc3d5b8acbeaeac0d9f27d4eca/xdggs-0.2.2-py3-none-any.whl", hash = "sha256:66b9a5f84383e24aea00e986e964a7f77ae23b08ebe179f5c2e74b747006d470", size = 35393 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xee"
|
||||||
|
version = "0.0.22"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "affine" },
|
||||||
|
{ name = "earthengine-api" },
|
||||||
|
{ name = "pyproj" },
|
||||||
|
{ name = "xarray", extra = ["parallel"] },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/fe/9c/722cf70efef355e476f218ba35f60ef4c64cea335ef6c9f772c259132856/xee-0.0.22.tar.gz", hash = "sha256:c3acf2e82ce36caddf84f43b442131cc3707300533143e19f70e31bc8652d4ac", size = 421940 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/76/86/659f5b3c692d3813edcb373b9260e79303bc4a81234b80e5438194e0c5f2/xee-0.0.22-py3-none-any.whl", hash = "sha256:9d05f2fff1d24e5d39ab0621c51486f511851334f1f2fc4e0779aec2beba322a", size = 31204 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xproj"
|
name = "xproj"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue