Make era5 and alphaearth downloads work
This commit is contained in:
parent
c0c3700be8
commit
2af5c011a3
6 changed files with 441 additions and 196 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -11,3 +11,6 @@ wheels/
|
|||
|
||||
# Data
|
||||
data
|
||||
|
||||
# Editors
|
||||
.vscode/
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Extract satellite embeddings from Google Earth Engine and map them to a grid."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
|
|
@ -16,22 +17,24 @@ pretty.install()
|
|||
traceback.install()
|
||||
ee.Initialize(project="ee-tobias-hoelzer")
|
||||
|
||||
DATA_DIR = Path("data")
|
||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
||||
EMBEDDINGS_DIR = DATA_DIR / "embeddings"
|
||||
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def cli(grid: Literal["hex", "healpix"], level: int, year: int):
|
||||
def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool = False):
|
||||
"""Extract satellite embeddings from Google Earth Engine and map them to a grid.
|
||||
|
||||
Args:
|
||||
grid (Literal["hex", "healpix"]): The grid type to use.
|
||||
level (int): The grid level to use.
|
||||
year (int): The year to extract embeddings for. Must be between 2017 and 2024.
|
||||
backup_intermediate (bool, optional): Whether to backup intermediate results. Defaults to False.
|
||||
|
||||
"""
|
||||
gridname = f"permafrost_{grid}{level}"
|
||||
grid = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")
|
||||
|
||||
for year in track(range(2022, 2025), total=3, description="Processing years..."):
|
||||
embedding_collection = ee.ImageCollection("GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL")
|
||||
embedding_collection = embedding_collection.filterDate(f"{year}-01-01", f"{year}-12-31")
|
||||
bands = [f"A{str(i).zfill(2)}" for i in range(64)]
|
||||
|
|
@ -57,8 +60,6 @@ def cli(grid: Literal["hex", "healpix"], level: int, year: int):
|
|||
description="Processing batches...",
|
||||
total=n_batches,
|
||||
):
|
||||
print(f"Processing batch with {len(batch_grid)} items")
|
||||
|
||||
# Convert batch to EE FeatureCollection
|
||||
eegrid_batch = ee.FeatureCollection(batch_grid.to_crs("epsg:4326").__geo_interface__)
|
||||
|
||||
|
|
@ -66,14 +67,15 @@ def cli(grid: Literal["hex", "healpix"], level: int, year: int):
|
|||
eeegrid_batch = eegrid_batch.map(extract_embedding)
|
||||
df_batch = geemap.ee_to_df(eeegrid_batch)
|
||||
|
||||
# Store batch results
|
||||
all_results.append(df_batch)
|
||||
|
||||
# Save batch immediately to disk as backup
|
||||
if backup_intermediate:
|
||||
batch_filename = f"{gridname}_embeddings-{year}_batch{batch_num:06d}.parquet"
|
||||
batch_result = batch_grid.merge(df_batch[[*bands, "cell_id"]], on="cell_id", how="left")
|
||||
batch_result.to_parquet(EMBEDDINGS_DIR / f"{batch_filename}")
|
||||
|
||||
# Store batch results
|
||||
all_results.append(df_batch)
|
||||
|
||||
# Combine all batch results
|
||||
df = pd.concat(all_results, ignore_index=True)
|
||||
embeddings_on_grid = grid.merge(df[[*bands, "cell_id"]], on="cell_id", how="left")
|
||||
|
|
|
|||
5
cds.py
5
cds.py
|
|
@ -3,6 +3,7 @@
|
|||
Web platform: https://cds.climate.copernicus.eu
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
|
@ -13,6 +14,8 @@ from rich import pretty, print, traceback
|
|||
traceback.install()
|
||||
pretty.install()
|
||||
|
||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
||||
|
||||
|
||||
def hourly(years: str):
|
||||
"""Download ERA5 data from the Copernicus Data Store.
|
||||
|
|
@ -28,7 +31,7 @@ def hourly(years: str):
|
|||
dataset = "reanalysis-era5-single-levels"
|
||||
client = cdsapi.Client(wait_until_complete=False)
|
||||
|
||||
outdir = Path("/isipd/projects/p_aicore_pf/tohoel001/era5-cds").resolve()
|
||||
outdir = (DATA_DIR / "era5/cds").resolve()
|
||||
outdir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"Downloading ERA5 data from {start_year} to {end_year}...")
|
||||
|
|
|
|||
510
era5.py
510
era5.py
|
|
@ -1,23 +1,70 @@
|
|||
"""Download and preprocess ERA5 data.
|
||||
|
||||
Variables of Interest:
|
||||
- 2 metre temperature (t2m)
|
||||
- Total precipitation (tp)
|
||||
- Snow Fall (sf)
|
||||
- Snow cover (snowc)
|
||||
- Snow depth (sde)
|
||||
- Surface sensible heat flux (sshf)
|
||||
- Lake ice bottom temperature (lblt)
|
||||
- 2 metre temperature (t2m) [instant]
|
||||
- Total precipitation (tp) [accum]
|
||||
- Snow Fall (sf) [accum]
|
||||
- Snow cover (snowc) [instant]
|
||||
- Snow depth (sde) [instant]
|
||||
- Surface sensible heat flux (sshf) [accum]
|
||||
- Lake ice bottom temperature (lblt) [instant]
|
||||
|
||||
Aggregations:
|
||||
- Summer / Winter 20-bin histogram?
|
||||
Daily Variables (downloaded from hourly data):
|
||||
- t2m_daily_max
|
||||
- t2m_daily_min
|
||||
- tp_daily_sum
|
||||
- sf_daily_sum
|
||||
- snowc_daily_mean
|
||||
- sde_daily_mean
|
||||
- sshf_daily_sum
|
||||
- lblt_daily_max
|
||||
|
||||
Spatial -> Enrich -> Temporal ?
|
||||
Derived Daily Variables:
|
||||
- t2m_daily_avg
|
||||
- t2m_daily_range
|
||||
- t2m_daily_skew
|
||||
- thawing_degree_days
|
||||
- freezing_degree_days
|
||||
- thawing_days
|
||||
- freezing_days
|
||||
- precipitation_occurrences
|
||||
- snowfall_occurrences
|
||||
- snow_isolation (snowc * sde)
|
||||
|
||||
Monthly Variables:
|
||||
- t2m_monthly_max
|
||||
- t2m_monthly_min
|
||||
- tp_monthly_sum
|
||||
- sf_monthly_sum
|
||||
- snowc_monthly_mean
|
||||
- sde_monthly_mean
|
||||
- sshf_monthly_sum
|
||||
- lblt_monthly_max
|
||||
- t2m_monthly_avg
|
||||
- t2m_monthly_range_avg
|
||||
- t2m_monthly_skew_avg
|
||||
- thawing_degree_days_monthly
|
||||
- freezing_degree_days_monthly
|
||||
- thawing_days_monthly
|
||||
- freezing_days_monthly
|
||||
- precipitation_occurrences_monthly TODO: Rename to precipitation_days_monthly?
|
||||
- snowfall_occurrences_monthly TODO: Rename to snowfall_days_monthly?
|
||||
- snow_isolation_monthly_mean
|
||||
|
||||
Yearly Variables:
|
||||
- TODO
|
||||
|
||||
# TODO Variables:
|
||||
- Day of first thaw (yearly)
|
||||
- Day of last thaw (yearly)
|
||||
- Thawing period length (yearly)
|
||||
- Freezing period length (yearly)
|
||||
|
||||
Author: Tobias Hölzer
|
||||
Date: 09. June 2025
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
|
@ -34,24 +81,29 @@ import shapely.ops
|
|||
import xarray as xr
|
||||
from numcodecs.zarr3 import Blosc
|
||||
from rich import pretty, print, traceback
|
||||
from rich.progress import track
|
||||
from shapely.geometry import LineString, Polygon
|
||||
|
||||
traceback.install(show_locals=True)
|
||||
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd])
|
||||
pretty.install()
|
||||
|
||||
DATA_DIR = Path("data/era5")
|
||||
AGG_PATH = DATA_DIR / "era5_agg.zarr"
|
||||
ALIGNED_PATH = DATA_DIR / "era5_spatial_aligned.zarr"
|
||||
MONTHLY_PATH = DATA_DIR / "era5_monthly.zarr"
|
||||
YEARLY_PATH = DATA_DIR / "era5_yearly.zarr"
|
||||
cli = cyclopts.App()
|
||||
|
||||
# TODO: Directly handle stuff on a grid level - this is more what the zarr access is intended to do
|
||||
|
||||
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
|
||||
# DATA_DIR = Path("data")
|
||||
ERA5_DIR = DATA_DIR / "era5"
|
||||
AGG_PATH = ERA5_DIR / "era5_agg.zarr"
|
||||
ALIGNED_PATH = ERA5_DIR / "era5_spatial_aligned.zarr"
|
||||
MONTHLY_PATH = ERA5_DIR / "era5_monthly.zarr"
|
||||
YEARLY_PATH = ERA5_DIR / "era5_yearly.zarr"
|
||||
|
||||
min_lat = 50
|
||||
max_lat = 85
|
||||
min_time = "2022-01-01"
|
||||
max_lat = 83.7 # Ensures the right Chunks Size (90 - 64 / 10 + 0.1)
|
||||
min_time = "1990-01-01"
|
||||
max_time = "2024-12-31"
|
||||
subset = {"latitude": slice(max_lat, min_lat), "time": slice(min_time, max_time)}
|
||||
|
||||
DATA_DIR = Path("/isipd/projects/p_aicore_pf/tohoel001/era5_thawing_data")
|
||||
|
||||
today = time.strftime("%Y-%m-%d")
|
||||
|
||||
|
|
@ -63,20 +115,67 @@ today = time.strftime("%Y-%m-%d")
|
|||
# Enrich -> Aggregate temporally
|
||||
|
||||
|
||||
# TODO: Rethink aggregations by differentiating between "instant" and "accum" variables:
|
||||
# https://consensus.app/search/instantaneous-versus-accumulated-weather/JBaNbhc1R_-BwN5E9Un0Fw/
|
||||
|
||||
# ================
|
||||
# === Download ===
|
||||
# ================
|
||||
|
||||
|
||||
def create_encoding(ds: xr.Dataset):
    """Build the zarr encoding mapping for a dataset.

    Every data variable and every coordinate of ``ds`` gets its own Blosc
    compressor configured with ``cname="zstd"`` and ``clevel=9``.

    Args:
        ds (xr.Dataset): Dataset whose variables and coordinates are encoded.

    Returns:
        dict: Maps each variable / coordinate name to ``{"compressors": Blosc(...)}``.

    """
    encoding = {}
    # Data variables first, then coordinates - same order as the names are listed on the dataset
    for name in (*ds.data_vars, *ds.coords):
        encoding[name] = {"compressors": Blosc(cname="zstd", clevel=9)}
    return encoding
|
||||
|
||||
|
||||
def download_daily_aggregated():
|
||||
"""Download and aggregate ERA5 data to daily resolution.
|
||||
|
||||
Downloads ERA5 reanalysis data from the DESTINE Earth Data Hub and aggregates
|
||||
it to daily resolution. Includes temperature extremes, precipitation, snow,
|
||||
and surface heat flux variables.
|
||||
|
||||
The function downloads hourly data and creates daily aggregates:
|
||||
- Temperature: daily min/max
|
||||
- Precipitation and snowfall: daily totals
|
||||
- Snow cover and depth: daily means
|
||||
- Surface heat flux: daily totals
|
||||
- Lake ice temperature: daily max
|
||||
|
||||
The aggregated data is saved to a zarr file with compression.
|
||||
"""
|
||||
era5 = xr.open_dataset(
|
||||
"https://data.earthdatahub.destine.eu/era5/reanalysis-era5-land-no-antartica-v0.zarr",
|
||||
storage_options={"client_kwargs": {"trust_env": True}},
|
||||
chunks={"latitude": 64 * 4, "longitude": 64 * 4},
|
||||
chunks={},
|
||||
# chunks={},
|
||||
engine="zarr",
|
||||
).rename({"valid_time": "time"})
|
||||
subset = {
|
||||
"latitude": slice(max_lat, min_lat),
|
||||
}
|
||||
|
||||
# Compute the closest chunk-start to min_time, to avoid problems with cropped chunks at the start
|
||||
tchunksize = era5.chunksizes["time"][0]
|
||||
era5_chunk_starts = pd.date_range(era5.time.min().item(), era5.time.max().item(), freq=f"{tchunksize}h")
|
||||
closest_chunk_start = era5_chunk_starts[
|
||||
era5_chunk_starts.get_indexer([pd.to_datetime(min_time)], method="ffill")[0]
|
||||
]
|
||||
subset["time"] = slice(str(closest_chunk_start), max_time)
|
||||
|
||||
era5 = era5.sel(**subset)
|
||||
|
||||
era5_agg = xr.merge(
|
||||
|
|
@ -84,38 +183,59 @@ def download_daily_aggregated():
|
|||
era5.t2m.resample(time="1D").max().rename("t2m_daily_max"),
|
||||
era5.t2m.resample(time="1D").min().rename("t2m_daily_min"),
|
||||
era5.tp.resample(time="1D").sum().rename("tp_daily_sum"),
|
||||
# era5.sf.resample(time="1D").sum().rename("sf_daily_sum"),
|
||||
# era5.snowc.resample(time="1D").mean().rename("snowc_daily_mean"),
|
||||
# era5.sde.resample(time="1D").mean().rename("sde_daily_mean"),
|
||||
# era5.sshf.resample(time="1D").sum().rename("sshf_daily_sum"),
|
||||
# era5.lblt.resample(time="1D").max().rename("lblt_daily_max"),
|
||||
era5.sf.resample(time="1D").sum().rename("sf_daily_sum"),
|
||||
era5.snowc.resample(time="1D").mean().rename("snowc_daily_mean"),
|
||||
era5.sde.resample(time="1D").mean().rename("sde_daily_mean"),
|
||||
era5.sshf.resample(time="1D").sum().rename("sshf_daily_sum"),
|
||||
era5.lblt.resample(time="1D").max().rename("lblt_daily_max"),
|
||||
]
|
||||
)
|
||||
|
||||
# Rechunk if the first time chunk is not the same as the middle ones
|
||||
if era5_agg.chunksizes["time"][0] != era5_agg.chunksizes["time"][1]:
|
||||
era5_agg = era5_agg.chunk({"time": 120})
|
||||
|
||||
# Assign attributes
|
||||
era5_agg["t2m_daily_max"].attrs = {"long_name": "Daily maximum 2 metre temperature", "units": "K"}
|
||||
era5_agg["t2m_daily_min"].attrs = {"long_name": "Daily minimum 2 metre temperature", "units": "K"}
|
||||
era5_agg["tp_daily_sum"].attrs = {"long_name": "Daily total precipitation", "units": "m"}
|
||||
# era5_agg["sf_daily_sum"].attrs = {"long_name": "Daily total snow fall", "units": "m"}
|
||||
# era5_agg["snowc_daily_mean"].attrs = {"long_name": "Daily mean snow cover", "units": "m"}
|
||||
# era5_agg["sde_daily_mean"].attrs = {"long_name": "Daily mean snow depth", "units": "m"}
|
||||
# era5_agg["sshf_daily_sum"].attrs = {"long_name": "Daily total surface sensible heat flux", "units": "J/m²"}
|
||||
# era5_agg["lblt_daily_max"].attrs = {"long_name": "Daily maximum lake ice bottom temperature", "units": "K"}
|
||||
era5_agg["sf_daily_sum"].attrs = {"long_name": "Daily total snow fall", "units": "m"}
|
||||
era5_agg["snowc_daily_mean"].attrs = {"long_name": "Daily mean snow cover", "units": "m"}
|
||||
era5_agg["sde_daily_mean"].attrs = {"long_name": "Daily mean snow depth", "units": "m"}
|
||||
era5_agg["sshf_daily_sum"].attrs = {"long_name": "Daily total surface sensible heat flux", "units": "J/m²"}
|
||||
era5_agg["lblt_daily_max"].attrs = {"long_name": "Daily maximum lake ice bottom temperature", "units": "K"}
|
||||
|
||||
era5_agg = era5_agg.odc.assign_crs("epsg:4326")
|
||||
era5_agg = era5_agg.drop_vars(["surface", "number", "depthBelowLandLayer"])
|
||||
era5_agg.to_zarr(AGG_PATH, mode="w", encoding=create_encoding(era5_agg), consolidated=False)
|
||||
|
||||
|
||||
def crosses_antimeridian(geom: Polygon) -> bool:
|
||||
@cli.command
def download():
    """Download daily aggregated ERA5 data inside a local Dask cluster.

    Starts a ``dd.LocalCluster`` with one worker, 10 threads per worker and a
    100GB memory limit, connects a client to it, prints the client and its
    dashboard link and then runs ``download_daily_aggregated``.
    """
    with dd.LocalCluster(n_workers=1, threads_per_worker=10, memory_limit="100GB") as cluster:
        with dd.Client(cluster) as client:
            # Show how to reach the dashboard before the long-running download starts
            print(client)
            print(client.dashboard_link)
            download_daily_aggregated()
            print("Downloaded and aggregated ERA5 data.")
|
||||
|
||||
|
||||
# ===========================
|
||||
# === Spatial Aggregation ===
|
||||
# ===========================
|
||||
|
||||
|
||||
def _crosses_antimeridian(geom: Polygon) -> bool:
    """Tell whether a polygon's vertices straddle the antimeridian.

    A geometry counts as crossing when it has vertices with both positive and
    negative x-coordinates and the largest absolute x-coordinate exceeds 90.
    The x-coordinate is presumably the longitude in degrees - confirm with callers.

    Args:
        geom (Polygon): Geometry to inspect.

    Returns:
        bool: Truthy if the geometry is considered to cross the antimeridian.

    """
    lons = shapely.get_coordinates(geom)[:, 0]
    spans_both_signs = (lons > 0).any() and (lons < 0).any()
    if not spans_both_signs:
        # Vertices on one side only: this is neither a prime- nor an antimeridian crossing
        return spans_both_signs
    # Sign change far away from 0 means the wrap happens at +-180 rather than at 0
    return abs(lons).max() > 90
|
||||
|
||||
|
||||
def split_antimeridian_cell(geom: Polygon) -> list[Polygon]:
|
||||
def _split_antimeridian_cell(geom: Polygon) -> list[Polygon]:
|
||||
# Assumes that it is an antimeridian hex
|
||||
coords = shapely.get_coordinates(geom)
|
||||
for i in range(coords.shape[0]):
|
||||
|
|
@ -127,53 +247,134 @@ def split_antimeridian_cell(geom: Polygon) -> list[Polygon]:
|
|||
return list(polys.geoms)
|
||||
|
||||
|
||||
def check_geobox(geobox):
|
||||
def _check_geobox(geobox):
|
||||
x, y = geobox.shape
|
||||
return x > 1 and y > 1
|
||||
|
||||
|
||||
def extract_cell_data(idx: int, geom: Polygon) -> bool:
    """Extract ERA5 data for one grid cell and write it to the aligned zarr store.

    The daily aggregated ERA5 dataset is cropped to the cell geometry and
    averaged over ``latitude`` and ``longitude``. Cells that cross the
    antimeridian are split into parts first; the per-part means are then
    averaged with equal weight. The result is written into the matching
    region of ``ALIGNED_PATH``.

    Args:
        idx (int): Index of the grid cell; used as the ``cell`` coordinate.
        geom (Polygon): Polygon geometry of the grid cell (interpreted as epsg:4326).

    Returns:
        bool: True if data was written, False if no part of the cell passes
        ``_check_geobox`` (i.e. the cell does not usefully overlap the ERA5 data).

    """
    era5_agg = (
        xr.open_zarr(AGG_PATH, consolidated=False)
        .set_coords("spatial_ref")
        .drop_vars(["surface", "number", "depthBelowLandLayer"])
    )

    # Split the geometry in case it crosses the antimeridian
    parts = _split_antimeridian_cell(geom) if _crosses_antimeridian(geom) else [geom]

    cell_data = []
    for part in parts:
        part_geom = odc.geo.Geometry(part, crs="epsg:4326")
        # Skip parts whose enclosing geobox is degenerate (at most one element along an axis)
        if not _check_geobox(era5_agg.odc.geobox.enclosing(part_geom)):
            continue
        cell_data.append(era5_agg.odc.crop(part_geom).drop_vars("spatial_ref").mean(["latitude", "longitude"]))

    if not cell_data:
        return False

    if len(cell_data) == 1:
        cell_mean = cell_data[0]
    else:
        cell_mean = xr.concat(cell_data, dim="part").mean("part")

    # Load the values before writing them into the pre-allocated region of the store
    cell_mean = cell_mean.expand_dims({"cell": [idx]}).compute()
    cell_mean.to_zarr(ALIGNED_PATH, region="auto", consolidated=False)
    return True
|
||||
|
||||
|
||||
def spatial_matching(grid: gpd.GeoDataFrame, n_workers: int = 10):
|
||||
@cli.command
def spatial_agg(grid: Literal["hex", "healpix"], level: int, n_workers: int = 10):
    """Perform spatial aggregation of ERA5 data to grid cells.

    Loads the grid ``permafrost_{grid}{level}`` from ``DATA_DIR / "grids"``,
    writes an all-zero zarr store with one chunk per cell (covering the full
    time axis) to ``ALIGNED_PATH`` and then fills it cell by cell by running
    ``extract_cell_data`` in a thread pool. Errors of single cells are printed
    and do not abort the run.

    Args:
        grid ("hex", "healpix"): Grid type.
        level (int): Grid resolution level.
        n_workers (int, optional): Number of parallel workers to use. Defaults to 10.

    """
    gridname = f"permafrost_{grid}{level}"
    # Keep the CLI argument `grid` (a string) separate from the loaded GeoDataFrame
    grid_gdf = gpd.read_parquet(DATA_DIR / f"grids/{gridname}_grid.parquet")

    # Create an empty zarr array with the right dimensions
    era5_agg = (
        xr.open_zarr(AGG_PATH, consolidated=False)
        .set_coords("spatial_ref")
        .drop_vars(["surface", "number", "depthBelowLandLayer"])
    )
    assert {"latitude", "longitude", "time"} == set(era5_agg.dims), (
        f"Expected dims ('latitude', 'longitude', 'time'), got {era5_agg.dims}"
    )
    assert era5_agg.odc.crs == "epsg:4326", f"Expected CRS 'epsg:4326', got {era5_agg.odc.crs}"
    empty = (
        xr.zeros_like(era5_agg.isel(latitude=0, longitude=0))
        # The index labels are all that is needed here - no need to build every row
        .expand_dims({"cell": list(grid_gdf.index)})
        .chunk({"cell": 1, "time": len(era5_agg.time)})
    )
    empty.to_zarr(ALIGNED_PATH, mode="w", consolidated=False, encoding=create_encoding(empty))

    print(f"Starting spatial matching of {len(grid_gdf)} cells with {n_workers} workers...")

    # TODO: Maybe change to process pool executor?
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = {
            executor.submit(extract_cell_data, idx, row.geometry): idx
            for idx, row in grid_gdf.to_crs("epsg:4326").iterrows()
        }
        for future in track(as_completed(futures), total=len(futures), description="Processing cells"):
            idx = futures[future]
            try:
                flag = future.result()
            except Exception as e:
                # Best effort: report the failing cell and continue with the remaining ones
                print(f"Error processing cell {idx}: {e}")
                print(type(e))
            else:
                if flag:
                    print(f"Successfully written cell {idx}")
                else:
                    print(f"Cell {idx} did not overlap with ERA5 data.")
    print("Finished spatial matching.")
|
||||
|
||||
|
||||
# ============================
|
||||
# === Temporal Aggregation ===
|
||||
# ============================
|
||||
|
||||
|
||||
def daily_enrich() -> xr.Dataset:
|
||||
era5 = xr.open_zarr(ALIGNED_PATH)
|
||||
"""Enrich daily ERA5 data with derived climate variables.
|
||||
|
||||
Loads spatially aligned ERA5 data and computes additional climate variables.
|
||||
Creates derived variables including temperature statistics, degree days, and occurrence indicators.
|
||||
|
||||
Derived variables include:
|
||||
- Daily average and range temperature
|
||||
- Temperature skewness
|
||||
- Thawing and freezing degree days
|
||||
- Thawing and freezing day counts
|
||||
- Precipitation and snowfall occurrences
|
||||
- Snow isolation index
|
||||
|
||||
Returns:
|
||||
xr.Dataset: Enriched dataset with original and derived variables.
|
||||
|
||||
"""
|
||||
era5 = xr.open_zarr(ALIGNED_PATH, consolidated=False).set_coords("spatial_ref")
|
||||
assert {"cell", "time"} == set(era5.dims), f"Expected dims ('cell', 'time'), got {era5.dims}"
|
||||
|
||||
# Formulas based on Groeke et al. (2025) Stochastic Weather generation...
|
||||
|
|
@ -206,6 +407,15 @@ def daily_enrich() -> xr.Dataset:
|
|||
|
||||
|
||||
def monthly_aggregate():
|
||||
"""Aggregate enriched daily ERA5 data to monthly resolution.
|
||||
|
||||
Takes the enriched daily ERA5 data and creates monthly aggregates using
|
||||
appropriate statistical functions for each variable type. Temperature
|
||||
variables use min/max/mean, accumulation variables use sums, and derived
|
||||
variables use appropriate aggregations.
|
||||
|
||||
The aggregated monthly data is saved to a zarr file for further processing.
|
||||
"""
|
||||
era5 = daily_enrich()
|
||||
assert {"cell", "time"} == set(era5.dims), f"Expected dims ('cell', 'time'), got {era5.dims}"
|
||||
|
||||
|
|
@ -213,32 +423,46 @@ def monthly_aggregate():
|
|||
monthly = xr.merge(
|
||||
[
|
||||
# Original variables
|
||||
era5.t2m_daily_min.resample(time="1M").min().rename("t2m_monthly_min"),
|
||||
era5.t2m_daily_max.resample(time="1M").max().rename("t2m_monthly_max"),
|
||||
era5.tp_daily_sum.resample(time="1M").sum().rename("tp_monthly_sum"),
|
||||
era5.sf_daily_sum.resample(time="1M").sum().rename("sf_monthly_sum"),
|
||||
era5.snowc_daily_mean.resample(time="1M").mean().rename("snowc_monthly_mean"),
|
||||
era5.sde_daily_mean.resample(time="1M").mean().rename("sde_monthly_mean"),
|
||||
era5.sshf_daily_sum.resample(time="1M").sum().rename("sshf_monthly_sum"),
|
||||
era5.lblt_daily_max.resample(time="1M").max().rename("lblt_monthly_max"),
|
||||
era5.t2m_daily_min.resample(time="1ME").min().rename("t2m_monthly_min"),
|
||||
era5.t2m_daily_max.resample(time="1ME").max().rename("t2m_monthly_max"),
|
||||
era5.tp_daily_sum.resample(time="1ME").sum().rename("tp_monthly_sum"),
|
||||
era5.sf_daily_sum.resample(time="1ME").sum().rename("sf_monthly_sum"),
|
||||
era5.snowc_daily_mean.resample(time="1ME").mean().rename("snowc_monthly_mean"),
|
||||
era5.sde_daily_mean.resample(time="1ME").mean().rename("sde_monthly_mean"),
|
||||
era5.sshf_daily_sum.resample(time="1ME").sum().rename("sshf_monthly_sum"),
|
||||
era5.lblt_daily_max.resample(time="1ME").max().rename("lblt_monthly_max"),
|
||||
# Enriched variables
|
||||
era5.t2m_daily_avg.resample(time="1M").mean().rename("t2m_monthly_avg"),
|
||||
era5.t2m_daily_range.resample(time="1M").mean().rename("t2m_daily_range_monthly_avg"),
|
||||
era5.t2m_daily_skew.resample(time="1M").mean().rename("t2m_daily_skew_monthly_avg"),
|
||||
era5.thawing_degree_days.resample(time="1M").sum().rename("thawing_degree_days_monthly"),
|
||||
era5.freezing_degree_days.resample(time="1M").sum().rename("freezing_degree_days_monthly"),
|
||||
era5.thawing_days.resample(time="1M").sum().rename("thawing_days_monthly"),
|
||||
era5.freezing_days.resample(time="1M").sum().rename("freezing_days_monthly"),
|
||||
era5.precipitation_occurrences.resample(time="1M").sum().rename("precipitation_occurrences_monthly"),
|
||||
era5.snowfall_occurrences.resample(time="1M").sum().rename("snowfall_occurrences_monthly"),
|
||||
era5.snow_isolation.resample(time="1M").mean().rename("snow_isolation_monthly_mean"),
|
||||
era5.t2m_daily_avg.resample(time="1ME").mean().rename("t2m_monthly_avg"),
|
||||
era5.t2m_daily_range.resample(time="1ME").mean().rename("t2m_monthly_range_avg"),
|
||||
era5.t2m_daily_skew.resample(time="1ME").mean().rename("t2m_monthly_skew_avg"),
|
||||
era5.thawing_degree_days.resample(time="1ME").sum().rename("thawing_degree_days_monthly"),
|
||||
era5.freezing_degree_days.resample(time="1ME").sum().rename("freezing_degree_days_monthly"),
|
||||
era5.thawing_days.resample(time="1ME").sum().rename("thawing_days_monthly"),
|
||||
era5.freezing_days.resample(time="1ME").sum().rename("freezing_days_monthly"),
|
||||
era5.precipitation_occurrences.resample(time="1ME").sum().rename("precipitation_occurrences_monthly"),
|
||||
era5.snowfall_occurrences.resample(time="1ME").sum().rename("snowfall_occurrences_monthly"),
|
||||
era5.snow_isolation.resample(time="1ME").mean().rename("snow_isolation_monthly_mean"),
|
||||
]
|
||||
)
|
||||
monthly.to_zarr(MONTHLY_PATH, mode="w", encoding=create_encoding(monthly), consolidated=False)
|
||||
|
||||
|
||||
def yearly_aggregate():
|
||||
monthly = xr.open_zarr(MONTHLY_PATH)
|
||||
"""Aggregate monthly ERA5 data to yearly resolution with seasonal splits.
|
||||
|
||||
Takes monthly aggregated data and creates yearly aggregates using a shifted
|
||||
calendar (October to September) to better capture Arctic seasonal patterns.
|
||||
Creates separate aggregates for full year, winter (Oct-Apr), and summer
|
||||
(May-Sep) periods.
|
||||
|
||||
The first and last incomplete years are excluded from the analysis.
|
||||
Winter months are defined as months 1-7 in the shifted calendar,
|
||||
and summer months are 8-12.
|
||||
|
||||
The final dataset includes yearly, winter, and summer aggregates for all
|
||||
climate variables, saved to a zarr file.
|
||||
"""
|
||||
monthly = xr.open_zarr(MONTHLY_PATH, consolidated=False).set_coords("spatial_ref")
|
||||
assert {"cell", "time"} == set(monthly.dims), f"Expected dims ('cell', 'time'), got {monthly.dims}"
|
||||
|
||||
# Yearly aggregates (shifted by +10 months to start in October, first and last years will be cropped)
|
||||
|
|
@ -249,32 +473,34 @@ def yearly_aggregate():
|
|||
yearly = xr.merge(
|
||||
[
|
||||
# Original variables
|
||||
monthly_shifted.t2m_monthly_min.resample(time="1Y").min().rename("t2m_yearly_min"),
|
||||
monthly_shifted.t2m_monthly_max.resample(time="1Y").max().rename("t2m_yearly_max"),
|
||||
monthly_shifted.tp_monthly_sum.resample(time="1Y").sum().rename("tp_yearly_sum"),
|
||||
monthly_shifted.sf_monthly_sum.resample(time="1Y").sum().rename("sf_yearly_sum"),
|
||||
monthly_shifted.snowc_monthly_mean.resample(time="1Y").mean().rename("snowc_yearly_mean"),
|
||||
monthly_shifted.sde_monthly_mean.resample(time="1Y").mean().rename("sde_yearly_mean"),
|
||||
monthly_shifted.sshf_monthly_sum.resample(time="1Y").sum().rename("sshf_yearly_sum"),
|
||||
monthly_shifted.lblt_monthly_max.resample(time="1Y").max().rename("lblt_yearly_max"),
|
||||
monthly_shifted.t2m_monthly_min.resample(time="1YE").min().rename("t2m_yearly_min"),
|
||||
monthly_shifted.t2m_monthly_max.resample(time="1YE").max().rename("t2m_yearly_max"),
|
||||
monthly_shifted.tp_monthly_sum.resample(time="1YE").sum().rename("tp_yearly_sum"),
|
||||
monthly_shifted.sf_monthly_sum.resample(time="1YE").sum().rename("sf_yearly_sum"),
|
||||
monthly_shifted.snowc_monthly_mean.resample(time="1YE").mean().rename("snowc_yearly_mean"),
|
||||
monthly_shifted.sde_monthly_mean.resample(time="1YE").mean().rename("sde_yearly_mean"),
|
||||
monthly_shifted.sshf_monthly_sum.resample(time="1YE").sum().rename("sshf_yearly_sum"),
|
||||
monthly_shifted.lblt_monthly_max.resample(time="1YE").max().rename("lblt_yearly_max"),
|
||||
# Enriched variables
|
||||
monthly_shifted.t2m_monthly_avg.resample(time="1Y").mean().rename("t2m_yearly_avg"),
|
||||
monthly_shifted.t2m_monthly_avg.resample(time="1YE").mean().rename("t2m_yearly_avg"),
|
||||
# TODO: Check if this is correct -> use daily / hourly data instead for range and skew?
|
||||
monthly_shifted.t2m_monthly_range.resample(time="1Y").mean().rename("t2m_daily_range_yearly_avg"),
|
||||
monthly_shifted.t2m_monthly_skew.resample(time="1Y").mean().rename("t2m_daily_skew_yearly_avg"),
|
||||
monthly_shifted.thawing_degree_days_monthly.resample(time="1Y").sum().rename("thawing_degree_days_yearly"),
|
||||
monthly_shifted.freezing_degree_days_monthly.resample(time="1Y")
|
||||
monthly_shifted.t2m_monthly_range_avg.resample(time="1YE").mean().rename("t2m_daily_range_yearly_avg"),
|
||||
monthly_shifted.t2m_monthly_skew_avg.resample(time="1YE").mean().rename("t2m_daily_skew_yearly_avg"),
|
||||
monthly_shifted.thawing_degree_days_monthly.resample(time="1YE").sum().rename("thawing_degree_days_yearly"),
|
||||
monthly_shifted.freezing_degree_days_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("freezing_degree_days_yearly"),
|
||||
monthly_shifted.thawing_days_monthly.resample(time="1Y").sum().rename("thawing_days_yearly"),
|
||||
monthly_shifted.freezing_days_monthly.resample(time="1Y").sum().rename("freezing_days_yearly"),
|
||||
monthly_shifted.precipitation_occurrences_monthly.resample(time="1Y")
|
||||
monthly_shifted.thawing_days_monthly.resample(time="1YE").sum().rename("thawing_days_yearly"),
|
||||
monthly_shifted.freezing_days_monthly.resample(time="1YE").sum().rename("freezing_days_yearly"),
|
||||
monthly_shifted.precipitation_occurrences_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("precipitation_occurrences_yearly"),
|
||||
monthly_shifted.snowfall_occurrences_monthly.resample(time="1Y")
|
||||
monthly_shifted.snowfall_occurrences_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("snowfall_occurrences_yearly"),
|
||||
monthly_shifted.snow_isolation_monthly_mean.resample(time="1Y").mean().rename("snow_isolation_yearly_mean"),
|
||||
monthly_shifted.snow_isolation_monthly_mean.resample(time="1YE")
|
||||
.mean()
|
||||
.rename("snow_isolation_yearly_mean"),
|
||||
]
|
||||
)
|
||||
# Summer / Winter aggregates
|
||||
|
|
@ -286,34 +512,36 @@ def yearly_aggregate():
|
|||
winter = xr.merge(
|
||||
[
|
||||
# Original variables
|
||||
monthly_shifted_winter.t2m_monthly_min.resample(time="1Y").min().rename("t2m_winter_min"),
|
||||
monthly_shifted_winter.t2m_monthly_max.resample(time="1Y").max().rename("t2m_winter_max"),
|
||||
monthly_shifted_winter.tp_monthly_sum.resample(time="1Y").sum().rename("tp_winter_sum"),
|
||||
monthly_shifted_winter.sf_monthly_sum.resample(time="1Y").sum().rename("sf_winter_sum"),
|
||||
monthly_shifted_winter.snowc_monthly_mean.resample(time="1Y").mean().rename("snowc_winter_mean"),
|
||||
monthly_shifted_winter.sde_monthly_mean.resample(time="1Y").mean().rename("sde_winter_mean"),
|
||||
monthly_shifted_winter.sshf_monthly_sum.resample(time="1Y").sum().rename("sshf_winter_sum"),
|
||||
monthly_shifted_winter.lblt_monthly_max.resample(time="1Y").max().rename("lblt_winter_max"),
|
||||
monthly_shifted_winter.t2m_monthly_min.resample(time="1YE").min().rename("t2m_winter_min"),
|
||||
monthly_shifted_winter.t2m_monthly_max.resample(time="1YE").max().rename("t2m_winter_max"),
|
||||
monthly_shifted_winter.tp_monthly_sum.resample(time="1YE").sum().rename("tp_winter_sum"),
|
||||
monthly_shifted_winter.sf_monthly_sum.resample(time="1YE").sum().rename("sf_winter_sum"),
|
||||
monthly_shifted_winter.snowc_monthly_mean.resample(time="1YE").mean().rename("snowc_winter_mean"),
|
||||
monthly_shifted_winter.sde_monthly_mean.resample(time="1YE").mean().rename("sde_winter_mean"),
|
||||
monthly_shifted_winter.sshf_monthly_sum.resample(time="1YE").sum().rename("sshf_winter_sum"),
|
||||
monthly_shifted_winter.lblt_monthly_max.resample(time="1YE").max().rename("lblt_winter_max"),
|
||||
# Enriched variables
|
||||
monthly_shifted_winter.t2m_monthly_avg.resample(time="1Y").mean().rename("t2m_winter_avg"),
|
||||
monthly_shifted_winter.t2m_monthly_avg.resample(time="1YE").mean().rename("t2m_winter_avg"),
|
||||
# TODO: Check if this is correct -> use daily / hourly data instead for range and skew?
|
||||
monthly_shifted_winter.t2m_monthly_range.resample(time="1Y").mean().rename("t2m_daily_range_winter_avg"),
|
||||
monthly_shifted_winter.t2m_monthly_skew.resample(time="1Y").mean().rename("t2m_daily_skew_winter_avg"),
|
||||
monthly_shifted_winter.thawing_degree_days_monthly.resample(time="1Y")
|
||||
monthly_shifted_winter.t2m_monthly_range_avg.resample(time="1YE")
|
||||
.mean()
|
||||
.rename("t2m_daily_range_winter_avg"),
|
||||
monthly_shifted_winter.t2m_monthly_skew_avg.resample(time="1YE").mean().rename("t2m_daily_skew_winter_avg"),
|
||||
monthly_shifted_winter.thawing_degree_days_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("thawing_degree_days_winter"),
|
||||
monthly_shifted_winter.freezing_degree_days_monthly.resample(time="1Y")
|
||||
monthly_shifted_winter.freezing_degree_days_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("freezing_degree_days_winter"),
|
||||
monthly_shifted_winter.thawing_days_monthly.resample(time="1Y").sum().rename("thawing_days_winter"),
|
||||
monthly_shifted_winter.freezing_days_monthly.resample(time="1Y").sum().rename("freezing_days_winter"),
|
||||
monthly_shifted_winter.precipitation_occurrences_monthly.resample(time="1Y")
|
||||
monthly_shifted_winter.thawing_days_monthly.resample(time="1YE").sum().rename("thawing_days_winter"),
|
||||
monthly_shifted_winter.freezing_days_monthly.resample(time="1YE").sum().rename("freezing_days_winter"),
|
||||
monthly_shifted_winter.precipitation_occurrences_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("precipitation_occurrences_winter"),
|
||||
monthly_shifted_winter.snowfall_occurrences_monthly.resample(time="1Y")
|
||||
monthly_shifted_winter.snowfall_occurrences_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("snowfall_occurrences_winter"),
|
||||
monthly_shifted_winter.snow_isolation_monthly_mean.resample(time="1Y")
|
||||
monthly_shifted_winter.snow_isolation_monthly_mean.resample(time="1YE")
|
||||
.mean()
|
||||
.rename("snow_isolation_winter_mean"),
|
||||
]
|
||||
|
|
@ -322,34 +550,36 @@ def yearly_aggregate():
|
|||
summer = xr.merge(
|
||||
[
|
||||
# Original variables
|
||||
monthly_shifted_summer.t2m_monthly_min.resample(time="1Y").min().rename("t2m_summer_min"),
|
||||
monthly_shifted_summer.t2m_monthly_max.resample(time="1Y").max().rename("t2m_summer_max"),
|
||||
monthly_shifted_summer.tp_monthly_sum.resample(time="1Y").sum().rename("tp_summer_sum"),
|
||||
monthly_shifted_summer.sf_monthly_sum.resample(time="1Y").sum().rename("sf_summer_sum"),
|
||||
monthly_shifted_summer.snowc_monthly_mean.resample(time="1Y").mean().rename("snowc_summer_mean"),
|
||||
monthly_shifted_summer.sde_monthly_mean.resample(time="1Y").mean().rename("sde_summer_mean"),
|
||||
monthly_shifted_summer.sshf_monthly_sum.resample(time="1Y").sum().rename("sshf_summer_sum"),
|
||||
monthly_shifted_summer.lblt_monthly_max.resample(time="1Y").max().rename("lblt_summer_max"),
|
||||
monthly_shifted_summer.t2m_monthly_min.resample(time="1YE").min().rename("t2m_summer_min"),
|
||||
monthly_shifted_summer.t2m_monthly_max.resample(time="1YE").max().rename("t2m_summer_max"),
|
||||
monthly_shifted_summer.tp_monthly_sum.resample(time="1YE").sum().rename("tp_summer_sum"),
|
||||
monthly_shifted_summer.sf_monthly_sum.resample(time="1YE").sum().rename("sf_summer_sum"),
|
||||
monthly_shifted_summer.snowc_monthly_mean.resample(time="1YE").mean().rename("snowc_summer_mean"),
|
||||
monthly_shifted_summer.sde_monthly_mean.resample(time="1YE").mean().rename("sde_summer_mean"),
|
||||
monthly_shifted_summer.sshf_monthly_sum.resample(time="1YE").sum().rename("sshf_summer_sum"),
|
||||
monthly_shifted_summer.lblt_monthly_max.resample(time="1YE").max().rename("lblt_summer_max"),
|
||||
# Enriched variables
|
||||
monthly_shifted_summer.t2m_monthly_avg.resample(time="1Y").mean().rename("t2m_summer_avg"),
|
||||
monthly_shifted_summer.t2m_monthly_avg.resample(time="1YE").mean().rename("t2m_summer_avg"),
|
||||
# TODO: Check if this is correct -> use daily / hourly data instead for range and skew?
|
||||
monthly_shifted_summer.t2m_monthly_range.resample(time="1Y").mean().rename("t2m_daily_range_summer_avg"),
|
||||
monthly_shifted_summer.t2m_monthly_skew.resample(time="1Y").mean().rename("t2m_daily_skew_summer_avg"),
|
||||
monthly_shifted_summer.thawing_degree_days_summer.resample(time="1Y")
|
||||
monthly_shifted_summer.t2m_monthly_range_avg.resample(time="1YE")
|
||||
.mean()
|
||||
.rename("t2m_daily_range_summer_avg"),
|
||||
monthly_shifted_summer.t2m_monthly_skew_avg.resample(time="1YE").mean().rename("t2m_daily_skew_summer_avg"),
|
||||
monthly_shifted_summer.thawing_degree_days_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("thawing_degree_days_summer"),
|
||||
monthly_shifted_summer.freezing_degree_days_summer.resample(time="1Y")
|
||||
monthly_shifted_summer.freezing_degree_days_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("freezing_degree_days_summer"),
|
||||
monthly_shifted_summer.thawing_days_summer.resample(time="1Y").sum().rename("thawing_days_summer"),
|
||||
monthly_shifted_summer.freezing_days_summer.resample(time="1Y").sum().rename("freezing_days_summer"),
|
||||
monthly_shifted_summer.precipitation_occurrences_summer.resample(time="1Y")
|
||||
monthly_shifted_summer.thawing_days_monthly.resample(time="1YE").sum().rename("thawing_days_summer"),
|
||||
monthly_shifted_summer.freezing_days_monthly.resample(time="1YE").sum().rename("freezing_days_summer"),
|
||||
monthly_shifted_summer.precipitation_occurrences_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("precipitation_occurrences_summer"),
|
||||
monthly_shifted_summer.snowfall_occurrences_summer.resample(time="1Y")
|
||||
monthly_shifted_summer.snowfall_occurrences_monthly.resample(time="1YE")
|
||||
.sum()
|
||||
.rename("snowfall_occurrences_summer"),
|
||||
monthly_shifted_summer.snow_isolation_summer.resample(time="1Y")
|
||||
monthly_shifted_summer.snow_isolation_monthly_mean.resample(time="1YE")
|
||||
.mean()
|
||||
.rename("snow_isolation_summer_mean"),
|
||||
]
|
||||
|
|
@ -359,32 +589,28 @@ def yearly_aggregate():
|
|||
combined.to_zarr(YEARLY_PATH, mode="w", encoding=create_encoding(combined), consolidated=False)
|
||||
|
||||
|
||||
def cli(grid: Literal["hex", "healpix"], level: int, download: bool = False, n_workers: int = 10):
|
||||
"""Run the CLI for ERA5 data processing.
|
||||
@cli.command
|
||||
def temporal_agg(n_workers: int = 10):
|
||||
"""Perform temporal aggregation of ERA5 data using a Dask cluster.
|
||||
|
||||
Creates a Dask cluster and runs both monthly and yearly aggregation
|
||||
functions to generate temporally aggregated climate datasets. The
|
||||
processing uses parallel workers for efficient computation.
|
||||
|
||||
Args:
|
||||
grid (Literal["hex", "healpix"]): The grid type to use.
|
||||
level (int): The processing level.
|
||||
download (bool, optional): Whether to download data. Defaults to False.
|
||||
n_workers (int, optional): Number of workers for parallel processing. Defaults to 10.
|
||||
n_workers (int, optional): Number of Dask workers to use. Defaults to 10.
|
||||
|
||||
"""
|
||||
cluster = dd.LocalCluster(n_workers=n_workers, threads_per_worker=4, memory_limit="20GB")
|
||||
client = dd.Client(cluster)
|
||||
with (
|
||||
dd.LocalCluster(n_workers=n_workers, threads_per_worker=20, memory_limit="10GB") as cluster,
|
||||
dd.Client(cluster) as client,
|
||||
):
|
||||
print(client)
|
||||
print(client.dashboard_link)
|
||||
|
||||
if download:
|
||||
download_daily_aggregated()
|
||||
print("Downloaded and aggregated ERA5 data.")
|
||||
|
||||
grid = gpd.read_parquet(DATA_DIR / f"grids/permafrost_{grid}{level}_grid.parquet")
|
||||
spatial_matching(grid, n_workers=n_workers)
|
||||
print("Spatially matched ERA5 data to grid.")
|
||||
monthly_aggregate()
|
||||
yearly_aggregate()
|
||||
print("Enriched ERA5 data with additional features and aggregated it temporally.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cyclopts.run(cli)
|
||||
cli()
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ dependencies = [
|
|||
"distributed>=2025.5.1",
|
||||
"earthengine-api>=1.6.9",
|
||||
"eemont>=2025.7.1",
|
||||
# "entropyc",
|
||||
"entropyc",
|
||||
"flox>=0.10.4",
|
||||
"folium>=0.19.7",
|
||||
"geemap>=0.36.3",
|
||||
|
|
@ -37,12 +37,12 @@ dependencies = [
|
|||
"smart-geocubes[arcticdem,dask,stac,viz]>=0.0.9",
|
||||
"stopuhr>=0.0.10",
|
||||
"xanimate",
|
||||
"xarray>=2025.4.0",
|
||||
"xarray>=2025.9.0",
|
||||
"xdggs>=0.2.1",
|
||||
"xvec>=0.5.1",
|
||||
"zarr[remote]>=3.0.8",
|
||||
"zarr[remote]>=3.1.3",
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
# entropyc = { git = "ssh://git@github.com/AlbertEMC2Stein/entropyc", branch = "refactor/tobi" }
|
||||
entropyc = { git = "ssh://git@github.com/AlbertEMC2Stein/entropyc", branch = "refactor/tobi" }
|
||||
xanimate = { git = "https://github.com/davbyr/xAnimate" }
|
||||
|
|
|
|||
15
uv.lock
generated
15
uv.lock
generated
|
|
@ -1075,6 +1075,7 @@ dependencies = [
|
|||
{ name = "distributed" },
|
||||
{ name = "earthengine-api" },
|
||||
{ name = "eemont" },
|
||||
{ name = "entropyc" },
|
||||
{ name = "flox" },
|
||||
{ name = "folium" },
|
||||
{ name = "geemap" },
|
||||
|
|
@ -1113,6 +1114,7 @@ requires-dist = [
|
|||
{ name = "distributed", specifier = ">=2025.5.1" },
|
||||
{ name = "earthengine-api", specifier = ">=1.6.9" },
|
||||
{ name = "eemont", specifier = ">=2025.7.1" },
|
||||
{ name = "entropyc", git = "ssh://git@github.com/AlbertEMC2Stein/entropyc?branch=refactor%2Ftobi" },
|
||||
{ name = "flox", specifier = ">=0.10.4" },
|
||||
{ name = "folium", specifier = ">=0.19.7" },
|
||||
{ name = "geemap", specifier = ">=0.36.3" },
|
||||
|
|
@ -1134,10 +1136,19 @@ requires-dist = [
|
|||
{ name = "smart-geocubes", extras = ["arcticdem", "dask", "stac", "viz"], specifier = ">=0.0.9" },
|
||||
{ name = "stopuhr", specifier = ">=0.0.10" },
|
||||
{ name = "xanimate", git = "https://github.com/davbyr/xAnimate" },
|
||||
{ name = "xarray", specifier = ">=2025.4.0" },
|
||||
{ name = "xarray", specifier = ">=2025.9.0" },
|
||||
{ name = "xdggs", specifier = ">=0.2.1" },
|
||||
{ name = "xvec", specifier = ">=0.5.1" },
|
||||
{ name = "zarr", extras = ["remote"], specifier = ">=3.0.8" },
|
||||
{ name = "zarr", extras = ["remote"], specifier = ">=3.1.3" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "entropyc"
|
||||
version = "0.1.0"
|
||||
source = { git = "ssh://git@github.com/AlbertEMC2Stein/entropyc?branch=refactor%2Ftobi#22a191d194a76b6c182481acb2af1bde3f60b49e" }
|
||||
dependencies = [
|
||||
{ name = "numpy" },
|
||||
{ name = "scipy" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue