Finalize era5 and alphaearth

This commit is contained in:
Tobias Hölzer 2025-10-24 16:36:18 +02:00
parent ce4c728e1a
commit a562b2cf72
6 changed files with 1993 additions and 1392 deletions

View file

@ -11,6 +11,7 @@ import geemap
import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
from rich import pretty, print, traceback
from rich.progress import track
@ -26,7 +27,11 @@ EMBEDDINGS_DIR = DATA_DIR / "embeddings"
EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)
def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool = False):
cli = cyclopts.App(name="alpha-earth")
@cli.command()
def download(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool = False):
"""Extract satellite embeddings from Google Earth Engine and map them to a grid.
Args:
@ -93,8 +98,49 @@ def cli(grid: Literal["hex", "healpix"], level: int, backup_intermediate: bool =
print(f"Saved embeddings for year {year} to {embeddings_file.resolve()}.")
@cli.command()
def combine_to_zarr(grid: Literal["hex", "healpix"], level: int):
    """Combine yearly embeddings parquet files into a single zarr store.

    Reads one parquet file per year (2017-2024) from ``EMBEDDINGS_DIR``, stacks the
    ``{band}_{agg}`` columns into a ``(year, cell, band, agg)`` array and writes it to
    ``permafrost_{grid}{level}_embeddings.zarr`` in the same directory, overwriting any
    existing store.

    The cell axis is defined by the first year's file. Every year is aligned to it by
    ``cell_id`` (not by row position); cells missing in a given year stay NaN.

    Args:
        grid (Literal["hex", "healpix"]): The grid type to use.
        level (int): The grid level to use.

    """
    years = list(range(2017, 2025))
    aggs = ["median", "stdDev", "min", "max", "mean", "p1", "p5", "p25", "p75", "p95", "p99"]
    bands = [f"A{i:02d}" for i in range(64)]
    # Band-major / agg-minor column order, so each row reshapes directly to (band, agg).
    columns = [f"{band}_{agg}" for band in bands for agg in aggs]

    def _yearly_file(year: int):
        """Return the path of the embeddings parquet file for a single year."""
        return EMBEDDINGS_DIR / f"permafrost_{grid}{level}_embeddings-{year}.parquet"

    # The first year serves as the reference for the cell axis.
    cell_ids = gpd.read_parquet(_yearly_file(years[0])).cell_id.to_list()
    # ? Converting cell IDs from hex strings to integers for xdggs compatibility
    cells = [int(cid, 16) for cid in cell_ids]

    a = xr.DataArray(
        np.nan,
        dims=("year", "cell", "band", "agg"),
        coords={"year": years, "cell": cells, "band": bands, "agg": aggs},
    )

    # ? These attributes are needed for xdggs
    a.cell.attrs = {
        "grid_name": "h3" if grid == "hex" else "healpix",
        "level": level,
    }
    if grid == "healpix":
        a.cell.attrs["indexing_scheme"] = "nested"

    for year in track(years, total=len(years), description="Processing years..."):
        embs = gpd.read_parquet(_yearly_file(year))
        # Align rows to the reference cells by ID: a positional copy would silently
        # assign values to the wrong cells if a yearly file had a different row order.
        values = embs.set_index("cell_id")[columns].reindex(cell_ids).to_numpy(dtype=float)
        # One block assignment per year instead of one per (band, agg) pair.
        a.loc[{"year": year}] = values.reshape(len(cells), len(bands), len(aggs))

    zarr_path = EMBEDDINGS_DIR / f"permafrost_{grid}{level}_embeddings.zarr"
    a.to_zarr(zarr_path, consolidated=False, mode="w")
    print(f"Saved combined embeddings to {zarr_path.resolve()}.")
def main(): # noqa: D103
cyclopts.run(cli)
cli()
if __name__ == "__main__":

View file

@ -1,9 +1,17 @@
#!/bin/bash
# uv run alpha-earth --grid hex --level 3
uv run alpha-earth --grid hex --level 4
uv run alpha-earth --grid hex --level 5
uv run alpha-earth --grid healpix --level 6
uv run alpha-earth --grid healpix --level 7
uv run alpha-earth --grid healpix --level 8
uv run alpha-earth --grid healpix --level 9
# uv run alpha-earth download --grid hex --level 3
# uv run alpha-earth download --grid hex --level 4
# uv run alpha-earth download --grid hex --level 5
# uv run alpha-earth download --grid healpix --level 6
# uv run alpha-earth download --grid healpix --level 7
# uv run alpha-earth download --grid healpix --level 8
# uv run alpha-earth download --grid healpix --level 9
uv run alpha-earth combine-to-zarr --grid hex --level 3
uv run alpha-earth combine-to-zarr --grid hex --level 4
uv run alpha-earth combine-to-zarr --grid hex --level 5
uv run alpha-earth combine-to-zarr --grid healpix --level 6
uv run alpha-earth combine-to-zarr --grid healpix --level 7
uv run alpha-earth combine-to-zarr --grid healpix --level 8
uv run alpha-earth combine-to-zarr --grid healpix --level 9