"""Extract satellite embeddings from Google Earth Engine and map them to a grid."""

from pathlib import Path
from typing import Literal

import cyclopts
import ee
import geemap
import geopandas as gpd
from rich import pretty, traceback

pretty.install()
traceback.install()
ee.Initialize(project="ee-tobias-hoelzer")

DATA_DIR = Path("data")

# Year range accepted by `cli`, as stated in its docstring. Update these if the
# embedding collection is extended to further years.
_MIN_YEAR = 2017
_MAX_YEAR = 2024


def cli(grid: Literal["hex", "healpix"], level: int, year: int) -> None:
    """Extract satellite embeddings from Google Earth Engine and map them to a grid.

    Reads the grid from ``data/grids/permafrost_{grid}{level}_grid.parquet``, computes the
    per-cell median of each embedding band (``A00`` .. ``A63``) for the given year on
    Earth Engine, joins the result back onto the grid via ``cell_id`` and writes it to
    ``data/embeddings/permafrost_{grid}{level}_embeddings-{year}.parquet``.

    Args:
        grid (Literal["hex", "healpix"]): The grid type to use.
        level (int): The grid level to use.
        year (int): The year to extract embeddings for. Must be between 2017 and 2024.

    Raises:
        ValueError: If ``year`` is outside the supported range.

    """
    if not _MIN_YEAR <= year <= _MAX_YEAR:
        raise ValueError(f"year must be between {_MIN_YEAR} and {_MAX_YEAR}, got {year}")

    # Keep the GeoDataFrame in its own name: `grid` (the grid type string) is still
    # needed below to build the output file name.
    grid_gdf = gpd.read_parquet(DATA_DIR / f"grids/permafrost_{grid}{level}_grid.parquet")
    ee_grid = ee.FeatureCollection(grid_gdf.to_crs("epsg:4326").__geo_interface__)

    # The end date of filterDate is exclusive, so use Jan 1 of the following year
    # to cover the whole requested year.
    embedding_collection = ee.ImageCollection("GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL").filterDate(
        f"{year}-01-01", f"{year + 1}-01-01"
    )

    def extract_embedding(feature):
        """Attach the per-band median embedding within the feature's geometry to the feature."""
        # Restrict the collection to images touching this cell and mosaic them
        geom = feature.geometry()
        embedding = embedding_collection.filterBounds(geom).mosaic().clip(geom)
        # Get the median embedding value of every band over the geometry
        # NOTE(review): no `scale`/`crs` is passed, so the reduction runs in the mosaic's
        # default projection -- confirm this is the intended resolution.
        median_dict = embedding.reduceRegion(
            reducer=ee.Reducer.median(),
            geometry=geom,
        )
        # Add the median embedding values as properties to the feature
        return feature.set(median_dict)

    ee_grid_with_embeddings = ee_grid.map(extract_embedding)
    df = geemap.ee_to_df(ee_grid_with_embeddings)

    # Embedding bands are named A00 .. A63
    bands = [f"A{i:02d}" for i in range(64)]
    embeddings_on_grid = grid_gdf.merge(df[[*bands, "cell_id"]], on="cell_id", how="left")

    output_path = DATA_DIR / f"embeddings/permafrost_{grid}{level}_embeddings-{year}.parquet"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    embeddings_on_grid.to_parquet(output_path)


if __name__ == "__main__":
    cyclopts.run(cli)