entropice/cds.py

66 lines
2 KiB
Python

"""Download ERA5 data from the Copernicus Data Store.
Web platform: https://cds.climate.copernicus.eu
"""
import os
import re
from pathlib import Path
import cdsapi
import cyclopts
from rich import pretty, print, traceback
traceback.install()
pretty.install()
DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) / "entropyc-rts"
def hourly(years: str):
"""Download ERA5 data from the Copernicus Data Store.
Args:
years (str): Years to download, seperated by a '-'.
"""
assert re.compile(r"^\d{4}-\d{4}$").match(years), "Years must be in the format 'YYYY-YYYY'"
start_year, end_year = map(int, years.split("-"))
assert 1950 <= start_year <= end_year <= 2024, "Years must be between 1950 and 2024"
dataset = "reanalysis-era5-single-levels"
client = cdsapi.Client(wait_until_complete=False)
outdir = (DATA_DIR / "era5/cds").resolve()
outdir.mkdir(parents=True, exist_ok=True)
print(f"Downloading ERA5 data from {start_year} to {end_year}...")
for y in range(start_year, end_year + 1):
for month in [f"{i:02d}" for i in range(1, 13)]:
request = {
"product_type": ["reanalysis"],
"variable": [
"2m_temperature",
"total_precipitation",
"snow_depth",
"snow_density",
"snowfall",
"lake_ice_temperature",
"surface_sensible_heat_flux",
],
"year": [str(y)],
"month": [month],
"day": [f"{i:02d}" for i in range(1, 32)],
"time": [f"{i:02d}:00" for i in range(0, 24)],
"data_format": "netcdf",
"download_format": "unarchived",
"area": [85, -180, 50, 180],
}
outpath = outdir / f"era5_{y}_{month}.zip"
client.retrieve(dataset, request).download(str(outpath))
print(f"Downloaded {dataset} for {y}-{month}")
if __name__ == "__main__":
cyclopts.run(hourly)