Update imports

This commit is contained in:
Tobias Hölzer 2025-12-28 20:48:49 +01:00
parent 45bc61e49e
commit 495ddc13f9
18 changed files with 64 additions and 40 deletions

View file

@@ -129,11 +129,22 @@ pixi install # NOT pip install or conda install
 pixi run pytest
 ```
+
+### Running Python Commands
+Always use `pixi run` to execute Python commands to use the correct environment:
+```bash
+pixi run python script.py
+pixi run python -c "import entropice"
+```
+
 ### Common Tasks
+**Important**: Always use `pixi run` prefix for Python commands to ensure correct environment.
-- **Generate grids**: Use `spatial/grids.py` CLI
-- **Process labels**: Use `ingest/darts.py` CLI
-- **Train models**: Use `ml/training.py` CLI with TOML config
+- **Generate grids**: Use `pixi run create-grid` or `spatial/grids.py` CLI
+- **Process labels**: Use `pixi run darts` or `ingest/darts.py` CLI
+- **Train models**: Use `pixi run train` with TOML config or `ml/training.py` CLI
 - **Run inference**: Use `ml/inference.py` CLI
 - **View results**: `pixi run dashboard`
@@ -191,6 +202,7 @@ To extend Entropice:
 ## Important Notes
+- **Always use `pixi run` prefix** for Python commands (not plain `python`)
 - Grid resolutions: **H3** (3-6), **HEALPix** (6-10)
 - Arctic years run **October 1 to September 30** (not calendar years)
 - Handle **antimeridian crossing** in polar regions

View file

@@ -20,6 +20,8 @@ This will set up the complete environment including RAPIDS, PyTorch, and all geo
 ## Development Workflow
+**Important**: Always use `pixi run` to execute Python commands and scripts to ensure you're using the correct environment with all dependencies.
 ### Code Organization
 - **`src/entropice/ingest/`**: Data ingestion modules (darts, era5, arcticdem, alphaearth)
@@ -72,6 +74,13 @@ Run tests for specific modules:
 pixi run pytest
 ```
+
+When running Python scripts or commands, always use `pixi run`:
+```bash
+pixi run python script.py
+pixi run python -c "import entropice"
+```
+
 When adding features, include tests that verify:
 - Correct handling of geospatial coordinates and projections

View file

@@ -12,7 +12,7 @@ import toml
 import xarray as xr
 from rich import print
-from entropice.paths import RESULTS_DIR
+from entropice.utils.paths import RESULTS_DIR
 def fix_xgboost_model_state(results_dir: Path) -> bool:

View file

@@ -10,7 +10,7 @@ import streamlit as st
 from entropice.dashboard.plots.colors import get_palette
 from entropice.dashboard.utils.data import load_all_training_results
-from entropice.dataset import DatasetEnsemble
+from entropice.ml.dataset import DatasetEnsemble
 # Type definitions for dataset statistics

View file

@@ -13,7 +13,7 @@ import streamlit as st
 from shapely.geometry import shape
 from entropice.dashboard.plots.colors import get_cmap, get_palette
-from entropice.dataset import DatasetEnsemble
+from entropice.ml.dataset import DatasetEnsemble
 def render_performance_summary(results: pd.DataFrame, refit_metric: str):

View file

@@ -8,7 +8,7 @@ import streamlit as st
 from shapely.geometry import shape
 from entropice.dashboard.plots.colors import get_palette
-from entropice.dataset import CategoricalTrainingDataset
+from entropice.ml.dataset import CategoricalTrainingDataset
 def render_all_distribution_histograms(train_data_dict: dict[str, CategoricalTrainingDataset]):

View file

@@ -2,7 +2,6 @@
 import streamlit as st
-from entropice import grids
 from entropice.dashboard.plots.source_data import (
     render_alphaearth_map,
     render_alphaearth_overview,
@@ -17,7 +16,8 @@ from entropice.dashboard.plots.source_data import (
 )
 from entropice.dashboard.plots.training_data import render_all_distribution_histograms, render_spatial_map
 from entropice.dashboard.utils.data import load_all_training_data, load_source_data
-from entropice.dataset import DatasetEnsemble
+from entropice.ml.dataset import DatasetEnsemble
+from entropice.spatial import grids
 def render_training_data_page():

View file

@@ -11,8 +11,8 @@ import toml
 import xarray as xr
 from shapely.geometry import shape
-import entropice.paths
-from entropice.dataset import CategoricalTrainingDataset, DatasetEnsemble
+import entropice.utils.paths
+from entropice.ml.dataset import CategoricalTrainingDataset, DatasetEnsemble
 @dataclass
@@ -89,7 +89,7 @@ def _fix_hex_geometry(geom):
 @st.cache_data
 def load_all_training_results() -> list[TrainingResult]:
     """Load all training results from the results directory."""
-    results_dir = entropice.paths.RESULTS_DIR
+    results_dir = entropice.utils.paths.RESULTS_DIR
     training_results: list[TrainingResult] = []
     for result_path in results_dir.iterdir():
         if not result_path.is_dir():
@@ -402,11 +402,11 @@ def extract_era5_features(
     )
     era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year", "agg"]).unstack(
         "feature"
-    )  # noqa: PD010
+    )
 else:
     era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year"]).unstack(
         "feature"
-    )  # noqa: PD010
+    )
 else:
     # For yearly: keep as variable, time, (agg)
     era5_features_array = era5_features_array.assign_coords(

View file

@@ -22,8 +22,9 @@ import xdggs
 from rich import pretty, print, traceback
 from rich.progress import track
-from entropice import codecs, grids
-from entropice.paths import get_annual_embeddings_file, get_embeddings_store
+from entropice.spatial import grids
+from entropice.utils import codecs
+from entropice.utils.paths import get_annual_embeddings_file, get_embeddings_store
 # Filter out the GeoDataFrame.swapaxes deprecation warning
 warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning)

View file

@@ -28,9 +28,10 @@ from xrspatial.curvature import _run_cupy as curvature_cupy
 from xrspatial.slope import _run_cupy as slope_cupy
 from zarr.codecs import BloscCodec
-from entropice import codecs, grids, watermask
-from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
-from entropice.paths import get_arcticdem_stores
+from entropice.spatial import grids, watermask
+from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
+from entropice.utils import codecs
+from entropice.utils.paths import get_arcticdem_stores
 traceback.install(show_locals=True, suppress=[cyclopts])
 pretty.install()

View file

@@ -15,8 +15,8 @@ from rich import pretty, print, traceback
 from rich.progress import track
 from stopuhr import stopwatch
-from entropice import grids
-from entropice.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
+from entropice.spatial import grids
+from entropice.utils.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
 traceback.install()
 pretty.install()

View file

@@ -96,10 +96,11 @@ from rasterio.features import shapes
 from rich import pretty, print, traceback
 from stopuhr import stopwatch
-from entropice import codecs, grids, watermask
-from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
-from entropice.paths import FIGURES_DIR, get_era5_stores
-from entropice.xvec import to_xvec
+from entropice.spatial import grids, watermask
+from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
+from entropice.spatial.xvec import to_xvec
+from entropice.utils import codecs
+from entropice.utils.paths import FIGURES_DIR, get_era5_stores
 traceback.install(show_locals=True, suppress=[cyclopts, xr, pd, cProfile])
 pretty.install()

View file

@@ -31,7 +31,7 @@ from rich import pretty, traceback
 from sklearn import set_config
 from sklearn.model_selection import train_test_split
-import entropice.paths
+import entropice.utils.paths
 traceback.install()
 pretty.install()
@@ -200,12 +200,12 @@ class DatasetEnsemble:
 def _read_member(self, member: L2Dataset, targets: gpd.GeoDataFrame, lazy: bool = False) -> xr.Dataset:
     if member == "AlphaEarth":
-        store = entropice.paths.get_embeddings_store(grid=self.grid, level=self.level)
+        store = entropice.utils.paths.get_embeddings_store(grid=self.grid, level=self.level)
     elif member in ["ERA5-yearly", "ERA5-seasonal", "ERA5-shoulder"]:
         era5_agg: Literal["yearly", "seasonal", "shoulder"] = member.split("-")[1]  # ty:ignore[invalid-assignment]
-        store = entropice.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
+        store = entropice.utils.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
     elif member == "ArcticDEM":
-        store = entropice.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
+        store = entropice.utils.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
     else:
         raise NotImplementedError(f"Member {member} not implemented.")
@@ -244,9 +244,9 @@ class DatasetEnsemble:
 def _read_target(self) -> gpd.GeoDataFrame:
     if self.target == "darts_rts":
-        target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level)
+        target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level)
     elif self.target == "darts_mllabels":
-        target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
+        target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
     else:
         raise NotImplementedError(f"Target {self.target} not implemented.")
     targets = gpd.read_parquet(target_store)
@@ -343,7 +343,7 @@ class DatasetEnsemble:
 @lru_cache(maxsize=1)
 def create(self, filter_target_col: str | None = None, cache_mode: Literal["n", "o", "r"] = "r") -> pd.DataFrame:
     # n: no cache, o: overwrite cache, r: read cache if exists
-    cache_file = entropice.paths.get_dataset_cache(self.id(), subset=filter_target_col)
+    cache_file = entropice.utils.paths.get_dataset_cache(self.id(), subset=filter_target_col)
     if cache_mode == "r" and cache_file.exists():
         dataset = gpd.read_parquet(cache_file)
         print(
@@ -393,7 +393,7 @@ class DatasetEnsemble:
 for i in range(0, len(targets), batch_size):
     # n: no cache, o: overwrite cache, r: read cache if exists
-    cache_file = entropice.paths.get_dataset_cache(
+    cache_file = entropice.utils.paths.get_dataset_cache(
         self.id(), subset=filter_target_col, batch=(i, i + batch_size)
     )
     if cache_mode == "r" and cache_file.exists():

View file

@@ -11,7 +11,7 @@ from rich import pretty, traceback
 from sklearn import set_config
 from xgboost.sklearn import XGBClassifier
-from entropice.dataset import DatasetEnsemble
+from entropice.ml.dataset import DatasetEnsemble
 traceback.install()
 pretty.install()

View file

@@ -20,9 +20,9 @@ from sklearn.model_selection import KFold, RandomizedSearchCV
 from stopuhr import stopwatch
 from xgboost.sklearn import XGBClassifier
-from entropice.dataset import DatasetEnsemble
-from entropice.inference import predict_proba
-from entropice.paths import get_cv_results_dir
+from entropice.ml.dataset import DatasetEnsemble
+from entropice.ml.inference import predict_proba
+from entropice.utils.paths import get_cv_results_dir
 traceback.install()
 pretty.install()

View file

@@ -32,7 +32,7 @@ from shapely.geometry import LineString, Polygon
 from stopuhr import stopwatch
 from xdggs.healpix import HealpixInfo
-from entropice import grids
+from entropice.spatial import grids
 @dataclass(frozen=True)

View file

@@ -25,7 +25,7 @@ from shapely.ops import transform
 from stopuhr import stopwatch
 from xdggs.healpix import HealpixInfo
-from entropice.paths import get_grid_file, get_grid_viz_file, watermask_file
+from entropice.utils.paths import get_grid_file, get_grid_viz_file, watermask_file
 traceback.install()
 pretty.install()

View file

@@ -3,7 +3,7 @@
 import duckdb
 import geopandas as gpd
-from entropice.paths import watermask_file
+from entropice.utils.paths import watermask_file
 def open():