From 495ddc13f9487ece5d071a81ff96554e606ae665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20H=C3=B6lzer?= Date: Sun, 28 Dec 2025 20:48:49 +0100 Subject: [PATCH] Update imports --- .github/copilot-instructions.md | 18 +++++++++++++++--- CONTRIBUTING.md | 9 +++++++++ scripts/fix_xgboost_importance.py | 2 +- src/entropice/dashboard/overview_page.py | 2 +- .../dashboard/plots/hyperparameter_analysis.py | 2 +- src/entropice/dashboard/plots/training_data.py | 2 +- src/entropice/dashboard/training_data_page.py | 4 ++-- src/entropice/dashboard/utils/data.py | 10 +++++----- src/entropice/ingest/alphaearth.py | 5 +++-- src/entropice/ingest/arcticdem.py | 7 ++++--- src/entropice/ingest/darts.py | 4 ++-- src/entropice/ingest/era5.py | 9 +++++---- src/entropice/ml/dataset.py | 16 ++++++++-------- src/entropice/ml/inference.py | 2 +- src/entropice/ml/training.py | 6 +++--- src/entropice/spatial/aggregators.py | 2 +- src/entropice/spatial/grids.py | 2 +- src/entropice/spatial/watermask.py | 2 +- 18 files changed, 64 insertions(+), 40 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index a9b1f83..f856fe3 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -129,11 +129,22 @@ pixi install # NOT pip install or conda install pixi run pytest ``` +### Running Python Commands + +Always use `pixi run` to execute Python commands to use the correct environment: + +```bash +pixi run python script.py +pixi run python -c "import entropice" +``` + ### Common Tasks -- **Generate grids**: Use `spatial/grids.py` CLI -- **Process labels**: Use `ingest/darts.py` CLI -- **Train models**: Use `ml/training.py` CLI with TOML config +**Important**: Always use `pixi run` prefix for Python commands to ensure correct environment. 
+ - **Generate grids**: Use `pixi run create-grid` or `spatial/grids.py` CLI +- **Process labels**: Use `pixi run darts` or `ingest/darts.py` CLI +- **Train models**: Use `pixi run train` or `ml/training.py` CLI with TOML config - **Run inference**: Use `ml/inference.py` CLI - **View results**: `pixi run dashboard` @@ -191,6 +202,7 @@ To extend Entropice: ## Important Notes +- **Always use the `pixi run` prefix** for Python commands (not plain `python`) - Grid resolutions: **H3** (3-6), **HEALPix** (6-10) - Arctic years run **October 1 to September 30** (not calendar years) - Handle **antimeridian crossing** in polar regions diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 71f722a..ec4fe38 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,6 +20,8 @@ This will set up the complete environment including RAPIDS, PyTorch, and all geo ## Development Workflow +**Important**: Always use `pixi run` to execute Python commands and scripts to ensure you're using the correct environment with all dependencies. 
+ ### Code Organization - **`src/entropice/ingest/`**: Data ingestion modules (darts, era5, arcticdem, alphaearth) @@ -72,6 +74,13 @@ Run tests for specific modules: pixi run pytest ``` +When running Python scripts or commands, always use `pixi run`: + +```bash +pixi run python script.py +pixi run python -c "import entropice" +``` + When adding features, include tests that verify: - Correct handling of geospatial coordinates and projections diff --git a/scripts/fix_xgboost_importance.py b/scripts/fix_xgboost_importance.py index a343fff..533a552 100644 --- a/scripts/fix_xgboost_importance.py +++ b/scripts/fix_xgboost_importance.py @@ -12,7 +12,7 @@ import toml import xarray as xr from rich import print -from entropice.paths import RESULTS_DIR +from entropice.utils.paths import RESULTS_DIR def fix_xgboost_model_state(results_dir: Path) -> bool: diff --git a/src/entropice/dashboard/overview_page.py b/src/entropice/dashboard/overview_page.py index bb43dd9..830845a 100644 --- a/src/entropice/dashboard/overview_page.py +++ b/src/entropice/dashboard/overview_page.py @@ -10,7 +10,7 @@ import streamlit as st from entropice.dashboard.plots.colors import get_palette from entropice.dashboard.utils.data import load_all_training_results -from entropice.dataset import DatasetEnsemble +from entropice.ml.dataset import DatasetEnsemble # Type definitions for dataset statistics diff --git a/src/entropice/dashboard/plots/hyperparameter_analysis.py b/src/entropice/dashboard/plots/hyperparameter_analysis.py index 18945a9..2fff6b5 100644 --- a/src/entropice/dashboard/plots/hyperparameter_analysis.py +++ b/src/entropice/dashboard/plots/hyperparameter_analysis.py @@ -13,7 +13,7 @@ import streamlit as st from shapely.geometry import shape from entropice.dashboard.plots.colors import get_cmap, get_palette -from entropice.dataset import DatasetEnsemble +from entropice.ml.dataset import DatasetEnsemble def render_performance_summary(results: pd.DataFrame, refit_metric: str): diff --git 
a/src/entropice/dashboard/plots/training_data.py b/src/entropice/dashboard/plots/training_data.py index eb0639e..916a91f 100644 --- a/src/entropice/dashboard/plots/training_data.py +++ b/src/entropice/dashboard/plots/training_data.py @@ -8,7 +8,7 @@ import streamlit as st from shapely.geometry import shape from entropice.dashboard.plots.colors import get_palette -from entropice.dataset import CategoricalTrainingDataset +from entropice.ml.dataset import CategoricalTrainingDataset def render_all_distribution_histograms(train_data_dict: dict[str, CategoricalTrainingDataset]): diff --git a/src/entropice/dashboard/training_data_page.py b/src/entropice/dashboard/training_data_page.py index 1dc280b..fe91036 100644 --- a/src/entropice/dashboard/training_data_page.py +++ b/src/entropice/dashboard/training_data_page.py @@ -2,7 +2,6 @@ import streamlit as st -from entropice import grids from entropice.dashboard.plots.source_data import ( render_alphaearth_map, render_alphaearth_overview, @@ -17,7 +16,8 @@ from entropice.dashboard.plots.source_data import ( ) from entropice.dashboard.plots.training_data import render_all_distribution_histograms, render_spatial_map from entropice.dashboard.utils.data import load_all_training_data, load_source_data -from entropice.dataset import DatasetEnsemble +from entropice.ml.dataset import DatasetEnsemble +from entropice.spatial import grids def render_training_data_page(): diff --git a/src/entropice/dashboard/utils/data.py b/src/entropice/dashboard/utils/data.py index 83515b0..b57ef37 100644 --- a/src/entropice/dashboard/utils/data.py +++ b/src/entropice/dashboard/utils/data.py @@ -11,8 +11,8 @@ import toml import xarray as xr from shapely.geometry import shape -import entropice.paths -from entropice.dataset import CategoricalTrainingDataset, DatasetEnsemble +import entropice.utils.paths +from entropice.ml.dataset import CategoricalTrainingDataset, DatasetEnsemble @dataclass @@ -89,7 +89,7 @@ def _fix_hex_geometry(geom): @st.cache_data def 
load_all_training_results() -> list[TrainingResult]: """Load all training results from the results directory.""" - results_dir = entropice.paths.RESULTS_DIR + results_dir = entropice.utils.paths.RESULTS_DIR training_results: list[TrainingResult] = [] for result_path in results_dir.iterdir(): if not result_path.is_dir(): @@ -402,11 +402,11 @@ def extract_era5_features( ) era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year", "agg"]).unstack( "feature" - ) # noqa: PD010 + ) else: era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year"]).unstack( "feature" - ) # noqa: PD010 + ) else: # For yearly: keep as variable, time, (agg) era5_features_array = era5_features_array.assign_coords( diff --git a/src/entropice/ingest/alphaearth.py b/src/entropice/ingest/alphaearth.py index 65dbaf4..da164b7 100644 --- a/src/entropice/ingest/alphaearth.py +++ b/src/entropice/ingest/alphaearth.py @@ -22,8 +22,9 @@ import xdggs from rich import pretty, print, traceback from rich.progress import track -from entropice import codecs, grids -from entropice.paths import get_annual_embeddings_file, get_embeddings_store +from entropice.spatial import grids +from entropice.utils import codecs +from entropice.utils.paths import get_annual_embeddings_file, get_embeddings_store # Filter out the GeoDataFrame.swapaxes deprecation warning warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning) diff --git a/src/entropice/ingest/arcticdem.py b/src/entropice/ingest/arcticdem.py index 790ef5f..b98e356 100644 --- a/src/entropice/ingest/arcticdem.py +++ b/src/entropice/ingest/arcticdem.py @@ -28,9 +28,10 @@ from xrspatial.curvature import _run_cupy as curvature_cupy from xrspatial.slope import _run_cupy as slope_cupy from zarr.codecs import BloscCodec -from entropice import codecs, grids, watermask -from entropice.aggregators import _Aggregations, aggregate_raster_into_grid -from entropice.paths import 
get_arcticdem_stores +from entropice.spatial import grids, watermask +from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid +from entropice.utils import codecs +from entropice.utils.paths import get_arcticdem_stores traceback.install(show_locals=True, suppress=[cyclopts]) pretty.install() diff --git a/src/entropice/ingest/darts.py b/src/entropice/ingest/darts.py index ebcf304..1a4087c 100644 --- a/src/entropice/ingest/darts.py +++ b/src/entropice/ingest/darts.py @@ -15,8 +15,8 @@ from rich import pretty, print, traceback from rich.progress import track from stopuhr import stopwatch -from entropice import grids -from entropice.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file +from entropice.spatial import grids +from entropice.utils.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file traceback.install() pretty.install() diff --git a/src/entropice/ingest/era5.py b/src/entropice/ingest/era5.py index 8263a57..68cc95c 100644 --- a/src/entropice/ingest/era5.py +++ b/src/entropice/ingest/era5.py @@ -96,10 +96,11 @@ from rasterio.features import shapes from rich import pretty, print, traceback from stopuhr import stopwatch -from entropice import codecs, grids, watermask -from entropice.aggregators import _Aggregations, aggregate_raster_into_grid -from entropice.paths import FIGURES_DIR, get_era5_stores -from entropice.xvec import to_xvec +from entropice.spatial import grids, watermask +from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid +from entropice.spatial.xvec import to_xvec +from entropice.utils import codecs +from entropice.utils.paths import FIGURES_DIR, get_era5_stores traceback.install(show_locals=True, suppress=[cyclopts, xr, pd, cProfile]) pretty.install() diff --git a/src/entropice/ml/dataset.py b/src/entropice/ml/dataset.py index 2160737..eb6fcb8 100644 --- a/src/entropice/ml/dataset.py +++ 
b/src/entropice/ml/dataset.py @@ -31,7 +31,7 @@ from rich import pretty, traceback from sklearn import set_config from sklearn.model_selection import train_test_split -import entropice.paths +import entropice.utils.paths traceback.install() pretty.install() @@ -200,12 +200,12 @@ class DatasetEnsemble: def _read_member(self, member: L2Dataset, targets: gpd.GeoDataFrame, lazy: bool = False) -> xr.Dataset: if member == "AlphaEarth": - store = entropice.paths.get_embeddings_store(grid=self.grid, level=self.level) + store = entropice.utils.paths.get_embeddings_store(grid=self.grid, level=self.level) elif member in ["ERA5-yearly", "ERA5-seasonal", "ERA5-shoulder"]: era5_agg: Literal["yearly", "seasonal", "shoulder"] = member.split("-")[1] # ty:ignore[invalid-assignment] - store = entropice.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level) + store = entropice.utils.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level) elif member == "ArcticDEM": - store = entropice.paths.get_arcticdem_stores(grid=self.grid, level=self.level) + store = entropice.utils.paths.get_arcticdem_stores(grid=self.grid, level=self.level) else: raise NotImplementedError(f"Member {member} not implemented.") @@ -244,9 +244,9 @@ class DatasetEnsemble: def _read_target(self) -> gpd.GeoDataFrame: if self.target == "darts_rts": - target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level) + target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level) elif self.target == "darts_mllabels": - target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True) + target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True) else: raise NotImplementedError(f"Target {self.target} not implemented.") targets = gpd.read_parquet(target_store) @@ -343,7 +343,7 @@ class DatasetEnsemble: @lru_cache(maxsize=1) def create(self, filter_target_col: str | None = None, 
cache_mode: Literal["n", "o", "r"] = "r") -> pd.DataFrame: # n: no cache, o: overwrite cache, r: read cache if exists - cache_file = entropice.paths.get_dataset_cache(self.id(), subset=filter_target_col) + cache_file = entropice.utils.paths.get_dataset_cache(self.id(), subset=filter_target_col) if cache_mode == "r" and cache_file.exists(): dataset = gpd.read_parquet(cache_file) print( @@ -393,7 +393,7 @@ class DatasetEnsemble: for i in range(0, len(targets), batch_size): # n: no cache, o: overwrite cache, r: read cache if exists - cache_file = entropice.paths.get_dataset_cache( + cache_file = entropice.utils.paths.get_dataset_cache( self.id(), subset=filter_target_col, batch=(i, i + batch_size) ) if cache_mode == "r" and cache_file.exists(): diff --git a/src/entropice/ml/inference.py b/src/entropice/ml/inference.py index b2d87dc..05c9dae 100644 --- a/src/entropice/ml/inference.py +++ b/src/entropice/ml/inference.py @@ -11,7 +11,7 @@ from rich import pretty, traceback from sklearn import set_config from xgboost.sklearn import XGBClassifier -from entropice.dataset import DatasetEnsemble +from entropice.ml.dataset import DatasetEnsemble traceback.install() pretty.install() diff --git a/src/entropice/ml/training.py b/src/entropice/ml/training.py index bc34803..d307854 100644 --- a/src/entropice/ml/training.py +++ b/src/entropice/ml/training.py @@ -20,9 +20,9 @@ from sklearn.model_selection import KFold, RandomizedSearchCV from stopuhr import stopwatch from xgboost.sklearn import XGBClassifier -from entropice.dataset import DatasetEnsemble -from entropice.inference import predict_proba -from entropice.paths import get_cv_results_dir +from entropice.ml.dataset import DatasetEnsemble +from entropice.ml.inference import predict_proba +from entropice.utils.paths import get_cv_results_dir traceback.install() pretty.install() diff --git a/src/entropice/spatial/aggregators.py b/src/entropice/spatial/aggregators.py index 5dccdca..caa2641 100644 --- 
a/src/entropice/spatial/aggregators.py +++ b/src/entropice/spatial/aggregators.py @@ -32,7 +32,7 @@ from shapely.geometry import LineString, Polygon from stopuhr import stopwatch from xdggs.healpix import HealpixInfo -from entropice import grids +from entropice.spatial import grids @dataclass(frozen=True) diff --git a/src/entropice/spatial/grids.py b/src/entropice/spatial/grids.py index c562d45..fb4b15f 100644 --- a/src/entropice/spatial/grids.py +++ b/src/entropice/spatial/grids.py @@ -25,7 +25,7 @@ from shapely.ops import transform from stopuhr import stopwatch from xdggs.healpix import HealpixInfo -from entropice.paths import get_grid_file, get_grid_viz_file, watermask_file +from entropice.utils.paths import get_grid_file, get_grid_viz_file, watermask_file traceback.install() pretty.install() diff --git a/src/entropice/spatial/watermask.py b/src/entropice/spatial/watermask.py index d744056..464f72f 100644 --- a/src/entropice/spatial/watermask.py +++ b/src/entropice/spatial/watermask.py @@ -3,7 +3,7 @@ import duckdb import geopandas as gpd -from entropice.paths import watermask_file +from entropice.utils.paths import watermask_file def open():