Update imports

This commit is contained in:
Tobias Hölzer 2025-12-28 20:48:49 +01:00
parent 45bc61e49e
commit 495ddc13f9
18 changed files with 64 additions and 40 deletions

View file

@ -129,11 +129,22 @@ pixi install # NOT pip install or conda install
pixi run pytest
```
### Running Python Commands
Always use `pixi run` to execute Python commands so that they run in the correct environment:
```bash
pixi run python script.py
pixi run python -c "import entropice"
```
### Common Tasks
- **Generate grids**: Use `spatial/grids.py` CLI
- **Process labels**: Use `ingest/darts.py` CLI
- **Train models**: Use `ml/training.py` CLI with TOML config
**Important**: Always use the `pixi run` prefix for Python commands to ensure the correct environment is used.
- **Generate grids**: Use `pixi run create-grid` or `spatial/grids.py` CLI
- **Process labels**: Use `pixi run darts` or `ingest/darts.py` CLI
- **Train models**: Use `pixi run train` with TOML config or `ml/training.py` CLI
- **Run inference**: Use `ml/inference.py` CLI
- **View results**: `pixi run dashboard`
@ -191,6 +202,7 @@ To extend Entropice:
## Important Notes
- **Always use the `pixi run` prefix** for Python commands (not plain `python`)
- Grid resolutions: **H3** (3-6), **HEALPix** (6-10)
- Arctic years run **October 1 to September 30** (not calendar years)
- Handle **antimeridian crossing** in polar regions

View file

@ -20,6 +20,8 @@ This will set up the complete environment including RAPIDS, PyTorch, and all geo
## Development Workflow
**Important**: Always use `pixi run` to execute Python commands and scripts to ensure you're using the correct environment with all dependencies.
### Code Organization
- **`src/entropice/ingest/`**: Data ingestion modules (darts, era5, arcticdem, alphaearth)
@ -72,6 +74,13 @@ Run tests for specific modules:
pixi run pytest
```
When running Python scripts or commands, always use `pixi run`:
```bash
pixi run python script.py
pixi run python -c "import entropice"
```
When adding features, include tests that verify:
- Correct handling of geospatial coordinates and projections

View file

@ -12,7 +12,7 @@ import toml
import xarray as xr
from rich import print
from entropice.paths import RESULTS_DIR
from entropice.utils.paths import RESULTS_DIR
def fix_xgboost_model_state(results_dir: Path) -> bool:

View file

@ -10,7 +10,7 @@ import streamlit as st
from entropice.dashboard.plots.colors import get_palette
from entropice.dashboard.utils.data import load_all_training_results
from entropice.dataset import DatasetEnsemble
from entropice.ml.dataset import DatasetEnsemble
# Type definitions for dataset statistics

View file

@ -13,7 +13,7 @@ import streamlit as st
from shapely.geometry import shape
from entropice.dashboard.plots.colors import get_cmap, get_palette
from entropice.dataset import DatasetEnsemble
from entropice.ml.dataset import DatasetEnsemble
def render_performance_summary(results: pd.DataFrame, refit_metric: str):

View file

@ -8,7 +8,7 @@ import streamlit as st
from shapely.geometry import shape
from entropice.dashboard.plots.colors import get_palette
from entropice.dataset import CategoricalTrainingDataset
from entropice.ml.dataset import CategoricalTrainingDataset
def render_all_distribution_histograms(train_data_dict: dict[str, CategoricalTrainingDataset]):

View file

@ -2,7 +2,6 @@
import streamlit as st
from entropice import grids
from entropice.dashboard.plots.source_data import (
render_alphaearth_map,
render_alphaearth_overview,
@ -17,7 +16,8 @@ from entropice.dashboard.plots.source_data import (
)
from entropice.dashboard.plots.training_data import render_all_distribution_histograms, render_spatial_map
from entropice.dashboard.utils.data import load_all_training_data, load_source_data
from entropice.dataset import DatasetEnsemble
from entropice.ml.dataset import DatasetEnsemble
from entropice.spatial import grids
def render_training_data_page():

View file

@ -11,8 +11,8 @@ import toml
import xarray as xr
from shapely.geometry import shape
import entropice.paths
from entropice.dataset import CategoricalTrainingDataset, DatasetEnsemble
import entropice.utils.paths
from entropice.ml.dataset import CategoricalTrainingDataset, DatasetEnsemble
@dataclass
@ -89,7 +89,7 @@ def _fix_hex_geometry(geom):
@st.cache_data
def load_all_training_results() -> list[TrainingResult]:
"""Load all training results from the results directory."""
results_dir = entropice.paths.RESULTS_DIR
results_dir = entropice.utils.paths.RESULTS_DIR
training_results: list[TrainingResult] = []
for result_path in results_dir.iterdir():
if not result_path.is_dir():
@ -402,11 +402,11 @@ def extract_era5_features(
)
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year", "agg"]).unstack(
"feature"
) # noqa: PD010
)
else:
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year"]).unstack(
"feature"
) # noqa: PD010
)
else:
# For yearly: keep as variable, time, (agg)
era5_features_array = era5_features_array.assign_coords(

View file

@ -22,8 +22,9 @@ import xdggs
from rich import pretty, print, traceback
from rich.progress import track
from entropice import codecs, grids
from entropice.paths import get_annual_embeddings_file, get_embeddings_store
from entropice.spatial import grids
from entropice.utils import codecs
from entropice.utils.paths import get_annual_embeddings_file, get_embeddings_store
# Filter out the GeoDataFrame.swapaxes deprecation warning
warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning)

View file

@ -28,9 +28,10 @@ from xrspatial.curvature import _run_cupy as curvature_cupy
from xrspatial.slope import _run_cupy as slope_cupy
from zarr.codecs import BloscCodec
from entropice import codecs, grids, watermask
from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
from entropice.paths import get_arcticdem_stores
from entropice.spatial import grids, watermask
from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
from entropice.utils import codecs
from entropice.utils.paths import get_arcticdem_stores
traceback.install(show_locals=True, suppress=[cyclopts])
pretty.install()

View file

@ -15,8 +15,8 @@ from rich import pretty, print, traceback
from rich.progress import track
from stopuhr import stopwatch
from entropice import grids
from entropice.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
from entropice.spatial import grids
from entropice.utils.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
traceback.install()
pretty.install()

View file

@ -96,10 +96,11 @@ from rasterio.features import shapes
from rich import pretty, print, traceback
from stopuhr import stopwatch
from entropice import codecs, grids, watermask
from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
from entropice.paths import FIGURES_DIR, get_era5_stores
from entropice.xvec import to_xvec
from entropice.spatial import grids, watermask
from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
from entropice.spatial.xvec import to_xvec
from entropice.utils import codecs
from entropice.utils.paths import FIGURES_DIR, get_era5_stores
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd, cProfile])
pretty.install()

View file

@ -31,7 +31,7 @@ from rich import pretty, traceback
from sklearn import set_config
from sklearn.model_selection import train_test_split
import entropice.paths
import entropice.utils.paths
traceback.install()
pretty.install()
@ -200,12 +200,12 @@ class DatasetEnsemble:
def _read_member(self, member: L2Dataset, targets: gpd.GeoDataFrame, lazy: bool = False) -> xr.Dataset:
if member == "AlphaEarth":
store = entropice.paths.get_embeddings_store(grid=self.grid, level=self.level)
store = entropice.utils.paths.get_embeddings_store(grid=self.grid, level=self.level)
elif member in ["ERA5-yearly", "ERA5-seasonal", "ERA5-shoulder"]:
era5_agg: Literal["yearly", "seasonal", "shoulder"] = member.split("-")[1] # ty:ignore[invalid-assignment]
store = entropice.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
store = entropice.utils.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
elif member == "ArcticDEM":
store = entropice.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
store = entropice.utils.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
else:
raise NotImplementedError(f"Member {member} not implemented.")
@ -244,9 +244,9 @@ class DatasetEnsemble:
def _read_target(self) -> gpd.GeoDataFrame:
if self.target == "darts_rts":
target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level)
target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level)
elif self.target == "darts_mllabels":
target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
else:
raise NotImplementedError(f"Target {self.target} not implemented.")
targets = gpd.read_parquet(target_store)
@ -343,7 +343,7 @@ class DatasetEnsemble:
@lru_cache(maxsize=1)
def create(self, filter_target_col: str | None = None, cache_mode: Literal["n", "o", "r"] = "r") -> pd.DataFrame:
# n: no cache, o: overwrite cache, r: read cache if exists
cache_file = entropice.paths.get_dataset_cache(self.id(), subset=filter_target_col)
cache_file = entropice.utils.paths.get_dataset_cache(self.id(), subset=filter_target_col)
if cache_mode == "r" and cache_file.exists():
dataset = gpd.read_parquet(cache_file)
print(
@ -393,7 +393,7 @@ class DatasetEnsemble:
for i in range(0, len(targets), batch_size):
# n: no cache, o: overwrite cache, r: read cache if exists
cache_file = entropice.paths.get_dataset_cache(
cache_file = entropice.utils.paths.get_dataset_cache(
self.id(), subset=filter_target_col, batch=(i, i + batch_size)
)
if cache_mode == "r" and cache_file.exists():

View file

@ -11,7 +11,7 @@ from rich import pretty, traceback
from sklearn import set_config
from xgboost.sklearn import XGBClassifier
from entropice.dataset import DatasetEnsemble
from entropice.ml.dataset import DatasetEnsemble
traceback.install()
pretty.install()

View file

@ -20,9 +20,9 @@ from sklearn.model_selection import KFold, RandomizedSearchCV
from stopuhr import stopwatch
from xgboost.sklearn import XGBClassifier
from entropice.dataset import DatasetEnsemble
from entropice.inference import predict_proba
from entropice.paths import get_cv_results_dir
from entropice.ml.dataset import DatasetEnsemble
from entropice.ml.inference import predict_proba
from entropice.utils.paths import get_cv_results_dir
traceback.install()
pretty.install()

View file

@ -32,7 +32,7 @@ from shapely.geometry import LineString, Polygon
from stopuhr import stopwatch
from xdggs.healpix import HealpixInfo
from entropice import grids
from entropice.spatial import grids
@dataclass(frozen=True)

View file

@ -25,7 +25,7 @@ from shapely.ops import transform
from stopuhr import stopwatch
from xdggs.healpix import HealpixInfo
from entropice.paths import get_grid_file, get_grid_viz_file, watermask_file
from entropice.utils.paths import get_grid_file, get_grid_viz_file, watermask_file
traceback.install()
pretty.install()

View file

@ -3,7 +3,7 @@
import duckdb
import geopandas as gpd
from entropice.paths import watermask_file
from entropice.utils.paths import watermask_file
def open():