Update imports
This commit is contained in:
parent
45bc61e49e
commit
495ddc13f9
18 changed files with 64 additions and 40 deletions
18
.github/copilot-instructions.md
vendored
18
.github/copilot-instructions.md
vendored
|
|
@ -129,11 +129,22 @@ pixi install # NOT pip install or conda install
|
||||||
pixi run pytest
|
pixi run pytest
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Running Python Commands
|
||||||
|
|
||||||
|
Always use `pixi run` to execute Python commands so that they run in the correct environment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pixi run python script.py
|
||||||
|
pixi run python -c "import entropice"
|
||||||
|
```
|
||||||
|
|
||||||
### Common Tasks
|
### Common Tasks
|
||||||
|
|
||||||
- **Generate grids**: Use `spatial/grids.py` CLI
|
**Important**: Always use the `pixi run` prefix for Python commands to ensure the correct environment is used.
|
||||||
- **Process labels**: Use `ingest/darts.py` CLI
|
|
||||||
- **Train models**: Use `ml/training.py` CLI with TOML config
|
- **Generate grids**: Use `pixi run create-grid` or `spatial/grids.py` CLI
|
||||||
|
- **Process labels**: Use `pixi run darts` or `ingest/darts.py` CLI
|
||||||
|
- **Train models**: Use `pixi run train` or the `ml/training.py` CLI, with a TOML config
|
||||||
- **Run inference**: Use `ml/inference.py` CLI
|
- **Run inference**: Use `ml/inference.py` CLI
|
||||||
- **View results**: `pixi run dashboard`
|
- **View results**: `pixi run dashboard`
|
||||||
|
|
||||||
|
|
@ -191,6 +202,7 @@ To extend Entropice:
|
||||||
|
|
||||||
## Important Notes
|
## Important Notes
|
||||||
|
|
||||||
|
- **Always use the `pixi run` prefix** for Python commands (not plain `python`)
|
||||||
- Grid resolutions: **H3** (3-6), **HEALPix** (6-10)
|
- Grid resolutions: **H3** (3-6), **HEALPix** (6-10)
|
||||||
- Arctic years run **October 1 to September 30** (not calendar years)
|
- Arctic years run **October 1 to September 30** (not calendar years)
|
||||||
- Handle **antimeridian crossing** in polar regions
|
- Handle **antimeridian crossing** in polar regions
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,8 @@ This will set up the complete environment including RAPIDS, PyTorch, and all geo
|
||||||
|
|
||||||
## Development Workflow
|
## Development Workflow
|
||||||
|
|
||||||
|
**Important**: Always use `pixi run` to execute Python commands and scripts to ensure you're using the correct environment with all dependencies.
|
||||||
|
|
||||||
### Code Organization
|
### Code Organization
|
||||||
|
|
||||||
- **`src/entropice/ingest/`**: Data ingestion modules (darts, era5, arcticdem, alphaearth)
|
- **`src/entropice/ingest/`**: Data ingestion modules (darts, era5, arcticdem, alphaearth)
|
||||||
|
|
@ -72,6 +74,13 @@ Run tests for specific modules:
|
||||||
pixi run pytest
|
pixi run pytest
|
||||||
```
|
```
|
||||||
|
|
||||||
|
When running Python scripts or commands, always use `pixi run`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pixi run python script.py
|
||||||
|
pixi run python -c "import entropice"
|
||||||
|
```
|
||||||
|
|
||||||
When adding features, include tests that verify:
|
When adding features, include tests that verify:
|
||||||
|
|
||||||
- Correct handling of geospatial coordinates and projections
|
- Correct handling of geospatial coordinates and projections
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ import toml
|
||||||
import xarray as xr
|
import xarray as xr
|
||||||
from rich import print
|
from rich import print
|
||||||
|
|
||||||
from entropice.paths import RESULTS_DIR
|
from entropice.utils.paths import RESULTS_DIR
|
||||||
|
|
||||||
|
|
||||||
def fix_xgboost_model_state(results_dir: Path) -> bool:
|
def fix_xgboost_model_state(results_dir: Path) -> bool:
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import streamlit as st
|
||||||
|
|
||||||
from entropice.dashboard.plots.colors import get_palette
|
from entropice.dashboard.plots.colors import get_palette
|
||||||
from entropice.dashboard.utils.data import load_all_training_results
|
from entropice.dashboard.utils.data import load_all_training_results
|
||||||
from entropice.dataset import DatasetEnsemble
|
from entropice.ml.dataset import DatasetEnsemble
|
||||||
|
|
||||||
|
|
||||||
# Type definitions for dataset statistics
|
# Type definitions for dataset statistics
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ import streamlit as st
|
||||||
from shapely.geometry import shape
|
from shapely.geometry import shape
|
||||||
|
|
||||||
from entropice.dashboard.plots.colors import get_cmap, get_palette
|
from entropice.dashboard.plots.colors import get_cmap, get_palette
|
||||||
from entropice.dataset import DatasetEnsemble
|
from entropice.ml.dataset import DatasetEnsemble
|
||||||
|
|
||||||
|
|
||||||
def render_performance_summary(results: pd.DataFrame, refit_metric: str):
|
def render_performance_summary(results: pd.DataFrame, refit_metric: str):
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import streamlit as st
|
||||||
from shapely.geometry import shape
|
from shapely.geometry import shape
|
||||||
|
|
||||||
from entropice.dashboard.plots.colors import get_palette
|
from entropice.dashboard.plots.colors import get_palette
|
||||||
from entropice.dataset import CategoricalTrainingDataset
|
from entropice.ml.dataset import CategoricalTrainingDataset
|
||||||
|
|
||||||
|
|
||||||
def render_all_distribution_histograms(train_data_dict: dict[str, CategoricalTrainingDataset]):
|
def render_all_distribution_histograms(train_data_dict: dict[str, CategoricalTrainingDataset]):
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
from entropice import grids
|
|
||||||
from entropice.dashboard.plots.source_data import (
|
from entropice.dashboard.plots.source_data import (
|
||||||
render_alphaearth_map,
|
render_alphaearth_map,
|
||||||
render_alphaearth_overview,
|
render_alphaearth_overview,
|
||||||
|
|
@ -17,7 +16,8 @@ from entropice.dashboard.plots.source_data import (
|
||||||
)
|
)
|
||||||
from entropice.dashboard.plots.training_data import render_all_distribution_histograms, render_spatial_map
|
from entropice.dashboard.plots.training_data import render_all_distribution_histograms, render_spatial_map
|
||||||
from entropice.dashboard.utils.data import load_all_training_data, load_source_data
|
from entropice.dashboard.utils.data import load_all_training_data, load_source_data
|
||||||
from entropice.dataset import DatasetEnsemble
|
from entropice.ml.dataset import DatasetEnsemble
|
||||||
|
from entropice.spatial import grids
|
||||||
|
|
||||||
|
|
||||||
def render_training_data_page():
|
def render_training_data_page():
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,8 @@ import toml
|
||||||
import xarray as xr
|
import xarray as xr
|
||||||
from shapely.geometry import shape
|
from shapely.geometry import shape
|
||||||
|
|
||||||
import entropice.paths
|
import entropice.utils.paths
|
||||||
from entropice.dataset import CategoricalTrainingDataset, DatasetEnsemble
|
from entropice.ml.dataset import CategoricalTrainingDataset, DatasetEnsemble
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -89,7 +89,7 @@ def _fix_hex_geometry(geom):
|
||||||
@st.cache_data
|
@st.cache_data
|
||||||
def load_all_training_results() -> list[TrainingResult]:
|
def load_all_training_results() -> list[TrainingResult]:
|
||||||
"""Load all training results from the results directory."""
|
"""Load all training results from the results directory."""
|
||||||
results_dir = entropice.paths.RESULTS_DIR
|
results_dir = entropice.utils.paths.RESULTS_DIR
|
||||||
training_results: list[TrainingResult] = []
|
training_results: list[TrainingResult] = []
|
||||||
for result_path in results_dir.iterdir():
|
for result_path in results_dir.iterdir():
|
||||||
if not result_path.is_dir():
|
if not result_path.is_dir():
|
||||||
|
|
@ -402,11 +402,11 @@ def extract_era5_features(
|
||||||
)
|
)
|
||||||
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year", "agg"]).unstack(
|
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year", "agg"]).unstack(
|
||||||
"feature"
|
"feature"
|
||||||
) # noqa: PD010
|
)
|
||||||
else:
|
else:
|
||||||
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year"]).unstack(
|
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year"]).unstack(
|
||||||
"feature"
|
"feature"
|
||||||
) # noqa: PD010
|
)
|
||||||
else:
|
else:
|
||||||
# For yearly: keep as variable, time, (agg)
|
# For yearly: keep as variable, time, (agg)
|
||||||
era5_features_array = era5_features_array.assign_coords(
|
era5_features_array = era5_features_array.assign_coords(
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,9 @@ import xdggs
|
||||||
from rich import pretty, print, traceback
|
from rich import pretty, print, traceback
|
||||||
from rich.progress import track
|
from rich.progress import track
|
||||||
|
|
||||||
from entropice import codecs, grids
|
from entropice.spatial import grids
|
||||||
from entropice.paths import get_annual_embeddings_file, get_embeddings_store
|
from entropice.utils import codecs
|
||||||
|
from entropice.utils.paths import get_annual_embeddings_file, get_embeddings_store
|
||||||
|
|
||||||
# Filter out the GeoDataFrame.swapaxes deprecation warning
|
# Filter out the GeoDataFrame.swapaxes deprecation warning
|
||||||
warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning)
|
warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning)
|
||||||
|
|
|
||||||
|
|
@ -28,9 +28,10 @@ from xrspatial.curvature import _run_cupy as curvature_cupy
|
||||||
from xrspatial.slope import _run_cupy as slope_cupy
|
from xrspatial.slope import _run_cupy as slope_cupy
|
||||||
from zarr.codecs import BloscCodec
|
from zarr.codecs import BloscCodec
|
||||||
|
|
||||||
from entropice import codecs, grids, watermask
|
from entropice.spatial import grids, watermask
|
||||||
from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
|
from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
|
||||||
from entropice.paths import get_arcticdem_stores
|
from entropice.utils import codecs
|
||||||
|
from entropice.utils.paths import get_arcticdem_stores
|
||||||
|
|
||||||
traceback.install(show_locals=True, suppress=[cyclopts])
|
traceback.install(show_locals=True, suppress=[cyclopts])
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
|
||||||
|
|
@ -15,8 +15,8 @@ from rich import pretty, print, traceback
|
||||||
from rich.progress import track
|
from rich.progress import track
|
||||||
from stopuhr import stopwatch
|
from stopuhr import stopwatch
|
||||||
|
|
||||||
from entropice import grids
|
from entropice.spatial import grids
|
||||||
from entropice.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
|
from entropice.utils.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
|
||||||
|
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
|
||||||
|
|
@ -96,10 +96,11 @@ from rasterio.features import shapes
|
||||||
from rich import pretty, print, traceback
|
from rich import pretty, print, traceback
|
||||||
from stopuhr import stopwatch
|
from stopuhr import stopwatch
|
||||||
|
|
||||||
from entropice import codecs, grids, watermask
|
from entropice.spatial import grids, watermask
|
||||||
from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
|
from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
|
||||||
from entropice.paths import FIGURES_DIR, get_era5_stores
|
from entropice.spatial.xvec import to_xvec
|
||||||
from entropice.xvec import to_xvec
|
from entropice.utils import codecs
|
||||||
|
from entropice.utils.paths import FIGURES_DIR, get_era5_stores
|
||||||
|
|
||||||
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd, cProfile])
|
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd, cProfile])
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ from rich import pretty, traceback
|
||||||
from sklearn import set_config
|
from sklearn import set_config
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
import entropice.paths
|
import entropice.utils.paths
|
||||||
|
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
@ -200,12 +200,12 @@ class DatasetEnsemble:
|
||||||
|
|
||||||
def _read_member(self, member: L2Dataset, targets: gpd.GeoDataFrame, lazy: bool = False) -> xr.Dataset:
|
def _read_member(self, member: L2Dataset, targets: gpd.GeoDataFrame, lazy: bool = False) -> xr.Dataset:
|
||||||
if member == "AlphaEarth":
|
if member == "AlphaEarth":
|
||||||
store = entropice.paths.get_embeddings_store(grid=self.grid, level=self.level)
|
store = entropice.utils.paths.get_embeddings_store(grid=self.grid, level=self.level)
|
||||||
elif member in ["ERA5-yearly", "ERA5-seasonal", "ERA5-shoulder"]:
|
elif member in ["ERA5-yearly", "ERA5-seasonal", "ERA5-shoulder"]:
|
||||||
era5_agg: Literal["yearly", "seasonal", "shoulder"] = member.split("-")[1] # ty:ignore[invalid-assignment]
|
era5_agg: Literal["yearly", "seasonal", "shoulder"] = member.split("-")[1] # ty:ignore[invalid-assignment]
|
||||||
store = entropice.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
|
store = entropice.utils.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
|
||||||
elif member == "ArcticDEM":
|
elif member == "ArcticDEM":
|
||||||
store = entropice.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
|
store = entropice.utils.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"Member {member} not implemented.")
|
raise NotImplementedError(f"Member {member} not implemented.")
|
||||||
|
|
||||||
|
|
@ -244,9 +244,9 @@ class DatasetEnsemble:
|
||||||
|
|
||||||
def _read_target(self) -> gpd.GeoDataFrame:
|
def _read_target(self) -> gpd.GeoDataFrame:
|
||||||
if self.target == "darts_rts":
|
if self.target == "darts_rts":
|
||||||
target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level)
|
target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level)
|
||||||
elif self.target == "darts_mllabels":
|
elif self.target == "darts_mllabels":
|
||||||
target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
|
target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"Target {self.target} not implemented.")
|
raise NotImplementedError(f"Target {self.target} not implemented.")
|
||||||
targets = gpd.read_parquet(target_store)
|
targets = gpd.read_parquet(target_store)
|
||||||
|
|
@ -343,7 +343,7 @@ class DatasetEnsemble:
|
||||||
@lru_cache(maxsize=1)
|
@lru_cache(maxsize=1)
|
||||||
def create(self, filter_target_col: str | None = None, cache_mode: Literal["n", "o", "r"] = "r") -> pd.DataFrame:
|
def create(self, filter_target_col: str | None = None, cache_mode: Literal["n", "o", "r"] = "r") -> pd.DataFrame:
|
||||||
# n: no cache, o: overwrite cache, r: read cache if exists
|
# n: no cache, o: overwrite cache, r: read cache if exists
|
||||||
cache_file = entropice.paths.get_dataset_cache(self.id(), subset=filter_target_col)
|
cache_file = entropice.utils.paths.get_dataset_cache(self.id(), subset=filter_target_col)
|
||||||
if cache_mode == "r" and cache_file.exists():
|
if cache_mode == "r" and cache_file.exists():
|
||||||
dataset = gpd.read_parquet(cache_file)
|
dataset = gpd.read_parquet(cache_file)
|
||||||
print(
|
print(
|
||||||
|
|
@ -393,7 +393,7 @@ class DatasetEnsemble:
|
||||||
|
|
||||||
for i in range(0, len(targets), batch_size):
|
for i in range(0, len(targets), batch_size):
|
||||||
# n: no cache, o: overwrite cache, r: read cache if exists
|
# n: no cache, o: overwrite cache, r: read cache if exists
|
||||||
cache_file = entropice.paths.get_dataset_cache(
|
cache_file = entropice.utils.paths.get_dataset_cache(
|
||||||
self.id(), subset=filter_target_col, batch=(i, i + batch_size)
|
self.id(), subset=filter_target_col, batch=(i, i + batch_size)
|
||||||
)
|
)
|
||||||
if cache_mode == "r" and cache_file.exists():
|
if cache_mode == "r" and cache_file.exists():
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from rich import pretty, traceback
|
||||||
from sklearn import set_config
|
from sklearn import set_config
|
||||||
from xgboost.sklearn import XGBClassifier
|
from xgboost.sklearn import XGBClassifier
|
||||||
|
|
||||||
from entropice.dataset import DatasetEnsemble
|
from entropice.ml.dataset import DatasetEnsemble
|
||||||
|
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
|
||||||
|
|
@ -20,9 +20,9 @@ from sklearn.model_selection import KFold, RandomizedSearchCV
|
||||||
from stopuhr import stopwatch
|
from stopuhr import stopwatch
|
||||||
from xgboost.sklearn import XGBClassifier
|
from xgboost.sklearn import XGBClassifier
|
||||||
|
|
||||||
from entropice.dataset import DatasetEnsemble
|
from entropice.ml.dataset import DatasetEnsemble
|
||||||
from entropice.inference import predict_proba
|
from entropice.ml.inference import predict_proba
|
||||||
from entropice.paths import get_cv_results_dir
|
from entropice.utils.paths import get_cv_results_dir
|
||||||
|
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ from shapely.geometry import LineString, Polygon
|
||||||
from stopuhr import stopwatch
|
from stopuhr import stopwatch
|
||||||
from xdggs.healpix import HealpixInfo
|
from xdggs.healpix import HealpixInfo
|
||||||
|
|
||||||
from entropice import grids
|
from entropice.spatial import grids
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ from shapely.ops import transform
|
||||||
from stopuhr import stopwatch
|
from stopuhr import stopwatch
|
||||||
from xdggs.healpix import HealpixInfo
|
from xdggs.healpix import HealpixInfo
|
||||||
|
|
||||||
from entropice.paths import get_grid_file, get_grid_viz_file, watermask_file
|
from entropice.utils.paths import get_grid_file, get_grid_viz_file, watermask_file
|
||||||
|
|
||||||
traceback.install()
|
traceback.install()
|
||||||
pretty.install()
|
pretty.install()
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
import duckdb
|
import duckdb
|
||||||
import geopandas as gpd
|
import geopandas as gpd
|
||||||
|
|
||||||
from entropice.paths import watermask_file
|
from entropice.utils.paths import watermask_file
|
||||||
|
|
||||||
|
|
||||||
def open():
|
def open():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue