Update imports
This commit is contained in:
parent
45bc61e49e
commit
495ddc13f9
18 changed files with 64 additions and 40 deletions
18
.github/copilot-instructions.md
vendored
18
.github/copilot-instructions.md
vendored
|
|
@ -129,11 +129,22 @@ pixi install # NOT pip install or conda install
|
|||
pixi run pytest
|
||||
```
|
||||
|
||||
### Running Python Commands
|
||||
|
||||
Always use `pixi run` to execute Python commands so that they run in the correct environment:
|
||||
|
||||
```bash
|
||||
pixi run python script.py
|
||||
pixi run python -c "import entropice"
|
||||
```
|
||||
|
||||
### Common Tasks
|
||||
|
||||
- **Generate grids**: Use `spatial/grids.py` CLI
|
||||
- **Process labels**: Use `ingest/darts.py` CLI
|
||||
- **Train models**: Use `ml/training.py` CLI with TOML config
|
||||
**Important**: Always use the `pixi run` prefix for Python commands to ensure the correct environment is used.
|
||||
|
||||
- **Generate grids**: Use `pixi run create-grid` or `spatial/grids.py` CLI
|
||||
- **Process labels**: Use `pixi run darts` or `ingest/darts.py` CLI
|
||||
- **Train models**: Use `pixi run train` with TOML config or `ml/training.py` CLI
|
||||
- **Run inference**: Use `ml/inference.py` CLI
|
||||
- **View results**: `pixi run dashboard`
|
||||
|
||||
|
|
@ -191,6 +202,7 @@ To extend Entropice:
|
|||
|
||||
## Important Notes
|
||||
|
||||
- **Always use `pixi run` prefix** for Python commands (not plain `python`)
|
||||
- Grid resolutions: **H3** (3-6), **HEALPix** (6-10)
|
||||
- Arctic years run **October 1 to September 30** (not calendar years)
|
||||
- Handle **antimeridian crossing** in polar regions
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ This will set up the complete environment including RAPIDS, PyTorch, and all geo
|
|||
|
||||
## Development Workflow
|
||||
|
||||
**Important**: Always use `pixi run` to execute Python commands and scripts to ensure you're using the correct environment with all dependencies.
|
||||
|
||||
### Code Organization
|
||||
|
||||
- **`src/entropice/ingest/`**: Data ingestion modules (darts, era5, arcticdem, alphaearth)
|
||||
|
|
@ -72,6 +74,13 @@ Run tests for specific modules:
|
|||
pixi run pytest
|
||||
```
|
||||
|
||||
When running Python scripts or commands, always use `pixi run`:
|
||||
|
||||
```bash
|
||||
pixi run python script.py
|
||||
pixi run python -c "import entropice"
|
||||
```
|
||||
|
||||
When adding features, include tests that verify:
|
||||
|
||||
- Correct handling of geospatial coordinates and projections
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import toml
|
|||
import xarray as xr
|
||||
from rich import print
|
||||
|
||||
from entropice.paths import RESULTS_DIR
|
||||
from entropice.utils.paths import RESULTS_DIR
|
||||
|
||||
|
||||
def fix_xgboost_model_state(results_dir: Path) -> bool:
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import streamlit as st
|
|||
|
||||
from entropice.dashboard.plots.colors import get_palette
|
||||
from entropice.dashboard.utils.data import load_all_training_results
|
||||
from entropice.dataset import DatasetEnsemble
|
||||
from entropice.ml.dataset import DatasetEnsemble
|
||||
|
||||
|
||||
# Type definitions for dataset statistics
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import streamlit as st
|
|||
from shapely.geometry import shape
|
||||
|
||||
from entropice.dashboard.plots.colors import get_cmap, get_palette
|
||||
from entropice.dataset import DatasetEnsemble
|
||||
from entropice.ml.dataset import DatasetEnsemble
|
||||
|
||||
|
||||
def render_performance_summary(results: pd.DataFrame, refit_metric: str):
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import streamlit as st
|
|||
from shapely.geometry import shape
|
||||
|
||||
from entropice.dashboard.plots.colors import get_palette
|
||||
from entropice.dataset import CategoricalTrainingDataset
|
||||
from entropice.ml.dataset import CategoricalTrainingDataset
|
||||
|
||||
|
||||
def render_all_distribution_histograms(train_data_dict: dict[str, CategoricalTrainingDataset]):
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
import streamlit as st
|
||||
|
||||
from entropice import grids
|
||||
from entropice.dashboard.plots.source_data import (
|
||||
render_alphaearth_map,
|
||||
render_alphaearth_overview,
|
||||
|
|
@ -17,7 +16,8 @@ from entropice.dashboard.plots.source_data import (
|
|||
)
|
||||
from entropice.dashboard.plots.training_data import render_all_distribution_histograms, render_spatial_map
|
||||
from entropice.dashboard.utils.data import load_all_training_data, load_source_data
|
||||
from entropice.dataset import DatasetEnsemble
|
||||
from entropice.ml.dataset import DatasetEnsemble
|
||||
from entropice.spatial import grids
|
||||
|
||||
|
||||
def render_training_data_page():
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ import toml
|
|||
import xarray as xr
|
||||
from shapely.geometry import shape
|
||||
|
||||
import entropice.paths
|
||||
from entropice.dataset import CategoricalTrainingDataset, DatasetEnsemble
|
||||
import entropice.utils.paths
|
||||
from entropice.ml.dataset import CategoricalTrainingDataset, DatasetEnsemble
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -89,7 +89,7 @@ def _fix_hex_geometry(geom):
|
|||
@st.cache_data
|
||||
def load_all_training_results() -> list[TrainingResult]:
|
||||
"""Load all training results from the results directory."""
|
||||
results_dir = entropice.paths.RESULTS_DIR
|
||||
results_dir = entropice.utils.paths.RESULTS_DIR
|
||||
training_results: list[TrainingResult] = []
|
||||
for result_path in results_dir.iterdir():
|
||||
if not result_path.is_dir():
|
||||
|
|
@ -402,11 +402,11 @@ def extract_era5_features(
|
|||
)
|
||||
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year", "agg"]).unstack(
|
||||
"feature"
|
||||
) # noqa: PD010
|
||||
)
|
||||
else:
|
||||
era5_features_array = era5_features_array.set_index(feature=["variable", "season", "year"]).unstack(
|
||||
"feature"
|
||||
) # noqa: PD010
|
||||
)
|
||||
else:
|
||||
# For yearly: keep as variable, time, (agg)
|
||||
era5_features_array = era5_features_array.assign_coords(
|
||||
|
|
|
|||
|
|
@ -22,8 +22,9 @@ import xdggs
|
|||
from rich import pretty, print, traceback
|
||||
from rich.progress import track
|
||||
|
||||
from entropice import codecs, grids
|
||||
from entropice.paths import get_annual_embeddings_file, get_embeddings_store
|
||||
from entropice.spatial import grids
|
||||
from entropice.utils import codecs
|
||||
from entropice.utils.paths import get_annual_embeddings_file, get_embeddings_store
|
||||
|
||||
# Filter out the GeoDataFrame.swapaxes deprecation warning
|
||||
warnings.filterwarnings("ignore", message=".*GeoDataFrame.swapaxes.*", category=FutureWarning)
|
||||
|
|
|
|||
|
|
@ -28,9 +28,10 @@ from xrspatial.curvature import _run_cupy as curvature_cupy
|
|||
from xrspatial.slope import _run_cupy as slope_cupy
|
||||
from zarr.codecs import BloscCodec
|
||||
|
||||
from entropice import codecs, grids, watermask
|
||||
from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
|
||||
from entropice.paths import get_arcticdem_stores
|
||||
from entropice.spatial import grids, watermask
|
||||
from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
|
||||
from entropice.utils import codecs
|
||||
from entropice.utils.paths import get_arcticdem_stores
|
||||
|
||||
traceback.install(show_locals=True, suppress=[cyclopts])
|
||||
pretty.install()
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@ from rich import pretty, print, traceback
|
|||
from rich.progress import track
|
||||
from stopuhr import stopwatch
|
||||
|
||||
from entropice import grids
|
||||
from entropice.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
|
||||
from entropice.spatial import grids
|
||||
from entropice.utils.paths import darts_ml_training_labels_repo, dartsl2_cov_file, dartsl2_file, get_darts_rts_file
|
||||
|
||||
traceback.install()
|
||||
pretty.install()
|
||||
|
|
|
|||
|
|
@ -96,10 +96,11 @@ from rasterio.features import shapes
|
|||
from rich import pretty, print, traceback
|
||||
from stopuhr import stopwatch
|
||||
|
||||
from entropice import codecs, grids, watermask
|
||||
from entropice.aggregators import _Aggregations, aggregate_raster_into_grid
|
||||
from entropice.paths import FIGURES_DIR, get_era5_stores
|
||||
from entropice.xvec import to_xvec
|
||||
from entropice.spatial import grids, watermask
|
||||
from entropice.spatial.aggregators import _Aggregations, aggregate_raster_into_grid
|
||||
from entropice.spatial.xvec import to_xvec
|
||||
from entropice.utils import codecs
|
||||
from entropice.utils.paths import FIGURES_DIR, get_era5_stores
|
||||
|
||||
traceback.install(show_locals=True, suppress=[cyclopts, xr, pd, cProfile])
|
||||
pretty.install()
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ from rich import pretty, traceback
|
|||
from sklearn import set_config
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
import entropice.paths
|
||||
import entropice.utils.paths
|
||||
|
||||
traceback.install()
|
||||
pretty.install()
|
||||
|
|
@ -200,12 +200,12 @@ class DatasetEnsemble:
|
|||
|
||||
def _read_member(self, member: L2Dataset, targets: gpd.GeoDataFrame, lazy: bool = False) -> xr.Dataset:
|
||||
if member == "AlphaEarth":
|
||||
store = entropice.paths.get_embeddings_store(grid=self.grid, level=self.level)
|
||||
store = entropice.utils.paths.get_embeddings_store(grid=self.grid, level=self.level)
|
||||
elif member in ["ERA5-yearly", "ERA5-seasonal", "ERA5-shoulder"]:
|
||||
era5_agg: Literal["yearly", "seasonal", "shoulder"] = member.split("-")[1] # ty:ignore[invalid-assignment]
|
||||
store = entropice.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
|
||||
store = entropice.utils.paths.get_era5_stores(era5_agg, grid=self.grid, level=self.level)
|
||||
elif member == "ArcticDEM":
|
||||
store = entropice.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
|
||||
store = entropice.utils.paths.get_arcticdem_stores(grid=self.grid, level=self.level)
|
||||
else:
|
||||
raise NotImplementedError(f"Member {member} not implemented.")
|
||||
|
||||
|
|
@ -244,9 +244,9 @@ class DatasetEnsemble:
|
|||
|
||||
def _read_target(self) -> gpd.GeoDataFrame:
|
||||
if self.target == "darts_rts":
|
||||
target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level)
|
||||
target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level)
|
||||
elif self.target == "darts_mllabels":
|
||||
target_store = entropice.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
|
||||
target_store = entropice.utils.paths.get_darts_rts_file(grid=self.grid, level=self.level, labels=True)
|
||||
else:
|
||||
raise NotImplementedError(f"Target {self.target} not implemented.")
|
||||
targets = gpd.read_parquet(target_store)
|
||||
|
|
@ -343,7 +343,7 @@ class DatasetEnsemble:
|
|||
@lru_cache(maxsize=1)
|
||||
def create(self, filter_target_col: str | None = None, cache_mode: Literal["n", "o", "r"] = "r") -> pd.DataFrame:
|
||||
# n: no cache, o: overwrite cache, r: read cache if exists
|
||||
cache_file = entropice.paths.get_dataset_cache(self.id(), subset=filter_target_col)
|
||||
cache_file = entropice.utils.paths.get_dataset_cache(self.id(), subset=filter_target_col)
|
||||
if cache_mode == "r" and cache_file.exists():
|
||||
dataset = gpd.read_parquet(cache_file)
|
||||
print(
|
||||
|
|
@ -393,7 +393,7 @@ class DatasetEnsemble:
|
|||
|
||||
for i in range(0, len(targets), batch_size):
|
||||
# n: no cache, o: overwrite cache, r: read cache if exists
|
||||
cache_file = entropice.paths.get_dataset_cache(
|
||||
cache_file = entropice.utils.paths.get_dataset_cache(
|
||||
self.id(), subset=filter_target_col, batch=(i, i + batch_size)
|
||||
)
|
||||
if cache_mode == "r" and cache_file.exists():
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from rich import pretty, traceback
|
|||
from sklearn import set_config
|
||||
from xgboost.sklearn import XGBClassifier
|
||||
|
||||
from entropice.dataset import DatasetEnsemble
|
||||
from entropice.ml.dataset import DatasetEnsemble
|
||||
|
||||
traceback.install()
|
||||
pretty.install()
|
||||
|
|
|
|||
|
|
@ -20,9 +20,9 @@ from sklearn.model_selection import KFold, RandomizedSearchCV
|
|||
from stopuhr import stopwatch
|
||||
from xgboost.sklearn import XGBClassifier
|
||||
|
||||
from entropice.dataset import DatasetEnsemble
|
||||
from entropice.inference import predict_proba
|
||||
from entropice.paths import get_cv_results_dir
|
||||
from entropice.ml.dataset import DatasetEnsemble
|
||||
from entropice.ml.inference import predict_proba
|
||||
from entropice.utils.paths import get_cv_results_dir
|
||||
|
||||
traceback.install()
|
||||
pretty.install()
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ from shapely.geometry import LineString, Polygon
|
|||
from stopuhr import stopwatch
|
||||
from xdggs.healpix import HealpixInfo
|
||||
|
||||
from entropice import grids
|
||||
from entropice.spatial import grids
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ from shapely.ops import transform
|
|||
from stopuhr import stopwatch
|
||||
from xdggs.healpix import HealpixInfo
|
||||
|
||||
from entropice.paths import get_grid_file, get_grid_viz_file, watermask_file
|
||||
from entropice.utils.paths import get_grid_file, get_grid_viz_file, watermask_file
|
||||
|
||||
traceback.install()
|
||||
pretty.install()
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import duckdb
|
||||
import geopandas as gpd
|
||||
|
||||
from entropice.paths import watermask_file
|
||||
from entropice.utils.paths import watermask_file
|
||||
|
||||
|
||||
def open():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue