Run main experiments

This commit is contained in:
Tobias Hölzer 2026-02-12 18:34:35 +01:00
parent 3ce6b6e867
commit f9df8e9fe6
43 changed files with 4112 additions and 4022 deletions

197
pixi.lock generated
View file

@ -243,6 +243,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/c8/a7/a597ff7dd1e1603abd94991ce242f93979d5f10b0d45ed23976dfb22bf64/altair_tiles-0.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c8/a7/a597ff7dd1e1603abd94991ce242f93979d5f10b0d45ed23976dfb22bf64/altair_tiles-0.4.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/69/ce/68d6e31f0a75a5cccc03535e47434c0ca4be37fe950e93117e455cbc362c/antimeridian-0.4.5-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/69/ce/68d6e31f0a75a5cccc03535e47434c0ca4be37fe950e93117e455cbc362c/antimeridian-0.4.5-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz - pypi: https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz
@ -257,14 +258,14 @@ environments:
- pypi: https://files.pythonhosted.org/packages/31/4a/72dc383d1a0d14f1d453e334e3461e229762edb1bf3f75b3ab977e9386ed/arro3_core-0.6.5-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/31/4a/72dc383d1a0d14f1d453e334e3461e229762edb1bf3f75b3ab977e9386ed/arro3_core-0.6.5-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/1b/df/2a5a1306dc1699b51b02c1c38c55f3564a8c4f84087c23c61e7e7ae37dfa/arro3_io-0.6.5-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/1b/df/2a5a1306dc1699b51b02c1c38c55f3564a8c4f84087c23c61e7e7ae37dfa/arro3_io-0.6.5-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/c3/1c/f06ad85180e7dd9855aa5ede901bfc2be858d7bee17d4e978a14c0ecec14/astropy-7.2.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/c3/1c/f06ad85180e7dd9855aa5ede901bfc2be858d7bee17d4e978a14c0ecec14/astropy-7.2.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/74/51/59effa402d4ce8813e42eb62416059d42dd07826b0e7aa2db057c336972d/astropy_iers_data-0.2026.2.2.0.48.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/4e/67/6af8b422c04dec79c908cf60fdcd4725c3c112b2a058087c4ff58284a142/astropy_iers_data-0.2026.2.9.0.50.33-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/d2/39/e7eaf1799466a4aef85b6a4fe7bd175ad2b1c6345066aa33f1f58d4b18d0/asttokens-3.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d2/39/e7eaf1799466a4aef85b6a4fe7bd175ad2b1c6345066aa33f1f58d4b18d0/asttokens-3.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/2d/1b/37d8a28965907d23eeba8bce56272932ee01176d192cefdf19a4a0b53c00/autogluon_common-1.5.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/2d/1b/37d8a28965907d23eeba8bce56272932ee01176d192cefdf19a4a0b53c00/autogluon_common-1.5.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/96/de/4bffa0f6f3257e73a22402019d19fbe34dfedc2865896f97ad57935cf7dd/autogluon_core-1.5.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/96/de/4bffa0f6f3257e73a22402019d19fbe34dfedc2865896f97ad57935cf7dd/autogluon_core-1.5.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/f3/c8/46eb69e371da89337419d3c754140f3ddae3c85a81b061ba3f275f442475/autogluon_features-1.5.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/f3/c8/46eb69e371da89337419d3c754140f3ddae3c85a81b061ba3f275f442475/autogluon_features-1.5.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/48/7c/50547d2940e98c8a15b8c92cd4953814385b95f5fc1dec806fa240389417/autogluon_tabular-1.5.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/7c/50547d2940e98c8a15b8c92cd4953814385b95f5fc1dec806fa240389417/autogluon_tabular-1.5.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/fc/d8/b8fcba9464f02b121f39de2db2bf57f0b216fe11d014513d666e8634380d/azure_core-1.38.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/db/88/aaea2ad269ce70b446660371286272c1f6ba66541a7f6f635baf8b0db726/azure_core-1.38.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/d8/3a/6ef2047a072e54e1142718d433d50e9514c999a58f51abfff7902f3a72f8/azure_storage_blob-12.28.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d8/3a/6ef2047a072e54e1142718d433d50e9514c999a58f51abfff7902f3a72f8/azure_storage_blob-12.28.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl
@ -298,10 +299,10 @@ environments:
- pypi: https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/2b/08/f83e2e0814248b844265802d081f2fac2f1cbe6cd258e72ba14ff006823a/cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/fa/25/0be9314cd72fe2ee2ef89ceb1f438bc156428a12177d684040456eee4a56/cupy_xarray-0.1.4-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fa/25/0be9314cd72fe2ee2ef89ceb1f438bc156428a12177d684040456eee4a56/cupy_xarray-0.1.4-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/2b/03/f906829bcfcbb945f19d6a64240ffb66a31d69ca5533e95882f0efc9c13c/cyclopts-4.5.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/36/36/bc980b9a14409f3356309c45a8d88d58797d02002a9d794dd6c84e809d3a/cymem-2.0.13-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl - pypi: https://files.pythonhosted.org/packages/36/36/bc980b9a14409f3356309c45a8d88d58797d02002a9d794dd6c84e809d3a/cymem-2.0.13-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/e5/23/d39ccc4ed76222db31530b0a7d38876fdb7673e23f838e8d8f0ed4651a4f/dask-2026.1.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e5/23/d39ccc4ed76222db31530b0a7d38876fdb7673e23f838e8d8f0ed4651a4f/dask-2026.1.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/28/0e/b11ad5fd77e3dd0baad9cac3184315be7654ae401e3b0b0c324503f23d96/datashader-0.18.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/28/0e/b11ad5fd77e3dd0baad9cac3184315be7654ae401e3b0b0c324503f23d96/datashader-0.18.2-py3-none-any.whl
@ -314,7 +315,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/53/32/256df3dbaa198c58539ad94f9a41e98c2c8ff23f126b8f5f52c7dcd0a738/duckdb-1.4.4-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/53/32/256df3dbaa198c58539ad94f9a41e98c2c8ff23f126b8f5f52c7dcd0a738/duckdb-1.4.4-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/a7/39/ce46ee84779ef19d88fd028fc786a6dcc68b73ace33c31997aeda0dfecdc/earthengine_api-1.7.12-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c2/0f/875b6df73f884062f3bd7d62a2fb9bfc1d07d1c93a611e999401c5b10ca0/earthengine_api-1.7.13-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/04/40/2ccf4c87a5f9c8198fe71600d5f307f5dada201c091af8774a9c1e360865/ecmwf_datastores_client-0.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/04/40/2ccf4c87a5f9c8198fe71600d5f307f5dada201c091af8774a9c1e360865/ecmwf_datastores_client-0.4.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/65/54/5e3b0e41799e17e5eff1547fda4aab53878c0adb4243de6b95f8ddef899e/ee_extra-2025.7.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/65/54/5e3b0e41799e17e5eff1547fda4aab53878c0adb4243de6b95f8ddef899e/ee_extra-2025.7.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/06/98/3e22f4386f6c1957f5994c9aa9cedd8a442bb75766bd0b2e2c1c92854af9/eemont-2025.7.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/06/98/3e22f4386f6c1957f5994c9aa9cedd8a442bb75766bd0b2e2c1c92854af9/eemont-2025.7.1-py3-none-any.whl
@ -326,12 +327,14 @@ environments:
- pypi: https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/7e/31/d229f6cdb9cbe03020499d69c4b431b705aa19a55aa0fe698c98022b2fef/faiss_cpu-1.12.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/7e/31/d229f6cdb9cbe03020499d69c4b431b705aa19a55aa0fe698c98022b2fef/faiss_cpu-1.12.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/c7/7d/74dd43d58f37584b32f0d781c8dbea9a286ee73e90393394e70569d4f254/fastai-2.8.6-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c7/7d/74dd43d58f37584b32f0d781c8dbea9a286ee73e90393394e70569d4f254/fastai-2.8.6-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/23/03/2fe18e3d718b5a36d6c548df3e7662a4c433efea4d28662063d259248a1d/fastcore-1.12.11-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/59/f3/f71552b94a39509b62e72c4a26b6e4440bb9ce6decacf90af2916829e69e/fastcluster-1.3.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/ea/d6/bb13c44b5863c0be7a27ef02982eca88f50d717549df1979e85942292239/fastcore-1.12.12-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/47/60/ed35253a05a70b63e4f52df1daa39a6a464a3e22b0bd060b77f63e2e2b6a/fastdownload-0.0.7-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/47/60/ed35253a05a70b63e4f52df1daa39a6a464a3e22b0bd060b77f63e2e2b6a/fastdownload-0.0.7-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/fe/a7/af33584fa6d17b911cfaba460efd3409cb5dd47083c181a4fdfec4bef840/fastlite-0.2.4-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fe/a7/af33584fa6d17b911cfaba460efd3409cb5dd47083c181a4fdfec4bef840/fastlite-0.2.4-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/79/45/4aa502bbda9b63c792463c3466a2c5ef3c0830935f81906043f66b2b6c74/fastprogress-1.1.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/79/45/4aa502bbda9b63c792463c3466a2c5ef3c0830935f81906043f66b2b6c74/fastprogress-1.1.3-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/47/3d/4b85b47a7e70d5c7cc0cf7d7b2883646c9c0bd3ef54a33f23d5873aa910c/fasttransform-0.0.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/47/3d/4b85b47a7e70d5c7cc0cf7d7b2883646c9c0bd3ef54a33f23d5873aa910c/fasttransform-0.0.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/fa/97/3702c3be0e5ad3f46a75ccb9f30b6d20bd9432d9940a0c62dfa4869b4758/flox-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/92/54/dc5aec836660a37f11a8c66300bc2c18be254ef3a78ff08869ed1960c0fb/flox-0.11.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/b5/a8/5f764f333204db0390362a4356d03a43626997f26818a0e9396f1b3bd8c9/folium-0.20.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b5/a8/5f764f333204db0390362a4356d03a43626997f26818a0e9396f1b3bd8c9/folium-0.20.0-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl - pypi: https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/38/74/f94141b38a51a553efef7f510fc213894161ae49b88bffd037f8d2a7cb2f/frozendict-2.4.7-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/38/74/f94141b38a51a553efef7f510fc213894161ae49b88bffd037f8d2a7cb2f/frozendict-2.4.7-py3-none-any.whl
@ -347,7 +350,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/77/b6/85c4d21067220b9a78cfb81f516f9725ea6befc1544ec9bd2c1acd97c324/google_api_core-2.29.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/77/b6/85c4d21067220b9a78cfb81f516f9725ea6befc1544ec9bd2c1acd97c324/google_api_core-2.29.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/04/44/3677ff27998214f2fa7957359da48da378a0ffff1bd0bdaba42e752bc13e/google_api_python_client-2.189.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/07/ad/223d5f4b0b987669ffeb3eadd7e9f85ece633aa7fd3246f1e2f6238e1e05/google_api_python_client-2.190.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/99/d5/3c97526c8796d3caf5f4b3bed2b05e8a7102326f00a334e7a438237f3b22/google_auth_httplib2-0.3.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/99/d5/3c97526c8796d3caf5f4b3bed2b05e8a7102326f00a334e7a438237f3b22/google_auth_httplib2-0.3.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/89/20/bfa472e327c8edee00f04beecc80baeddd2ab33ee0e86fd7654da49d45e9/google_cloud_core-2.5.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/89/20/bfa472e327c8edee00f04beecc80baeddd2ab33ee0e86fd7654da49d45e9/google_cloud_core-2.5.0-py3-none-any.whl
@ -400,7 +403,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/de/73/3d757cb3fc16f0f9794dd289bcd0c4a031d9cf54d8137d6b984b2d02edf3/lightning_utilities-0.15.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/de/73/3d757cb3fc16f0f9794dd289bcd0c4a031d9cf54d8137d6b984b2d02edf3/lightning_utilities-0.15.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/db/bc/83e112abc66cd466c6b83f99118035867cecd41802f8d044638aa78a106e/locket-1.0.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/db/bc/83e112abc66cd466c6b83f99118035867cecd41802f8d044638aa78a106e/locket-1.0.0-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/c4/bd/ba44a47578ea48ee28b54543c1de8c529eedad8317516a2a753e6d9c77c5/lonboard-0.13.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/87/c4/c15eb88220cc6211eb3756c858a76f6ac26b99e2433831d2d7022ad0ff72/lonboard-0.14.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/a7/5e/19fb53bd69379498c47bc234ca4d2851cfbca333d6d6929b10251916da25/mapclassify-2.10.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/a7/5e/19fb53bd69379498c47bc234ca4d2851cfbca333d6d6929b10251916da25/mapclassify-2.10.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/75/97/a471f1c3eb1fd6f6c24a31a5858f443891d5127e63a7788678d14e249aea/matplotlib-3.10.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl - pypi: https://files.pythonhosted.org/packages/75/97/a471f1c3eb1fd6f6c24a31a5858f443891d5127e63a7788678d14e249aea/matplotlib-3.10.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl
@ -414,6 +417,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/93/cf/be4e93afbfa0def2cd6fac9302071db0bd6d0617999ecbf53f92b9398de3/multiurl-0.3.7-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/93/cf/be4e93afbfa0def2cd6fac9302071db0bd6d0617999ecbf53f92b9398de3/multiurl-0.3.7-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/e2/63/58e2de2b5232cd294c64092688c422196e74f9fa8b3958bdf02d33df24b9/murmurhash-1.0.15-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl - pypi: https://files.pythonhosted.org/packages/e2/63/58e2de2b5232cd294c64092688c422196e74f9fa8b3958bdf02d33df24b9/murmurhash-1.0.15-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/03/cc/7cb74758e6df95e0c4e1253f203b6dd7f348bf2f29cf89e9210a2416d535/narwhals-2.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/03/cc/7cb74758e6df95e0c4e1253f203b6dd7f348bf2f29cf89e9210a2416d535/narwhals-2.16.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/7b/7a/a8d32501bb95ecff342004a674720164f95ad616f269450b3bc13dc88ae3/netcdf4-1.7.4-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/7b/7a/a8d32501bb95ecff342004a674720164f95ad616f269450b3bc13dc88ae3/netcdf4-1.7.4-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/ae/d3/ff8f1b9968aa4dcd1da1880322ed492314cc920998182e549b586c895a17/numbagg-0.9.4-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/ae/d3/ff8f1b9968aa4dcd1da1880322ed492314cc920998182e549b586c895a17/numbagg-0.9.4-py3-none-any.whl
@ -436,13 +440,13 @@ environments:
- pypi: https://files.pythonhosted.org/packages/23/cd/066e86230ae37ed0be70aae89aabf03ca8d9f39c8aea0dec8029455b5540/opt_einsum-3.4.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/23/cd/066e86230ae37ed0be70aae89aabf03ca8d9f39c8aea0dec8029455b5540/opt_einsum-3.4.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/d1/c6/df1fe324248424f77b89371116dab5243db7f052c32cc9fe7442ad9c5f75/pandas_stubs-2.3.3.260113-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d1/c6/df1fe324248424f77b89371116dab5243db7f052c32cc9fe7442ad9c5f75/pandas_stubs-2.3.3.260113-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/11/b6/f8c7e1f5f716e16070cf35f90c24f95f397376bb810e65000b6bc55950cc/param-2.3.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/11/b6/f8c7e1f5f716e16070cf35f90c24f95f397376bb810e65000b6bc55950cc/param-2.3.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/16/32/f8e3c85d1d5250232a5d3477a2a28cc291968ff175caeadaf3cc19ce0e4a/parso-0.8.5-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b6/61/fae042894f4296ec49e3f193aff5d7c18440da9e48102c3315e1bc4519a7/parso-0.8.6-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/c0/db/61efa0d08a99f897ef98256b03e563092d36cc38dc4ebe4a85020fe40b31/pbr-7.0.3-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c0/db/61efa0d08a99f897ef98256b03e563092d36cc38dc4ebe4a85020fe40b31/pbr-7.0.3-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/de/f0/c81e05b613866b76d2d1066490adf1a3dbc4ee9d9c839961c3fc8a6997af/pip-26.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/de/f0/c81e05b613866b76d2d1066490adf1a3dbc4ee9d9c839961c3fc8a6997af/pip-26.0.1-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/da/10/1b0dcf51427326f70e50d98df21b18c228117a743a1fc515a42f8dc7d342/platformdirs-4.6.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/8a/67/f95b5460f127840310d2187f916cf0023b5875c0717fdf893f71e1325e87/plotly-6.5.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8a/67/f95b5460f127840310d2187f916cf0023b5875c0717fdf893f71e1325e87/plotly-6.5.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/42/88/71fa06eb487ed9d4fab0ad173300b7a58706385f98fb66b1ccdc3ec3d4dd/plum_dispatch-2.6.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/42/88/71fa06eb487ed9d4fab0ad173300b7a58706385f98fb66b1ccdc3ec3d4dd/plum_dispatch-2.6.1-py3-none-any.whl
@ -505,6 +509,7 @@ environments:
- pypi: https://files.pythonhosted.org/packages/ca/63/2c6daf59d86b1c30600bff679d039f57fd1932af82c43c0bde1cbc55e8d4/sentry_sdk-2.52.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/ca/63/2c6daf59d86b1c30600bff679d039f57fd1932af82c43c0bde1cbc55e8d4/sentry_sdk-2.52.0-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/6b/6a/c006de5df0e0f4850aa94019df1f79bf6a5342fa851ca85e4728691fd0c4/shap-0.50.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/6b/6a/c006de5df0e0f4850aa94019df1f79bf6a5342fa851ca85e4728691fd0c4/shap-0.50.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/f2/a2/83fc37e2a58090e3d2ff79175a95493c664bcd0b653dd75cb9134645a4e5/shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl - pypi: https://files.pythonhosted.org/packages/f2/a2/83fc37e2a58090e3d2ff79175a95493c664bcd0b653dd75cb9134645a4e5/shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/63/81/9ef641ff4e12cbcca30e54e72fb0951a2ba195d0cda0ba4100e532d929db/slicer-0.0.8-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/63/81/9ef641ff4e12cbcca30e54e72fb0951a2ba195d0cda0ba4100e532d929db/slicer-0.0.8-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/ab/6c/1d4db72c5dbbb9ea2fbc323a40986917cca84ca098f6fcf80624370979e7/smart_geocubes-0.1.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/ab/6c/1d4db72c5dbbb9ea2fbc323a40986917cca84ca098f6fcf80624370979e7/smart_geocubes-0.1.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl
@ -544,7 +549,8 @@ environments:
- pypi: https://files.pythonhosted.org/packages/8d/c0/fdf9d3ee103ce66a55f0532835ad5e154226c5222423c6636ba049dc42fc/traittypes-0.2.3-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8d/c0/fdf9d3ee103ce66a55f0532835ad5e154226c5222423c6636ba049dc42fc/traittypes-0.2.3-py2.py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/74/18/8dd4fe6df1fd66f3e83b4798eddb1d8482d9d9b105f25099b76703402ebb/ty-0.0.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - pypi: https://files.pythonhosted.org/packages/74/18/8dd4fe6df1fd66f3e83b4798eddb1d8482d9d9b105f25099b76703402ebb/ty-0.0.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
- pypi: https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7a/ed/d6fca788b51d0d4640c4bc82d0e85bad4b49809bca36bf4af01b4dcb66a7/typer-0.23.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/07/3e/ba3a222c80ee070d9497ece3e1fe77253c142925dd4c90f04278aac0a9eb/typer_slim-0.23.0-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl
- pypi: https://files.pythonhosted.org/packages/d6/32/48209716f9715d77f1bce084ad74c5d3cfcf41fd78d0c7e7dbe4829cfa3a/ultraplot-1.72.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/d6/32/48209716f9715d77f1bce084ad74c5d3cfcf41fd78d0c7e7dbe4829cfa3a/ultraplot-1.72.0-py3-none-any.whl
@ -732,6 +738,11 @@ packages:
- jupyter-book ; extra == 'doc' - jupyter-book ; extra == 'doc'
- vl-convert-python ; extra == 'doc' - vl-convert-python ; extra == 'doc'
requires_python: '>=3.9' requires_python: '>=3.9'
- pypi: https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl
name: annotated-doc
version: 0.0.4
sha256: 571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320
requires_python: '>=3.8'
- pypi: https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl
name: annotated-types name: annotated-types
version: 0.7.0 version: 0.7.0
@ -920,10 +931,10 @@ packages:
- astropy[dev] ; extra == 'dev-all' - astropy[dev] ; extra == 'dev-all'
- astropy[test-all] ; extra == 'dev-all' - astropy[test-all] ; extra == 'dev-all'
requires_python: '>=3.11' requires_python: '>=3.11'
- pypi: https://files.pythonhosted.org/packages/74/51/59effa402d4ce8813e42eb62416059d42dd07826b0e7aa2db057c336972d/astropy_iers_data-0.2026.2.2.0.48.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/4e/67/6af8b422c04dec79c908cf60fdcd4725c3c112b2a058087c4ff58284a142/astropy_iers_data-0.2026.2.9.0.50.33-py3-none-any.whl
name: astropy-iers-data name: astropy-iers-data
version: 0.2026.2.2.0.48.1 version: 0.2026.2.9.0.50.33
sha256: 62aecb2faea740e0d714808b85512ebe4f29adbfe1e8d5e5481cfd66494d164f sha256: ac01dede0240499b23c2b89fdc93093500336197c5c794e6a01173cfd78a7620
requires_dist: requires_dist:
- pytest ; extra == 'docs' - pytest ; extra == 'docs'
- hypothesis ; extra == 'test' - hypothesis ; extra == 'test'
@ -1295,10 +1306,10 @@ packages:
purls: [] purls: []
size: 3472674 size: 3472674
timestamp: 1765257107074 timestamp: 1765257107074
- pypi: https://files.pythonhosted.org/packages/fc/d8/b8fcba9464f02b121f39de2db2bf57f0b216fe11d014513d666e8634380d/azure_core-1.38.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/db/88/aaea2ad269ce70b446660371286272c1f6ba66541a7f6f635baf8b0db726/azure_core-1.38.1-py3-none-any.whl
name: azure-core name: azure-core
version: 1.38.0 version: 1.38.1
sha256: ab0c9b2cd71fecb1842d52c965c95285d3cfb38902f6766e4a471f1cd8905335 sha256: 69f08ee3d55136071b7100de5b198994fc1c5f89d2b91f2f43156d20fcf200a4
requires_dist: requires_dist:
- requests>=2.21.0 - requests>=2.21.0
- typing-extensions>=4.6.0 - typing-extensions>=4.6.0
@ -1871,17 +1882,17 @@ packages:
purls: [] purls: []
size: 48369 size: 48369
timestamp: 1765019689213 timestamp: 1765019689213
- pypi: https://files.pythonhosted.org/packages/2b/08/f83e2e0814248b844265802d081f2fac2f1cbe6cd258e72ba14ff006823a/cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl
name: cryptography name: cryptography
version: 46.0.4 version: 46.0.5
sha256: 0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255 sha256: 3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed
requires_dist: requires_dist:
- cffi>=1.14 ; python_full_version == '3.8.*' and platform_python_implementation != 'PyPy' - cffi>=1.14 ; python_full_version == '3.8.*' and platform_python_implementation != 'PyPy'
- cffi>=2.0.0 ; python_full_version >= '3.9' and platform_python_implementation != 'PyPy' - cffi>=2.0.0 ; python_full_version >= '3.9' and platform_python_implementation != 'PyPy'
- typing-extensions>=4.13.2 ; python_full_version < '3.11' - typing-extensions>=4.13.2 ; python_full_version < '3.11'
- bcrypt>=3.1.5 ; extra == 'ssh' - bcrypt>=3.1.5 ; extra == 'ssh'
- nox[uv]>=2024.4.15 ; extra == 'nox' - nox[uv]>=2024.4.15 ; extra == 'nox'
- cryptography-vectors==46.0.4 ; extra == 'test' - cryptography-vectors==46.0.5 ; extra == 'test'
- pytest>=7.4.0 ; extra == 'test' - pytest>=7.4.0 ; extra == 'test'
- pytest-benchmark>=4.0 ; extra == 'test' - pytest-benchmark>=4.0 ; extra == 'test'
- pytest-cov>=2.10.1 ; extra == 'test' - pytest-cov>=2.10.1 ; extra == 'test'
@ -2390,10 +2401,10 @@ packages:
- pytest-cov ; extra == 'tests' - pytest-cov ; extra == 'tests'
- pytest-xdist ; extra == 'tests' - pytest-xdist ; extra == 'tests'
requires_python: '>=3.8' requires_python: '>=3.8'
- pypi: https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/2b/03/f906829bcfcbb945f19d6a64240ffb66a31d69ca5533e95882f0efc9c13c/cyclopts-4.5.2-py3-none-any.whl
name: cyclopts name: cyclopts
version: 4.5.1 version: 4.5.2
sha256: 0642c93601e554ca6b7b9abd81093847ea4448b2616280f2a0952416574e8c7a sha256: ee56ee23c2c81abc34b66b5aa8fd2698ca699740054e84e534449ec3eb7f944d
requires_dist: requires_dist:
- attrs>=23.1.0 - attrs>=23.1.0
- docstring-parser>=0.15,<4.0 - docstring-parser>=0.15,<4.0
@ -2576,10 +2587,10 @@ packages:
- pyarrow ; extra == 'all' - pyarrow ; extra == 'all'
- adbc-driver-manager ; extra == 'all' - adbc-driver-manager ; extra == 'all'
requires_python: '>=3.9.0' requires_python: '>=3.9.0'
- pypi: https://files.pythonhosted.org/packages/a7/39/ce46ee84779ef19d88fd028fc786a6dcc68b73ace33c31997aeda0dfecdc/earthengine_api-1.7.12-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c2/0f/875b6df73f884062f3bd7d62a2fb9bfc1d07d1c93a611e999401c5b10ca0/earthengine_api-1.7.13-py3-none-any.whl
name: earthengine-api name: earthengine-api
version: 1.7.12 version: 1.7.13
sha256: 39c24f65b97e88bfed325e55d7f9fa5c8e8a9f92280c5ed24e3ab36560f2b543 sha256: 32a24b6003287f71afb24e2cee7718296f8d82778488b88c1e760279c7e47840
requires_dist: requires_dist:
- google-cloud-storage - google-cloud-storage
- google-api-python-client>=1.12.1 - google-api-python-client>=1.12.1
@ -2648,7 +2659,7 @@ packages:
- pypi: ./ - pypi: ./
name: entropice name: entropice
version: 0.1.0 version: 0.1.0
sha256: 07232c2b09b1b8b691cc8ca7d25b3c0041f2324236a11491f9f07b7e6827973a sha256: d9313dad098d69cd67a908e0cb26d4506c8bf723d6a42d0b213d84a6bdb03e9e
requires_dist: requires_dist:
- aiohttp>=3.12.11 - aiohttp>=3.12.11
- bokeh>=3.7.3 - bokeh>=3.7.3
@ -2715,6 +2726,8 @@ packages:
- shap>=0.50.0,<0.51 - shap>=0.50.0,<0.51
- h5py>=3.15.1,<4 - h5py>=3.15.1,<4
- pydantic>=2.12.5,<3 - pydantic>=2.12.5,<3
- nbformat>=5.10.4,<6
- fastcluster>=1.3.0,<2
requires_python: '>=3.13,<3.14' requires_python: '>=3.13,<3.14'
- pypi: git+ssh://git@forgejo.tobiashoelzer.de:22222/tobias/entropy.git#9152653278559faff830ff984a66d30b8ae5657c - pypi: git+ssh://git@forgejo.tobiashoelzer.de:22222/tobias/entropy.git#9152653278559faff830ff984a66d30b8ae5657c
name: entropy name: entropy
@ -2801,10 +2814,17 @@ packages:
- accelerate>=0.21 ; extra == 'dev' - accelerate>=0.21 ; extra == 'dev'
- ipykernel ; extra == 'dev' - ipykernel ; extra == 'dev'
requires_python: '>=3.10' requires_python: '>=3.10'
- pypi: https://files.pythonhosted.org/packages/23/03/2fe18e3d718b5a36d6c548df3e7662a4c433efea4d28662063d259248a1d/fastcore-1.12.11-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/59/f3/f71552b94a39509b62e72c4a26b6e4440bb9ce6decacf90af2916829e69e/fastcluster-1.3.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl
name: fastcluster
version: 1.3.0
sha256: 2dce31ace6f8e08c5400d6e19492fe09aba2b050f78a7aa6943ba2ae50dcd1b0
requires_dist:
- numpy>=2
requires_python: '>=3'
- pypi: https://files.pythonhosted.org/packages/ea/d6/bb13c44b5863c0be7a27ef02982eca88f50d717549df1979e85942292239/fastcore-1.12.12-py3-none-any.whl
name: fastcore name: fastcore
version: 1.12.11 version: 1.12.12
sha256: b6a0ce9f48509405109251d00ac0576cfe5cba0a2b1b495a4126283969efbad5 sha256: bb1a3a3accd62a72bad56af974e0617af078316be5bb5dcc8763b8244c197fa8
requires_dist: requires_dist:
- numpy ; extra == 'dev' - numpy ; extra == 'dev'
- nbdev>=0.2.39 ; extra == 'dev' - nbdev>=0.2.39 ; extra == 'dev'
@ -2826,6 +2846,19 @@ packages:
- fastprogress - fastprogress
- fastcore>=1.3.26 - fastcore>=1.3.26
requires_python: '>=3.6' requires_python: '>=3.6'
- pypi: https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl
name: fastjsonschema
version: 2.21.2
sha256: 1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463
requires_dist:
- colorama ; extra == 'devel'
- jsonschema ; extra == 'devel'
- json-spec ; extra == 'devel'
- pylint ; extra == 'devel'
- pytest ; extra == 'devel'
- pytest-benchmark ; extra == 'devel'
- pytest-cache ; extra == 'devel'
- validictory ; extra == 'devel'
- pypi: https://files.pythonhosted.org/packages/fe/a7/af33584fa6d17b911cfaba460efd3409cb5dd47083c181a4fdfec4bef840/fastlite-0.2.4-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fe/a7/af33584fa6d17b911cfaba460efd3409cb5dd47083c181a4fdfec4bef840/fastlite-0.2.4-py3-none-any.whl
name: fastlite name: fastlite
version: 0.2.4 version: 0.2.4
@ -2879,10 +2912,10 @@ packages:
- pkg:pypi/filelock?source=hash-mapping - pkg:pypi/filelock?source=hash-mapping
size: 18609 size: 18609
timestamp: 1765846639623 timestamp: 1765846639623
- pypi: https://files.pythonhosted.org/packages/fa/97/3702c3be0e5ad3f46a75ccb9f30b6d20bd9432d9940a0c62dfa4869b4758/flox-0.11.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/92/54/dc5aec836660a37f11a8c66300bc2c18be254ef3a78ff08869ed1960c0fb/flox-0.11.1-py3-none-any.whl
name: flox name: flox
version: 0.11.0 version: 0.11.1
sha256: 61620abc0eec12a3d6f93fd08f17326435b17d256678a5380598d10b25012751 sha256: 2c5da10771d139118eee7ca453b5a60c34f051cf1c06f2e5446728bc09fce2ec
requires_dist: requires_dist:
- pandas>=2.1 - pandas>=2.1
- packaging>=21.3 - packaging>=21.3
@ -3343,10 +3376,10 @@ packages:
- grpcio-gcp>=0.2.2,<1.0.0 ; extra == 'grpcgcp' - grpcio-gcp>=0.2.2,<1.0.0 ; extra == 'grpcgcp'
- grpcio-gcp>=0.2.2,<1.0.0 ; extra == 'grpcio-gcp' - grpcio-gcp>=0.2.2,<1.0.0 ; extra == 'grpcio-gcp'
requires_python: '>=3.7' requires_python: '>=3.7'
- pypi: https://files.pythonhosted.org/packages/04/44/3677ff27998214f2fa7957359da48da378a0ffff1bd0bdaba42e752bc13e/google_api_python_client-2.189.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/07/ad/223d5f4b0b987669ffeb3eadd7e9f85ece633aa7fd3246f1e2f6238e1e05/google_api_python_client-2.190.0-py3-none-any.whl
name: google-api-python-client name: google-api-python-client
version: 2.189.0 version: 2.190.0
sha256: a258c09660a49c6159173f8bbece171278e917e104a11f0640b34751b79c8a1a sha256: d9b5266758f96c39b8c21d9bbfeb4e58c14dbfba3c931f7c5a8d7fdcd292dd57
requires_dist: requires_dist:
- httplib2>=0.19.0,<1.0.0 - httplib2>=0.19.0,<1.0.0
- google-auth>=1.32.0,!=2.24.0,!=2.25.0,<3.0.0 - google-auth>=1.32.0,!=2.24.0,!=2.25.0,<3.0.0
@ -5787,10 +5820,10 @@ packages:
- build==1.2.2 ; python_full_version >= '3.11' and extra == 'dev' - build==1.2.2 ; python_full_version >= '3.11' and extra == 'dev'
- twine==6.0.1 ; python_full_version >= '3.11' and extra == 'dev' - twine==6.0.1 ; python_full_version >= '3.11' and extra == 'dev'
requires_python: '>=3.5,<4.0' requires_python: '>=3.5,<4.0'
- pypi: https://files.pythonhosted.org/packages/c4/bd/ba44a47578ea48ee28b54543c1de8c529eedad8317516a2a753e6d9c77c5/lonboard-0.13.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/87/c4/c15eb88220cc6211eb3756c858a76f6ac26b99e2433831d2d7022ad0ff72/lonboard-0.14.0-py3-none-any.whl
name: lonboard name: lonboard
version: 0.13.0 version: 0.14.0
sha256: 8acb17fdcbb34bd147a68aebd4b887996171e0eb9df7f4fc06e467cdfa32fb07 sha256: 35f3218490e7bf07562575a872b45d44cf02026c3db9d0af486ad716dca02551
requires_dist: requires_dist:
- anywidget~=0.9.0 - anywidget~=0.9.0
- arro3-compute>=0.4.1 - arro3-compute>=0.4.1
@ -5805,12 +5838,14 @@ packages:
- click>=8.1.7 ; extra == 'cli' - click>=8.1.7 ; extra == 'cli'
- pyogrio>=0.8 ; extra == 'cli' - pyogrio>=0.8 ; extra == 'cli'
- shapely>=2 ; extra == 'cli' - shapely>=2 ; extra == 'cli'
- async-geotiff>=0.1.0 ; python_full_version >= '3.11' and extra == 'cog'
- morecantile>=7.0 ; python_full_version >= '3.11' and extra == 'cog'
- geopandas>=0.13 ; extra == 'geopandas' - geopandas>=0.13 ; extra == 'geopandas'
- pandas>=2 ; extra == 'geopandas' - pandas>=2 ; extra == 'geopandas'
- pyarrow>=16 ; extra == 'geopandas' - pyarrow>=16 ; extra == 'geopandas'
- shapely>=2 ; extra == 'geopandas' - shapely>=2 ; extra == 'geopandas'
- movingpandas>=0.17 ; extra == 'movingpandas' - movingpandas>=0.17 ; extra == 'movingpandas'
requires_python: '>=3.10' requires_python: '>=3.11'
- conda: https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.10.0-h5888daf_1.conda
sha256: 47326f811392a5fd3055f0f773036c392d26fdb32e4d8e7a8197eed951489346 sha256: 47326f811392a5fd3055f0f773036c392d26fdb32e4d8e7a8197eed951489346
md5: 9de5350a85c4a20c685259b889aa6393 md5: 9de5350a85c4a20c685259b889aa6393
@ -6127,6 +6162,25 @@ packages:
- sqlparse ; extra == 'sql' - sqlparse ; extra == 'sql'
- sqlframe>=3.22.0,!=3.39.3 ; extra == 'sqlframe' - sqlframe>=3.22.0,!=3.39.3 ; extra == 'sqlframe'
requires_python: '>=3.9' requires_python: '>=3.9'
- pypi: https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl
name: nbformat
version: 5.10.4
sha256: 3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b
requires_dist:
- fastjsonschema>=2.15
- jsonschema>=2.6
- jupyter-core>=4.12,!=5.0.*
- traitlets>=5.1
- myst-parser ; extra == 'docs'
- pydata-sphinx-theme ; extra == 'docs'
- sphinx ; extra == 'docs'
- sphinxcontrib-github-alt ; extra == 'docs'
- sphinxcontrib-spelling ; extra == 'docs'
- pep440 ; extra == 'test'
- pre-commit ; extra == 'test'
- pytest ; extra == 'test'
- testpath ; extra == 'test'
requires_python: '>=3.8'
- conda: https://conda.anaconda.org/conda-forge/linux-64/nccl-2.28.9.1-h4d09622_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/nccl-2.28.9.1-h4d09622_1.conda
sha256: a132df4a0b4c36932cfd5e931b4c88e83991ad77de9adf13c206caefdaf3b8b0 sha256: a132df4a0b4c36932cfd5e931b4c88e83991ad77de9adf13c206caefdaf3b8b0
md5: af3e8d72000a10bd8159d7e28daf4bfc md5: af3e8d72000a10bd8159d7e28daf4bfc
@ -6710,15 +6764,15 @@ packages:
- nest-asyncio ; extra == 'tests-pypy' - nest-asyncio ; extra == 'tests-pypy'
- numpy ; extra == 'tests-pypy' - numpy ; extra == 'tests-pypy'
requires_python: '>=3.10' requires_python: '>=3.10'
- pypi: https://files.pythonhosted.org/packages/16/32/f8e3c85d1d5250232a5d3477a2a28cc291968ff175caeadaf3cc19ce0e4a/parso-0.8.5-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/b6/61/fae042894f4296ec49e3f193aff5d7c18440da9e48102c3315e1bc4519a7/parso-0.8.6-py2.py3-none-any.whl
name: parso name: parso
version: 0.8.5 version: 0.8.6
sha256: 646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887 sha256: 2c549f800b70a5c4952197248825584cb00f033b29c692671d3bf08bf380baff
requires_dist: requires_dist:
- pytest ; extra == 'testing' - pytest ; extra == 'testing'
- docopt ; extra == 'testing' - docopt ; extra == 'testing'
- flake8==5.0.4 ; extra == 'qa' - flake8==5.0.4 ; extra == 'qa'
- mypy==0.971 ; extra == 'qa' - zuban==0.5.1 ; extra == 'qa'
- types-setuptools==67.2.0.1 ; extra == 'qa' - types-setuptools==67.2.0.1 ; extra == 'qa'
requires_python: '>=3.6' requires_python: '>=3.6'
- pypi: https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl
@ -6746,10 +6800,10 @@ packages:
sha256: 7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 sha256: 7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523
requires_dist: requires_dist:
- ptyprocess>=0.5 - ptyprocess>=0.5
- pypi: https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
name: pillow name: pillow
version: 12.1.0 version: 12.1.1
sha256: 6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc sha256: 47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717
requires_dist: requires_dist:
- furo ; extra == 'docs' - furo ; extra == 'docs'
- olefile ; extra == 'docs' - olefile ; extra == 'docs'
@ -6783,21 +6837,10 @@ packages:
version: 26.0.1 version: 26.0.1
sha256: bdb1b08f4274833d62c1aa29e20907365a2ceb950410df15fc9521bad440122b sha256: bdb1b08f4274833d62c1aa29e20907365a2ceb950410df15fc9521bad440122b
requires_python: '>=3.9' requires_python: '>=3.9'
- pypi: https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/da/10/1b0dcf51427326f70e50d98df21b18c228117a743a1fc515a42f8dc7d342/platformdirs-4.6.0-py3-none-any.whl
name: platformdirs name: platformdirs
version: 4.5.1 version: 4.6.0
sha256: d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31 sha256: dd7f808d828e1764a22ebff09e60f175ee3c41876606a6132a688d809c7c9c73
requires_dist:
- furo>=2025.9.25 ; extra == 'docs'
- proselint>=0.14 ; extra == 'docs'
- sphinx-autodoc-typehints>=3.2 ; extra == 'docs'
- sphinx>=8.2.3 ; extra == 'docs'
- appdirs==1.4.4 ; extra == 'test'
- covdefaults>=2.3 ; extra == 'test'
- pytest-cov>=7 ; extra == 'test'
- pytest-mock>=3.15.1 ; extra == 'test'
- pytest>=8.4.2 ; extra == 'test'
- mypy>=1.18.2 ; extra == 'type'
requires_python: '>=3.10' requires_python: '>=3.10'
- pypi: https://files.pythonhosted.org/packages/8a/67/f95b5460f127840310d2187f916cf0023b5875c0717fdf893f71e1325e87/plotly-6.5.2-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8a/67/f95b5460f127840310d2187f916cf0023b5875c0717fdf893f71e1325e87/plotly-6.5.2-py3-none-any.whl
name: plotly name: plotly
@ -8464,6 +8507,11 @@ packages:
- sphinx-book-theme ; extra == 'docs' - sphinx-book-theme ; extra == 'docs'
- sphinx-remove-toctrees ; extra == 'docs' - sphinx-remove-toctrees ; extra == 'docs'
requires_python: '>=3.10' requires_python: '>=3.10'
- pypi: https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl
name: shellingham
version: 1.5.4
sha256: 7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686
requires_python: '>=3.7'
- conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhe01879c_1.conda
sha256: 458227f759d5e3fcec5d9b7acce54e10c9e1f4f4b7ec978f3bfd54ce4ee9853d sha256: 458227f759d5e3fcec5d9b7acce54e10c9e1f4f4b7ec978f3bfd54ce4ee9853d
md5: 3339e3b65d58accf4ca4fb8748ab16b3 md5: 3339e3b65d58accf4ca4fb8748ab16b3
@ -9775,15 +9823,22 @@ packages:
version: 0.0.11 version: 0.0.11
sha256: 25f88e8789072830348cb59b761d5ced70642ed5600673b4bf6a849af71eca8b sha256: 25f88e8789072830348cb59b761d5ced70642ed5600673b4bf6a849af71eca8b
requires_python: '>=3.8' requires_python: '>=3.8'
- pypi: https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7a/ed/d6fca788b51d0d4640c4bc82d0e85bad4b49809bca36bf4af01b4dcb66a7/typer-0.23.0-py3-none-any.whl
name: typer-slim name: typer
version: 0.21.1 version: 0.23.0
sha256: 6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d sha256: 79f4bc262b6c37872091072a3cb7cb6d7d79ee98c0c658b4364bdcde3c42c913
requires_dist: requires_dist:
- click>=8.0.0 - click>=8.0.0
- typing-extensions>=3.7.4.3 - shellingham>=1.3.0
- shellingham>=1.3.0 ; extra == 'standard' - rich>=10.11.0
- rich>=10.11.0 ; extra == 'standard' - annotated-doc>=0.0.2
requires_python: '>=3.9'
- pypi: https://files.pythonhosted.org/packages/07/3e/ba3a222c80ee070d9497ece3e1fe77253c142925dd4c90f04278aac0a9eb/typer_slim-0.23.0-py3-none-any.whl
name: typer-slim
version: 0.23.0
sha256: 1d693daf22d998a7b1edab8413cdcb8af07254154ce3956c1664dc11b01e2f8b
requires_dist:
- typer>=0.23.0
requires_python: '>=3.9' requires_python: '>=3.9'
- pypi: https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl
name: types-pytz name: types-pytz

View file

@ -70,7 +70,7 @@ dependencies = [
"autogluon-tabular[all,mitra,realmlp,interpret,fastai,tabm,tabpfn,tabdpt,tabpfnmix,tabicl,skew,imodels]>=1.5.0", "autogluon-tabular[all,mitra,realmlp,interpret,fastai,tabm,tabpfn,tabdpt,tabpfnmix,tabicl,skew,imodels]>=1.5.0",
"shap>=0.50.0,<0.51", "shap>=0.50.0,<0.51",
"h5py>=3.15.1,<4", "h5py>=3.15.1,<4",
"pydantic>=2.12.5,<3", "pydantic>=2.12.5,<3", "nbformat>=5.10.4,<6", "fastcluster>=1.3.0,<2",
] ]
[project.scripts] [project.scripts]

View file

@ -1,6 +1,33 @@
#! /bin/bash #! /bin/bash
# Check if running inside the pixi environment
which darts >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "This script must be run inside the pixi environment."
exit 1
fi
# pixi shell # pixi shell
darts extract-darts-v2 --grid hex --level 3
darts extract-darts-v2 --grid hex --level 4
darts extract-darts-v2 --grid hex --level 5
darts extract-darts-v2 --grid hex --level 6
darts extract-darts-v2 --grid healpix --level 6
darts extract-darts-v2 --grid healpix --level 7
darts extract-darts-v2 --grid healpix --level 8
darts extract-darts-v2 --grid healpix --level 9
darts extract-darts-v2 --grid healpix --level 10
darts extract-darts-v2-aggregated --grid hex --level 3
darts extract-darts-v2-aggregated --grid hex --level 4
darts extract-darts-v2-aggregated --grid hex --level 5
darts extract-darts-v2-aggregated --grid hex --level 6
darts extract-darts-v2-aggregated --grid healpix --level 6
darts extract-darts-v2-aggregated --grid healpix --level 7
darts extract-darts-v2-aggregated --grid healpix --level 8
darts extract-darts-v2-aggregated --grid healpix --level 9
darts extract-darts-v2-aggregated --grid healpix --level 10
exit 0
darts extract-darts-v1 --grid hex --level 3 darts extract-darts-v1 --grid hex --level 3
darts extract-darts-v1 --grid hex --level 4 darts extract-darts-v1 --grid hex --level 4
darts extract-darts-v1 --grid hex --level 5 darts extract-darts-v1 --grid hex --level 5
@ -22,7 +49,6 @@ darts extract-darts-v1-aggregated --grid healpix --level 8
darts extract-darts-v1-aggregated --grid healpix --level 9 darts extract-darts-v1-aggregated --grid healpix --level 9
darts extract-darts-v1-aggregated --grid healpix --level 10 darts extract-darts-v1-aggregated --grid healpix --level 10
darts extract-darts-mllabels --grid hex --level 3 darts extract-darts-mllabels --grid hex --level 3
darts extract-darts-mllabels --grid hex --level 4 darts extract-darts-mllabels --grid hex --level 4
darts extract-darts-mllabels --grid hex --level 5 darts extract-darts-mllabels --grid hex --level 5

View file

@ -16,7 +16,6 @@ import streamlit as st
from entropice.dashboard.views.dataset_page import render_dataset_page from entropice.dashboard.views.dataset_page import render_dataset_page
from entropice.dashboard.views.experiment_analysis_page import render_experiment_analysis_page from entropice.dashboard.views.experiment_analysis_page import render_experiment_analysis_page
from entropice.dashboard.views.inference_page import render_inference_page from entropice.dashboard.views.inference_page import render_inference_page
from entropice.dashboard.views.model_state_page import render_model_state_page
from entropice.dashboard.views.overview_page import render_overview_page from entropice.dashboard.views.overview_page import render_overview_page
from entropice.dashboard.views.training_analysis_page import render_training_analysis_page from entropice.dashboard.views.training_analysis_page import render_training_analysis_page
@ -30,14 +29,13 @@ def main():
data_page = st.Page(render_dataset_page, title="Dataset", icon="📊") data_page = st.Page(render_dataset_page, title="Dataset", icon="📊")
training_analysis_page = st.Page(render_training_analysis_page, title="Training Results Analysis", icon="🦾") training_analysis_page = st.Page(render_training_analysis_page, title="Training Results Analysis", icon="🦾")
experiment_analysis_page = st.Page(render_experiment_analysis_page, title="Experiment Analysis", icon="🔬") experiment_analysis_page = st.Page(render_experiment_analysis_page, title="Experiment Analysis", icon="🔬")
model_state_page = st.Page(render_model_state_page, title="Model State", icon="🧮")
inference_page = st.Page(render_inference_page, title="Inference", icon="🗺️") inference_page = st.Page(render_inference_page, title="Inference", icon="🗺️")
pg = st.navigation( pg = st.navigation(
{ {
"Overview": [overview_page], "Overview": [overview_page],
"Data": [data_page], "Data": [data_page],
"Experiments": [training_analysis_page, experiment_analysis_page, model_state_page], "Experiments": [training_analysis_page, experiment_analysis_page],
"Inference": [inference_page], "Inference": [inference_page],
} }
) )

View file

@ -24,7 +24,7 @@ def create_grid_level_comparison_plot(
Args: Args:
results_df: DataFrame with experiment results including grid, level, model, and metrics results_df: DataFrame with experiment results including grid, level, model, and metrics
metric: Metric to compare (e.g., 'f1', 'accuracy', 'r2') metric: Metric to compare (e.g., 'f1', 'accuracy', 'r2')
split: Data split to show ('train', 'test', or 'combined') split: Data split to show ('train', 'test', or 'complete')
Returns: Returns:
Plotly figure showing performance across grid levels Plotly figure showing performance across grid levels
@ -82,7 +82,7 @@ def create_grid_level_comparison_plot(
"autogluon": "star", "autogluon": "star",
} }
# Add a combined column for hover information # Add a complete column for hover information
results_df["model_display"] = results_df["model"].str.upper() results_df["model_display"] = results_df["model"].str.upper()
# Create box plot without individual points first # Create box plot without individual points first

View file

@ -1,493 +0,0 @@
"""Plotting functions for inference result visualizations."""
import geopandas as gpd
import pandas as pd
import plotly.graph_objects as go
import pydeck as pdk
import streamlit as st
from entropice.dashboard.utils.colors import get_palette
from entropice.dashboard.utils.geometry import fix_hex_geometry
from entropice.dashboard.utils.loaders import TrainingResult
from entropice.utils.types import Task
# Canonical class orderings, mirroring the ML pipeline so training and
# inference visualizations agree on label order.
# Binary labels are defined inline in dataset.py: {False: "No RTS", True: "RTS"}
# Count/Density labels are defined in the bin_values function
BINARY_LABELS = ["No RTS", "RTS"]
COUNT_LABELS = ["None", "Very Few", "Few", "Several", "Many", "Very Many"]
DENSITY_LABELS = ["Empty", "Very Sparse", "Sparse", "Moderate", "Dense", "Very Dense"]
# Maps a task identifier to its canonical label order; looked up by
# get_ordered_classes(). Keys accept both Task enum values and plain strings.
CLASS_ORDERINGS: dict[Task | str, list[str]] = {
    "binary": BINARY_LABELS,
    "count_regimes": COUNT_LABELS,
    "density_regimes": DENSITY_LABELS,
    # Legacy aliases (deprecated)
    "count": COUNT_LABELS,
    "density": DENSITY_LABELS,
}
def get_ordered_classes(task: Task | str, available_classes: list[str] | None = None) -> list[str]:
    """Return the class labels for *task* in their canonical display order.

    The ordering comes from ``CLASS_ORDERINGS`` and therefore matches the
    canonical ordering defined in the ML dataset module, keeping training
    and inference visualizations consistent.

    Args:
        task: Task type ('binary', 'count_regimes', 'density_regimes', 'count', 'density').
        available_classes: Optional subset of classes to keep. When None, the
            full canonical list for the task is returned.

    Returns:
        List of class labels in canonical order.

    Examples:
        >>> get_ordered_classes("binary")
        ['No RTS', 'RTS']
        >>> get_ordered_classes("count_regimes", ["None", "Few", "Several"])
        ['None', 'Few', 'Several']
    """
    ordering = CLASS_ORDERINGS[task]
    if available_classes is None:
        return ordering
    # Drop absent classes without disturbing the canonical order.
    present = set(available_classes)
    return [label for label in ordering if label in present]
def sort_class_series(series: pd.Series, task: Task | str) -> pd.Series:
    """Reorder a class-labelled Series into the canonical class order.

    Args:
        series: Pandas Series whose index holds class labels.
        task: Task type ('binary', 'count_regimes', 'density_regimes', 'count', 'density').

    Returns:
        The same data reindexed so classes appear in canonical order.
    """
    present = list(series.index)
    # reindex both filters and orders according to the canonical labels.
    return series.reindex(get_ordered_classes(task, present))
def render_inference_statistics(predictions_gdf: gpd.GeoDataFrame, task: str):
    """Render summary statistics about inference results.

    Args:
        predictions_gdf: GeoDataFrame with a ``predicted_class`` column.
        task: Task type ('binary', 'count', 'density').
    """
    st.subheader("📊 Inference Summary")

    # Distribution of predicted classes across all cells.
    class_counts = predictions_gdf["predicted_class"].value_counts()
    total = len(predictions_gdf)

    # Create metrics layout
    if task == "binary":
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Predictions", f"{total:,}")
        with col2:
            rts_count = class_counts.get("RTS", 0)
            rts_pct = rts_count / total * 100 if total > 0 else 0
            st.metric("RTS Predictions", f"{rts_count:,} ({rts_pct:.1f}%)")
        with col3:
            # Fix: the canonical negative label is "No RTS" (see BINARY_LABELS
            # and the dataset.py mapping {False: "No RTS", True: "RTS"}), not
            # "No-RTS" — the old lookup key never matched, so this metric
            # always displayed 0.
            no_rts_count = class_counts.get("No RTS", 0)
            no_rts_pct = no_rts_count / total * 100 if total > 0 else 0
            st.metric("No-RTS Predictions", f"{no_rts_count:,} ({no_rts_pct:.1f}%)")
    else:
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Predictions", f"{total:,}")
        with col2:
            st.metric("Unique Classes", len(class_counts))
        with col3:
            most_common = class_counts.index[0] if len(class_counts) > 0 else "N/A"
            st.metric("Most Common Class", most_common)
def render_class_distribution_histogram(predictions_gdf: gpd.GeoDataFrame, task: str):
    """Render histogram of predicted class distribution.

    Args:
        predictions_gdf: GeoDataFrame with predictions.
        task: Task type ('binary', 'count_regimes', 'density_regimes', 'count', 'density').
    """
    st.subheader("📊 Predicted Class Distribution")

    # Class frequencies, reordered into the canonical class order for the task.
    counts = sort_class_series(predictions_gdf["predicted_class"].value_counts(), task)
    labels = counts.index.tolist()
    palette = get_palette(task, len(labels))

    total = len(predictions_gdf)
    percentages = counts.to_numpy() / total * 100

    # Build the bar trace up front, then attach it to a fresh figure.
    bar = go.Bar(
        x=labels,
        y=counts.values,
        marker_color=palette,
        opacity=0.9,
        text=counts.to_numpy(),
        textposition="outside",
        textfont={"size": 12},
        hovertemplate="<b>%{x}</b><br>Count: %{y:,}<br>Percentage: %{customdata:.1f}%<extra></extra>",
        customdata=percentages,
    )
    fig = go.Figure()
    fig.add_trace(bar)
    # Tilt tick labels once there are enough categories to collide.
    fig.update_layout(
        height=400,
        margin={"l": 20, "r": 20, "t": 40, "b": 20},
        showlegend=False,
        xaxis_title="Predicted Class",
        yaxis_title="Count",
        xaxis={"tickangle": -45 if len(labels) > 3 else 0},
    )
    st.plotly_chart(fig, width="stretch")

    # Show percentages in a table
    with st.expander("📋 Detailed Class Distribution", expanded=False):
        table = pd.DataFrame(
            {
                "Class": labels,
                "Count": counts.to_numpy(),
                "Percentage": percentages.round(2),
            }
        )
        st.dataframe(table, hide_index=True, width="stretch")
def render_spatial_distribution_stats(predictions_gdf: gpd.GeoDataFrame):
    """Render spatial statistics about predictions.

    Args:
        predictions_gdf: GeoDataFrame with predictions.
    """
    st.subheader("🌍 Spatial Coverage")

    # Bounding box in WGS84; total_bounds is (min_lon, min_lat, max_lon, max_lat).
    min_lon, min_lat, max_lon, max_lat = predictions_gdf.to_crs("EPSG:4326").total_bounds

    metric_specs = (
        ("Min Latitude", min_lat),
        ("Max Latitude", max_lat),
        ("Min Longitude", min_lon),
        ("Max Longitude", max_lon),
    )
    for column, (label, value) in zip(st.columns(4), metric_specs):
        with column:
            st.metric(label, f"{value:.2f}°")

    # Report total covered area when per-cell areas are available.
    if "cell_area" in predictions_gdf.columns:
        total_area = predictions_gdf["cell_area"].sum()
        st.info(f"📏 **Total Area Covered:** {total_area:,.0f} km²")
def _prepare_geojson_features(display_gdf_wgs84: gpd.GeoDataFrame) -> list:
    """Convert GeoDataFrame to GeoJSON features for pydeck.

    Args:
        display_gdf_wgs84: GeoDataFrame in WGS84 projection with required columns.

    Returns:
        List of GeoJSON feature dictionaries.
    """
    # Build one Feature per row; geometry is exported via __geo_interface__.
    return [
        {
            "type": "Feature",
            "geometry": row["geometry"].__geo_interface__,
            "properties": {
                "cell_id": str(row["cell_id"]),
                "predicted_class": str(row["predicted_class"]),
                "fill_color": row["fill_color"],
                "elevation": float(row["elevation"]),
            },
        }
        for _, row in display_gdf_wgs84.iterrows()
    ]
@st.fragment
def render_inference_map(result: TrainingResult):
    """Render 3D pydeck map showing inference results with interactive controls.

    This is a Streamlit fragment that reruns independently when users interact with the
    visualization controls (color mode and opacity), without re-running the entire page.

    Args:
        result: TrainingResult object containing prediction data.
    """
    st.subheader("🗺️ Inference Results Map")
    # Load predictions from the run directory.
    # NOTE(review): assumes the parquet contains at least 'predicted_class', 'cell_id',
    # and a geometry column — confirm against the writer of predicted_probabilities.parquet.
    preds_gdf = gpd.read_parquet(result.path / "predicted_probabilities.parquet")
    # Get settings used for ordering/coloring (task) and geometry fixing (grid).
    task = result.settings.task
    grid = result.settings.grid
    # Create controls in columns: class filter, 3D toggle, opacity slider.
    col1, col2, col3 = st.columns([2, 2, 1])
    with col1:
        # Get unique classes for filtering (properly ordered)
        all_classes = get_ordered_classes(task, preds_gdf["predicted_class"].unique().tolist())
        filter_options = ["All Classes", *all_classes]
        selected_filter = st.selectbox(
            "Filter by Predicted Class",
            options=filter_options,
            key="inference_map_filter",
        )
    with col2:
        use_elevation = st.checkbox(
            "Enable 3D Elevation",
            value=True,
            help="Show predictions with elevation (requires count/density for meaningful height)",
            key="inference_map_elevation",
        )
    with col3:
        opacity = st.slider(
            "Opacity",
            min_value=0.1,
            max_value=1.0,
            value=0.7,
            step=0.1,
            key="inference_map_opacity",
        )
    # Filter data if needed; .copy() avoids SettingWithCopy issues on later column writes.
    if selected_filter != "All Classes":
        display_gdf = preds_gdf[preds_gdf["predicted_class"] == selected_filter].copy()
    else:
        display_gdf = preds_gdf.copy()
    # Bail out early when the filter leaves nothing to draw.
    if len(display_gdf) == 0:
        st.warning(f"No predictions found for filter: {selected_filter}")
        return
    st.info(f"Displaying {len(display_gdf):,} out of {len(preds_gdf):,} total predictions")
    # Convert to WGS84 for pydeck (pydeck expects lon/lat coordinates).
    display_gdf_wgs84 = display_gdf.to_crs("EPSG:4326")
    # Fix antimeridian issues for hex grids (cells spanning the ±180° line).
    if grid == "hex":
        display_gdf_wgs84["geometry"] = display_gdf_wgs84["geometry"].apply(fix_hex_geometry)
    # Assign colors based on predicted class (using canonical ordering)
    # Get all possible classes for this task to ensure consistent colors
    # across runs/filters, not just the classes present in this result.
    canonical_classes = get_ordered_classes(task)
    colors_palette = get_palette(task, len(canonical_classes))
    # Create color mapping for canonical classes
    color_map = {cls: colors_palette[i] for i, cls in enumerate(canonical_classes)}

    # Convert hex colors to RGB triplets, since pydeck wants [r, g, b] lists.
    def hex_to_rgb(hex_color):
        # E.g. "#1f77b4" -> [31, 119, 180]; assumes a 6-digit hex string.
        hex_color = hex_color.lstrip("#")
        return [int(hex_color[i : i + 2], 16) for i in (0, 2, 4)]

    display_gdf_wgs84["fill_color"] = display_gdf_wgs84["predicted_class"].map(
        {cls: hex_to_rgb(color) for cls, color in color_map.items()}
    )
    # Add elevation based on class encoding (for ordered classes)
    if use_elevation and len(all_classes) > 1:
        # Create a normalized elevation based on class order: first class -> 0.0, last -> 1.0.
        class_to_elevation = {cls: i / (len(all_classes) - 1) for i, cls in enumerate(all_classes)}
        display_gdf_wgs84["elevation"] = display_gdf_wgs84["predicted_class"].map(class_to_elevation)
    else:
        # Flat map when 3D is off or there is only one class (no order to encode).
        display_gdf_wgs84["elevation"] = 0.0
    # Convert to GeoJSON format consumable by the pydeck GeoJsonLayer.
    geojson_data = _prepare_geojson_features(display_gdf_wgs84)
    # Create pydeck layer
    layer = pdk.Layer(
        "GeoJsonLayer",
        geojson_data,
        opacity=opacity,
        stroked=True,
        filled=True,
        extruded=use_elevation,
        wireframe=False,
        get_fill_color="properties.fill_color",
        get_line_color=[80, 80, 80],
        line_width_min_pixels=0.5,
        get_elevation="properties.elevation" if use_elevation else 0,
        # Normalized [0, 1] elevation times this scale -> up to 500 km extrusion height.
        elevation_scale=500000,  # Scale to 500km height
        pickable=True,
    )
    # Set initial view state (centered on the Arctic)
    view_state = pdk.ViewState(
        latitude=70,
        longitude=0,
        # Zoom out / tilt the camera when extruded so 3D columns are visible.
        zoom=2 if not use_elevation else 1.5,
        pitch=0 if not use_elevation else 45,
    )
    # Create deck with a tooltip bound to the per-feature GeoJSON properties.
    deck = pdk.Deck(
        layers=[layer],
        initial_view_state=view_state,
        tooltip={
            "html": "<b>Cell ID:</b> {cell_id}<br/><b>Predicted Class:</b> {predicted_class}",
            "style": {"backgroundColor": "steelblue", "color": "white"},
        },
        map_style="https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json",
    )
    # Render the map
    st.pydeck_chart(deck)
    # Show info about 3D visualization
    if use_elevation:
        st.info("💡 3D elevation represents class order. Rotate the map by holding Ctrl/Cmd and dragging.")
    # Add legend: one color swatch per class with displayed/total counts.
    with st.expander("Legend", expanded=True):
        st.markdown("**Predicted Classes:**")
        for cls in all_classes:
            color_hex = color_map[cls]
            # Count of rows currently shown (after filtering) vs. in the full result set.
            count = len(display_gdf[display_gdf["predicted_class"] == cls])
            total_count = len(preds_gdf[preds_gdf["predicted_class"] == cls])
            percentage = total_count / len(preds_gdf) * 100 if len(preds_gdf) > 0 else 0
            # Show if currently displayed or total count
            if selected_filter == "All Classes":
                count_str = f"{count:,} ({percentage:.1f}%)"
            else:
                count_str = f"{count:,} displayed / {total_count:,} total ({percentage:.1f}%)"
            st.markdown(
                f'<div style="display: flex; align-items: center; margin-bottom: 4px;">'
                f'<div style="width: 20px; height: 20px; background-color: {color_hex}; '
                f'margin-right: 8px; border: 1px solid #ccc; flex-shrink: 0;"></div>'
                f"<span>{cls}: {count_str}</span></div>",
                unsafe_allow_html=True,
            )
        if use_elevation and len(all_classes) > 1:
            st.markdown("---")
            st.markdown("**Elevation (3D):**")
            st.markdown(f"Height represents class order: {all_classes[0]} (low) → {all_classes[-1]} (high)")
def render_class_comparison(predictions_gdf: gpd.GeoDataFrame, task: str):
    """Render comparison plots between different predicted classes.

    Shows a pie chart of class proportions and a cumulative distribution curve
    of class frequencies, side by side. Requires at least two distinct classes.

    Args:
        predictions_gdf: GeoDataFrame with predictions.
        task: Task type ('binary', 'count_regimes', 'density_regimes', 'count', 'density').
    """
    st.subheader("🔍 Class Comparison")
    # Get class distribution and order properly
    class_counts = predictions_gdf["predicted_class"].value_counts()
    class_counts = sort_class_series(class_counts, task)
    if len(class_counts) < 2:
        st.info("Need at least 2 classes for comparison.")
        return
    # Create pie chart
    col1, col2 = st.columns(2)
    with col1:
        # Fix: close the Markdown bold marker (was "**Class Proportions").
        st.markdown("**Class Proportions**")
        # Get colors matching canonical order so hues stay stable across plots.
        canonical_classes = get_ordered_classes(task)
        all_colors = get_palette(task, len(canonical_classes))
        color_map = {cls: all_colors[i] for i, cls in enumerate(canonical_classes)}
        # Extract colors for available classes in proper order
        colors = [color_map[cls] for cls in class_counts.index]
        fig = go.Figure(
            data=[
                go.Pie(
                    labels=class_counts.index,
                    values=class_counts.values,
                    marker_colors=colors,
                    textinfo="label+percent",
                    textposition="auto",
                    hovertemplate="<b>%{label}</b><br>Count: %{value:,}<br>Percentage: %{percent}<extra></extra>",
                )
            ]
        )
        fig.update_layout(
            height=400,
            margin={"l": 20, "r": 20, "t": 20, "b": 20},
            showlegend=True,
        )
        st.plotly_chart(fig, width="stretch")
    with col2:
        # Fix: close the Markdown bold marker (was "**Cumulative Distribution").
        st.markdown("**Cumulative Distribution**")
        # Create cumulative distribution: classes ranked most- to least-frequent,
        # cumulative share normalized to 100%.
        sorted_counts = class_counts.sort_values(ascending=False)
        cumulative = sorted_counts.cumsum()
        cumulative_pct = cumulative / cumulative.iloc[-1] * 100
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=list(range(len(cumulative))),
                y=cumulative_pct.to_numpy(),
                mode="lines+markers",
                line={"color": colors[0], "width": 3},
                marker={"size": 8},
                # Class names carried as customdata so the hover shows which class each rank is.
                customdata=sorted_counts.index,
                hovertemplate="<b>%{customdata}</b><br>Cumulative: %{y:.1f}%<extra></extra>",
            )
        )
        fig.update_layout(
            height=400,
            margin={"l": 20, "r": 20, "t": 20, "b": 20},
            xaxis_title="Class Rank",
            yaxis_title="Cumulative Percentage",
            # Headroom above 100% so the top marker is not clipped.
            yaxis={"range": [0, 105]},
        )
        st.plotly_chart(fig, width="stretch")

File diff suppressed because it is too large Load diff

View file

@ -220,7 +220,7 @@ def _add_regression_subplot(
train_values = z_values[dataset.split == "train"] train_values = z_values[dataset.split == "train"]
test_values = z_values[dataset.split == "test"] test_values = z_values[dataset.split == "test"]
# Determine bin edges based on combined data # Determine bin edges based on complete data
all_values = pd.concat([train_values, test_values]) all_values = pd.concat([train_values, test_values])
# Use a reasonable number of bins # Use a reasonable number of bins
n_bins = min(30, int(np.sqrt(len(all_values)))) n_bins = min(30, int(np.sqrt(len(all_values))))

View file

@ -19,25 +19,25 @@ def render_run_information(selected_result: TrainingResult, refit_metric):
""" """
st.header("📋 Run Information") st.header("📋 Run Information")
grid_config = GridConfig.from_grid_level(f"{selected_result.settings.grid}{selected_result.settings.level}") # ty:ignore[invalid-argument-type] grid_config = GridConfig.from_grid_level((selected_result.run.dataset.grid, selected_result.run.dataset.level))
col1, col2, col3, col4, col5 = st.columns(5) col1, col2, col3, col4, col5 = st.columns(5)
with col1: with col1:
st.metric("Task", selected_result.settings.task.capitalize()) st.metric("Task", selected_result.run.task.capitalize())
with col2: with col2:
st.metric("Target", selected_result.settings.target.capitalize()) st.metric("Target", selected_result.run.target.capitalize())
with col3: with col3:
st.metric("Grid", grid_config.display_name) st.metric("Grid", grid_config.display_name)
with col4: with col4:
st.metric("Model", selected_result.settings.model.upper()) st.metric("Model", selected_result.run.model_type.upper())
with col5: with col5:
st.metric("Trials", len(selected_result.results)) st.metric("Trials", selected_result.run.n_trials or "N/A")
st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}") st.caption(f"**Refit Metric:** {format_metric_name(refit_metric)}")
def _render_metrics(metrics: dict[str, float]): def _render_metrics(metrics: dict[str, float]):
"""Render a set of metrics in a two-column layout. """Render a set of metrics in a max-five-column layout.
Args: Args:
metrics: Dictionary of metric names and their values. metrics: Dictionary of metric names and their values.
@ -57,20 +57,25 @@ def render_metrics_section(selected_result: TrainingResult):
selected_result: The selected TrainingResult object. selected_result: The selected TrainingResult object.
""" """
# Extract metrics for each split
test_metrics = selected_result.run.get_metrics_from_split("test")
train_metrics = selected_result.run.get_metrics_from_split("train")
complete_metrics = selected_result.run.get_metrics_from_split("complete")
# Test # Test
st.header("🎯 Test Set Performance") st.header("🎯 Test Set Performance")
st.caption("Performance metrics on the held-out test set (best model from hyperparameter search)") st.caption("Performance metrics on the held-out test set (best model from hyperparameter search)")
_render_metrics(selected_result.test_metrics) _render_metrics(test_metrics)
# Train # Train
st.header("🏋️‍♂️ Training Set Performance") st.header("🏋️‍♂️ Training Set Performance")
st.caption("Performance metrics on the training set (best model from hyperparameter search)") st.caption("Performance metrics on the training set (best model from hyperparameter search)")
_render_metrics(selected_result.train_metrics) _render_metrics(train_metrics)
# Combined / All # Complete / All
st.header("🧮 Overall Performance") st.header("🧮 Overall Performance")
st.caption("Overall performance metrics combining training and test sets") st.caption("Overall performance metrics combining training and test sets")
_render_metrics(selected_result.combined_metrics) _render_metrics(complete_metrics)
@st.fragment @st.fragment
@ -84,7 +89,7 @@ def render_confusion_matrices(selected_result: TrainingResult):
st.header("🎭 Confusion Matrices") st.header("🎭 Confusion Matrices")
# Check if this is a classification task # Check if this is a classification task
if selected_result.settings.task not in ["binary", "count_regimes", "density_regimes"]: if selected_result.run.task not in ["binary", "count_regimes", "density_regimes"]:
st.info( st.info(
"📊 Confusion matrices are only available for classification tasks " "📊 Confusion matrices are only available for classification tasks "
"(binary, count_regimes, density_regimes)." "(binary, count_regimes, density_regimes)."
@ -93,11 +98,11 @@ def render_confusion_matrices(selected_result: TrainingResult):
return return
# Check if confusion matrix data is available # Check if confusion matrix data is available
if selected_result.confusion_matrix is None: if selected_result.run.confusion_matrix is None:
st.warning("⚠️ No confusion matrix data found for this training result.") st.warning("⚠️ No confusion matrix data found for this training result.")
return return
cm = selected_result.confusion_matrix cm = selected_result.run.confusion_matrix
# Add normalization selection # Add normalization selection
st.subheader("Display Options") st.subheader("Display Options")
@ -131,11 +136,11 @@ def render_confusion_matrices(selected_result: TrainingResult):
fig_train = plot_confusion_matrix(cm["train"], title="Training Set", normalize=normalize_mode) fig_train = plot_confusion_matrix(cm["train"], title="Training Set", normalize=normalize_mode)
st.plotly_chart(fig_train, width="stretch") st.plotly_chart(fig_train, width="stretch")
with cols[2]: with cols[2]:
# Combined Confusion Matrix # Complete Confusion Matrix
st.subheader("Combined") st.subheader("Complete")
st.caption("Train + Test sets") st.caption("Train + Test sets")
fig_combined = plot_confusion_matrix(cm["combined"], title="Combined", normalize=normalize_mode) fig_complete = plot_confusion_matrix(cm["complete"], title="Complete", normalize=normalize_mode)
st.plotly_chart(fig_combined, width="stretch") st.plotly_chart(fig_complete, width="stretch")
def render_cv_statistics_section(cv_stats: CVMetricStatistics, test_score: float): def render_cv_statistics_section(cv_stats: CVMetricStatistics, test_score: float):

View file

@ -413,13 +413,13 @@ def _render_aggregation_selection(
col_btn1, col_btn2, col_btn3, _ = st.columns([1, 1, 1, 3]) col_btn1, col_btn2, col_btn3, _ = st.columns([1, 1, 1, 3])
with col_btn1: with col_btn1:
if st.button("✅ Select All", use_container_width=True): if st.button("✅ Select All", width="content"):
_set_all_aggregations(member_datasets, members_with_aggs, member_aggregations, selected=True) _set_all_aggregations(member_datasets, members_with_aggs, member_aggregations, selected=True)
with col_btn2: with col_btn2:
if st.button("📊 Median Only", use_container_width=True): if st.button("📊 Median Only", width="content"):
_set_median_only_aggregations(member_datasets, members_with_aggs, member_aggregations) _set_median_only_aggregations(member_datasets, members_with_aggs, member_aggregations)
with col_btn3: with col_btn3:
if st.button("❌ Deselect All", use_container_width=True): if st.button("❌ Deselect All", width="content"):
_set_all_aggregations(member_datasets, members_with_aggs, member_aggregations, selected=False) _set_all_aggregations(member_datasets, members_with_aggs, member_aggregations, selected=False)
# Render the form with checkboxes # Render the form with checkboxes

View file

@ -8,10 +8,7 @@ from entropice.dashboard.plots.experiment_comparison import (
create_feature_consistency_plot, create_feature_consistency_plot,
create_feature_importance_by_grid_level, create_feature_importance_by_grid_level,
) )
from entropice.dashboard.utils.loaders import ( from entropice.dashboard.utils.loaders import TrainingResult
AutogluonTrainingResult,
TrainingResult,
)
def _extract_feature_importance_from_results( def _extract_feature_importance_from_results(
@ -23,107 +20,25 @@ def _extract_feature_importance_from_results(
training_results: List of TrainingResult objects training_results: List of TrainingResult objects
Returns: Returns:
DataFrame with columns: feature, importance, model, grid, level, task, target DataFrame with columns: feature, importance, stddev, model, grid, level, task, target, data_source, grid_level
""" """
records = [] fis = []
for tr in training_results: for tr in training_results:
# Load model state if available fi = tr.run.feature_importance.reset_index().rename(columns={"index": "feature"})
model_state = tr.load_model_state() fi["model"] = tr.run.model_type
if model_state is None: fi["grid"] = tr.run.dataset.grid
continue fi["level"] = tr.run.dataset.level
fi["task"] = tr.run.task
fi["target"] = tr.run.target
fis.append(fi)
info = tr.display_info fi = pd.concat(fis, ignore_index=True)
# Add data source categorization
fi["data_source"] = fi["feature"].apply(_categorize_feature)
fi["grid_level"] = fi["grid"] + "_" + fi["level"].astype(str)
# Extract feature importance based on available data return fi
if "feature_importance" in model_state.data_vars:
# eSPA or similar models with direct feature importance
importance_data = model_state["feature_importance"]
for feature_idx, feature_name in enumerate(importance_data.coords["feature"].values):
importance_value = float(importance_data.isel(feature=feature_idx).values)
records.append(
{
"feature": str(feature_name),
"importance": importance_value,
"model": info.model,
"grid": info.grid,
"level": info.level,
"task": info.task,
"target": info.target,
}
)
elif "gain" in model_state.data_vars:
# XGBoost-style feature importance
gain_data = model_state["gain"]
for feature_idx, feature_name in enumerate(gain_data.coords["feature"].values):
importance_value = float(gain_data.isel(feature=feature_idx).values)
records.append(
{
"feature": str(feature_name),
"importance": importance_value,
"model": info.model,
"grid": info.grid,
"level": info.level,
"task": info.task,
"target": info.target,
}
)
elif "feature_importances_" in model_state.data_vars:
# Random Forest style
importance_data = model_state["feature_importances_"]
for feature_idx, feature_name in enumerate(importance_data.coords["feature"].values):
importance_value = float(importance_data.isel(feature=feature_idx).values)
records.append(
{
"feature": str(feature_name),
"importance": importance_value,
"model": info.model,
"grid": info.grid,
"level": info.level,
"task": info.task,
"target": info.target,
}
)
return pd.DataFrame(records)
def _extract_feature_importance_from_autogluon(
autogluon_results: list[AutogluonTrainingResult],
) -> pd.DataFrame:
"""Extract feature importance from AutoGluon results.
Args:
autogluon_results: List of AutogluonTrainingResult objects
Returns:
DataFrame with columns: feature, importance, model, grid, level, task, target
"""
records = []
for ag in autogluon_results:
if ag.feature_importance is None:
continue
info = ag.display_info
# AutoGluon feature importance is already a DataFrame with features as index
for feature_name, importance_value in ag.feature_importance["importance"].items():
records.append(
{
"feature": str(feature_name),
"importance": float(importance_value),
"model": "autogluon",
"grid": info.grid,
"level": info.level,
"task": info.task,
"target": info.target,
}
)
return pd.DataFrame(records)
def _categorize_feature(feature_name: str) -> str: def _categorize_feature(feature_name: str) -> str:
@ -138,46 +53,12 @@ def _categorize_feature(feature_name: str) -> str:
return "General" return "General"
def _prepare_feature_importance_data(
training_results: list[TrainingResult],
autogluon_results: list[AutogluonTrainingResult],
) -> pd.DataFrame | None:
"""Extract and prepare feature importance data.
Args:
training_results: List of RandomSearchCV training results
autogluon_results: List of AutoGluon training results
Returns:
DataFrame with feature importance data or None if no data available
"""
fi_df_cv = _extract_feature_importance_from_results(training_results)
fi_df_ag = _extract_feature_importance_from_autogluon(autogluon_results)
if fi_df_cv.empty and fi_df_ag.empty:
return None
# Combine both
fi_df = pd.concat([fi_df_cv, fi_df_ag], ignore_index=True)
# Add data source categorization
fi_df["data_source"] = fi_df["feature"].apply(_categorize_feature)
fi_df["grid_level"] = fi_df["grid"] + "_" + fi_df["level"].astype(str)
return fi_df
@st.fragment @st.fragment
def render_feature_importance_analysis( def render_feature_importance_analysis(training_results: list[TrainingResult]):
training_results: list[TrainingResult],
autogluon_results: list[AutogluonTrainingResult],
):
"""Render feature importance analysis section. """Render feature importance analysis section.
Args: Args:
training_results: List of RandomSearchCV training results training_results: List of RandomSearchCV training results
autogluon_results: List of AutoGluon training results
""" """
st.header("🔍 Feature Importance Analysis") st.header("🔍 Feature Importance Analysis")
@ -191,13 +72,13 @@ def render_feature_importance_analysis(
# Extract feature importance # Extract feature importance
with st.spinner("Extracting feature importance from training results..."): with st.spinner("Extracting feature importance from training results..."):
fi_df = _prepare_feature_importance_data(training_results, autogluon_results) fi = _extract_feature_importance_from_results(training_results)
if fi_df is None: if fi is None:
st.warning("No feature importance data available. Model state files may be missing.") st.warning("No feature importance data available. Model state files may be missing.")
return return
st.success(f"Extracted feature importance from {len(fi_df)} feature-model combinations") st.success(f"Extracted feature importance from {len(fi)} feature-model combinations")
# Filters # Filters
st.subheader("Filters") st.subheader("Filters")
@ -205,12 +86,12 @@ def render_feature_importance_analysis(
with col1: with col1:
# Task filter # Task filter
available_tasks = ["All", *sorted(fi_df["task"].unique().tolist())] available_tasks = ["All", *sorted(fi["task"].unique().tolist())]
selected_task = st.selectbox("Task", options=available_tasks, index=0, key="fi_task_filter") selected_task = st.selectbox("Task", options=available_tasks, index=0, key="fi_task_filter")
with col2: with col2:
# Target filter # Target filter
available_targets = ["All", *sorted(fi_df["target"].unique().tolist())] available_targets = ["All", *sorted(fi["target"].unique().tolist())]
selected_target = st.selectbox("Target Dataset", options=available_targets, index=0, key="fi_target_filter") selected_target = st.selectbox("Target Dataset", options=available_targets, index=0, key="fi_target_filter")
with col3: with col3:
@ -218,13 +99,12 @@ def render_feature_importance_analysis(
top_n_features = st.number_input("Top N Features", min_value=5, max_value=50, value=15, key="top_n_features") top_n_features = st.number_input("Top N Features", min_value=5, max_value=50, value=15, key="top_n_features")
# Apply filters # Apply filters
filtered_fi_df = fi_df.copy() filtered_fi = fi.copy()
if selected_task != "All": if selected_task != "All":
filtered_fi_df = filtered_fi_df.loc[filtered_fi_df["task"] == selected_task] filtered_fi = filtered_fi.loc[filtered_fi["task"] == selected_task]
if selected_target != "All": if selected_target != "All":
filtered_fi_df = filtered_fi_df.loc[filtered_fi_df["target"] == selected_target] filtered_fi = filtered_fi.loc[filtered_fi["target"] == selected_target]
if len(filtered_fi) == 0:
if len(filtered_fi_df) == 0:
st.warning("No feature importance data available for the selected filters.") st.warning("No feature importance data available for the selected filters.")
return return
@ -232,17 +112,17 @@ def render_feature_importance_analysis(
st.subheader("Top Features by Grid Level") st.subheader("Top Features by Grid Level")
try: try:
fig = create_feature_importance_by_grid_level(filtered_fi_df, top_n=top_n_features) fig = create_feature_importance_by_grid_level(filtered_fi, top_n=top_n_features)
st.plotly_chart(fig, width="stretch") st.plotly_chart(fig, width="stretch")
except Exception as e: except Exception as e:
st.error(f"Could not create feature importance by grid level plot: {e}") st.error(f"Could not create feature importance by grid level plot: {e}")
# Show detailed breakdown in expander # Show detailed breakdown in expander
grid_levels = sorted(filtered_fi_df["grid_level"].unique()) grid_levels = sorted(filtered_fi["grid_level"].unique())
with st.expander("Show Detailed Breakdown by Grid Level", expanded=False): with st.expander("Show Detailed Breakdown by Grid Level", expanded=False):
for grid_level in grid_levels: for grid_level in grid_levels:
grid_data = filtered_fi_df[filtered_fi_df["grid_level"] == grid_level] grid_data = filtered_fi[filtered_fi["grid_level"] == grid_level]
# Get top features for this grid level # Get top features for this grid level
top_features_grid = ( top_features_grid = (
@ -271,7 +151,7 @@ def render_feature_importance_analysis(
) )
try: try:
fig = create_feature_consistency_plot(filtered_fi_df, top_n=top_n_features) fig = create_feature_consistency_plot(filtered_fi, top_n=top_n_features)
st.plotly_chart(fig, width="stretch") st.plotly_chart(fig, width="stretch")
except Exception as e: except Exception as e:
st.error(f"Could not create feature consistency plot: {e}") st.error(f"Could not create feature consistency plot: {e}")
@ -280,7 +160,7 @@ def render_feature_importance_analysis(
with st.expander("Show Detailed Statistics", expanded=False): with st.expander("Show Detailed Statistics", expanded=False):
# Get top features overall # Get top features overall
overall_top_features = ( overall_top_features = (
filtered_fi_df.groupby("feature")["importance"] filtered_fi.groupby("feature")["importance"]
.mean() .mean()
.reset_index() .reset_index()
.nlargest(top_n_features, "importance")["feature"] .nlargest(top_n_features, "importance")["feature"]
@ -289,7 +169,7 @@ def render_feature_importance_analysis(
# Calculate variance in importance across models for each feature # Calculate variance in importance across models for each feature
feature_variance = ( feature_variance = (
filtered_fi_df[filtered_fi_df["feature"].isin(overall_top_features)] filtered_fi[filtered_fi["feature"].isin(overall_top_features)]
.groupby("feature")["importance"] .groupby("feature")["importance"]
.agg(["mean", "std", "min", "max"]) .agg(["mean", "std", "min", "max"])
.reset_index() .reset_index()
@ -299,7 +179,7 @@ def render_feature_importance_analysis(
# Add data source # Add data source
feature_variance = feature_variance.merge( feature_variance = feature_variance.merge(
filtered_fi_df[["feature", "data_source"]].drop_duplicates(), on="feature", how="left" filtered_fi[["feature", "data_source"]].drop_duplicates(), on="feature", how="left"
) )
feature_variance.columns = ["Feature", "Mean", "Std Dev", "Min", "Max", "CV", "Data Source"] feature_variance.columns = ["Feature", "Mean", "Std Dev", "Min", "Max", "CV", "Data Source"]
@ -314,7 +194,7 @@ def render_feature_importance_analysis(
st.subheader("Feature Importance by Data Source") st.subheader("Feature Importance by Data Source")
try: try:
fig = create_data_source_importance_bars(filtered_fi_df) fig = create_data_source_importance_bars(filtered_fi)
st.plotly_chart(fig, width="stretch") st.plotly_chart(fig, width="stretch")
except Exception as e: except Exception as e:
st.error(f"Could not create data source importance chart: {e}") st.error(f"Could not create data source importance chart: {e}")
@ -322,9 +202,7 @@ def render_feature_importance_analysis(
# Show detailed table in expander # Show detailed table in expander
with st.expander("Show Data Source Statistics", expanded=False): with st.expander("Show Data Source Statistics", expanded=False):
# Aggregate importance by data source # Aggregate importance by data source
source_importance = ( source_importance = filtered_fi.groupby("data_source")["importance"].agg(["sum", "mean", "count"]).reset_index()
filtered_fi_df.groupby("data_source")["importance"].agg(["sum", "mean", "count"]).reset_index()
)
source_importance.columns = ["Data Source", "Total Importance", "Mean Importance", "Feature Count"] source_importance.columns = ["Data Source", "Total Importance", "Mean Importance", "Feature Count"]
source_importance = source_importance.sort_values("Total Importance", ascending=False) source_importance = source_importance.sort_values("Total Importance", ascending=False)

View file

@ -39,7 +39,7 @@ def render_grid_level_analysis(summary_df: pd.DataFrame, available_metrics: list
unique_tasks = summary_df["task"].unique() unique_tasks = summary_df["task"].unique()
# Split selection # Split selection
split = st.selectbox("Data Split", options=["test", "train", "combined"], index=0, key="grid_split") split = st.selectbox("Data Split", options=["test", "train", "complete"], index=0, key="grid_split")
# Create plots for each task # Create plots for each task
for task in sorted(unique_tasks): for task in sorted(unique_tasks):

View file

@ -34,12 +34,12 @@ def render_inference_maps_section(
# Extract unique grid configurations from training results # Extract unique grid configurations from training results
available_grid_configs = sorted( available_grid_configs = sorted(
{GridConfig.from_grid_level((tr.settings.grid, tr.settings.level)) for tr in training_results}, {GridConfig.from_grid_level((tr.run.dataset.grid, tr.run.dataset.level)) for tr in training_results},
key=lambda gc: gc.sort_key, key=lambda gc: gc.sort_key,
) )
available_tasks = sorted({tr.settings.task for tr in training_results}) available_tasks = sorted({tr.run.task for tr in training_results})
available_targets = sorted({tr.settings.target for tr in training_results}) available_targets = sorted({tr.run.target for tr in training_results})
available_models = sorted({tr.settings.model for tr in training_results}) available_models = sorted({tr.run.model_type for tr in training_results})
# Create form for selecting parameters # Create form for selecting parameters
with st.form("inference_map_form"): with st.form("inference_map_form"):
@ -92,11 +92,11 @@ def render_inference_maps_section(
filtered_results = [ filtered_results = [
tr tr
for tr in training_results for tr in training_results
if tr.settings.grid == selected_grid if tr.run.dataset.grid == selected_grid
and tr.settings.level == selected_level and tr.run.dataset.level == selected_level
and tr.settings.task == selected_task and tr.run.task == selected_task
and tr.settings.target in selected_targets and tr.run.target in selected_targets
and tr.settings.model in selected_models and tr.run.model_type in selected_models
] ]
if not filtered_results: if not filtered_results:

View file

@ -34,7 +34,7 @@ def render_model_comparison(summary_df: pd.DataFrame, available_metrics: list[st
} }
# Split selection # Split selection
split = st.selectbox("Data Split", options=["test", "train", "combined"], index=0, key="model_split") split = st.selectbox("Data Split", options=["test", "train", "complete"], index=0, key="model_split")
# Get unique tasks # Get unique tasks
unique_tasks = summary_df["task"].unique() unique_tasks = summary_df["task"].unique()

View file

@ -3,11 +3,7 @@
import pandas as pd import pandas as pd
import streamlit as st import streamlit as st
from entropice.dashboard.utils.loaders import ( from entropice.dashboard.utils.loaders import TrainingResult, get_available_experiments
AutogluonTrainingResult,
TrainingResult,
get_available_experiments,
)
def render_experiment_sidebar() -> str | None: def render_experiment_sidebar() -> str | None:
@ -38,7 +34,6 @@ def render_experiment_sidebar() -> str | None:
def render_experiment_overview( def render_experiment_overview(
experiment_name: str, experiment_name: str,
training_results: list[TrainingResult], training_results: list[TrainingResult],
autogluon_results: list[AutogluonTrainingResult],
summary_df: pd.DataFrame, summary_df: pd.DataFrame,
): ):
"""Render experiment overview section. """Render experiment overview section.
@ -46,7 +41,6 @@ def render_experiment_overview(
Args: Args:
experiment_name: Name of the experiment experiment_name: Name of the experiment
training_results: List of RandomSearchCV training results training_results: List of RandomSearchCV training results
autogluon_results: List of AutoGluon training results
summary_df: Summary DataFrame with all results summary_df: Summary DataFrame with all results
""" """
@ -56,13 +50,15 @@ def render_experiment_overview(
col1, col2, col3, col4 = st.columns(4) col1, col2, col3, col4 = st.columns(4)
with col1: with col1:
st.metric("Total Training Runs", len(training_results) + len(autogluon_results)) st.metric("Total Training Runs", len(training_results))
with col2: with col2:
st.metric("RandomSearchCV Runs", len(training_results)) hpsearch_runs = [tr for tr in training_results if tr.run.method_type == "HPOCV"]
st.metric("RandomSearchCV Runs", len(hpsearch_runs))
with col3: with col3:
st.metric("AutoGluon Runs", len(autogluon_results)) autogluon_runs = [tr for tr in training_results if tr.run.method_type == "AutoML"]
st.metric("AutoGluon Runs", len(autogluon_runs))
with col4: with col4:
unique_configs = summary_df[["grid", "level", "task", "target"]].drop_duplicates() unique_configs = summary_df[["grid", "level", "task", "target"]].drop_duplicates()

View file

@ -2,16 +2,15 @@
from datetime import datetime from datetime import datetime
import pandas as pd
import streamlit as st import streamlit as st
from entropice.dashboard.utils.loaders import AutogluonTrainingResult, TrainingResult from entropice.dashboard.utils.loaders import TrainingResult
from entropice.utils.types import ( from entropice.utils.types import (
GridConfig, GridConfig,
) )
def render_training_results_summary(training_results: list[TrainingResult | AutogluonTrainingResult]): def render_training_results_summary(training_results: list[TrainingResult]):
"""Render summary metrics for training results.""" """Render summary metrics for training results."""
st.header("📊 Training Results Summary") st.header("📊 Training Results Summary")
col1, col2, col3, col4 = st.columns(4) col1, col2, col3, col4 = st.columns(4)
@ -24,7 +23,7 @@ def render_training_results_summary(training_results: list[TrainingResult | Auto
st.metric("Total Runs", len(training_results)) st.metric("Total Runs", len(training_results))
with col3: with col3:
models = {tr.settings.model for tr in training_results if hasattr(tr.settings, "model")} models = {tr.run.model_type for tr in training_results}
st.metric("Model Types", len(models)) st.metric("Model Types", len(models))
with col4: with col4:
@ -34,15 +33,15 @@ def render_training_results_summary(training_results: list[TrainingResult | Auto
@st.fragment @st.fragment
def render_experiment_results(training_results: list[TrainingResult | AutogluonTrainingResult]): # noqa: C901 def render_experiment_results(training_results: list[TrainingResult]): # noqa: C901
"""Render detailed experiment results table and expandable details.""" """Render detailed experiment results table and expandable details."""
st.header("🎯 Experiment Results") st.header("🎯 Experiment Results")
# Filters # Filters
experiments = sorted({tr.experiment for tr in training_results if tr.experiment}) experiments = sorted({tr.experiment for tr in training_results if tr.experiment})
tasks = sorted({tr.settings.task for tr in training_results}) tasks = sorted({tr.run.task for tr in training_results})
models = sorted({tr.settings.model if isinstance(tr, TrainingResult) else "autogluon" for tr in training_results}) models = sorted({tr.run.model_type for tr in training_results})
grids = sorted({f"{tr.settings.grid}-{tr.settings.level}" for tr in training_results}) grids = sorted({f"{tr.run.dataset.grid}-{tr.run.dataset.level}" for tr in training_results})
# Create filter columns # Create filter columns
filter_cols = st.columns(4) filter_cols = st.columns(4)
@ -83,30 +82,21 @@ def render_experiment_results(training_results: list[TrainingResult | AutogluonT
) )
# Apply filters # Apply filters
filtered_results = training_results filtered_results: list[TrainingResult] = training_results
if selected_experiment != "All": if selected_experiment != "All":
filtered_results = [tr for tr in filtered_results if tr.experiment == selected_experiment] filtered_results = [tr for tr in filtered_results if tr.experiment == selected_experiment]
if selected_task != "All": if selected_task != "All":
filtered_results = [tr for tr in filtered_results if tr.settings.task == selected_task] filtered_results = [tr for tr in filtered_results if tr.run.task == selected_task]
if selected_model != "All" and selected_model != "autogluon": if selected_model != "All":
filtered_results = [ filtered_results = [tr for tr in filtered_results if tr.run.model_type == selected_model]
tr for tr in filtered_results if isinstance(tr, TrainingResult) and tr.settings.model == selected_model
]
elif selected_model == "autogluon":
filtered_results = [tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
if selected_grid != "All": if selected_grid != "All":
filtered_results = [tr for tr in filtered_results if f"{tr.settings.grid}-{tr.settings.level}" == selected_grid] filtered_results = [
tr for tr in filtered_results if f"{tr.run.dataset.grid}-{tr.run.dataset.level}" == selected_grid
]
st.subheader("Results Table") st.subheader("Results Table")
summary_df = TrainingResult.to_dataframe([tr for tr in filtered_results if isinstance(tr, TrainingResult)]) summary_df = TrainingResult.to_dataframe(filtered_results)
autogluon_df = AutogluonTrainingResult.to_dataframe(
[tr for tr in filtered_results if isinstance(tr, AutogluonTrainingResult)]
)
if len(summary_df) == 0:
summary_df = autogluon_df
elif len(autogluon_df) > 0:
summary_df = pd.concat([summary_df, autogluon_df], ignore_index=True)
# Display with color coding for best scores # Display with color coding for best scores
st.dataframe( st.dataframe(
@ -120,25 +110,22 @@ def render_experiment_results(training_results: list[TrainingResult | AutogluonT
for tr in filtered_results: for tr in filtered_results:
tr_info = tr.display_info tr_info = tr.display_info
display_name = tr_info.get_display_name("model_first") display_name = tr_info.get_display_name("model_first")
model = "autogluon" if isinstance(tr, AutogluonTrainingResult) else tr.settings.model model = tr.run.model_type
cv_splits = tr.settings.cv_splits if hasattr(tr.settings, "cv_splits") else "N/A"
with st.expander(display_name): with st.expander(display_name):
col1, col2 = st.columns([1, 2]) col1, col2 = st.columns([1, 2])
with col1: with col1:
grid_config = GridConfig.from_grid_level((tr.settings.grid, tr.settings.level)) grid_config = GridConfig.from_grid_level((tr.run.dataset.grid, tr.run.dataset.level))
st.write( st.write(
"**Configuration:**\n" "**Configuration:**\n"
f"- **Experiment:** {tr.experiment}\n" f"- **Experiment:** {tr.experiment}\n"
f"- **Task:** {tr.settings.task}\n" f"- **Task:** {tr.run.task}\n"
f"- **Target:** {tr.settings.target}\n" f"- **Target:** {tr.run.target}\n"
f"- **Model:** {model}\n" f"- **Model:** {model}\n"
f"- **Grid:** {grid_config.display_name}\n" f"- **Grid:** {grid_config.display_name}\n"
f"- **Created At:** {tr_info.timestamp.strftime('%Y-%m-%d %H:%M')}\n" f"- **Created At:** {tr_info.timestamp.strftime('%Y-%m-%d %H:%M')}\n"
f"- **Temporal Mode:** {tr.settings.temporal_mode}\n" f"- **Temporal Mode:** {tr.run.dataset.temporal_mode}\n"
f"- **Members:** {', '.join(tr.settings.members)}\n" f"- **Members:** {', '.join(tr.run.dataset.members)}\n"
f"- **CV Splits:** {cv_splits}\n"
f"- **Classes:** {tr.settings.classes}\n"
) )
file_str = "\n**Files:**\n" file_str = "\n**Files:**\n"
@ -155,29 +142,32 @@ def render_experiment_results(training_results: list[TrainingResult | AutogluonT
file_str += f"- 📄 `{file.name}`\n" file_str += f"- 📄 `{file.name}`\n"
st.write(file_str) st.write(file_str)
with col2: with col2:
if isinstance(tr, AutogluonTrainingResult): if tr.run.method_type == "AutoML":
st.write("**Leaderboard:**") st.write("**Leaderboard:**")
st.dataframe(tr.leaderboard, width="stretch", hide_index=True) st.dataframe(tr.run.leaderboard, width="stretch", hide_index=True)
else: else:
st.write("**CV Score Summary:**") st.write("**CV Score Summary:**")
# Extract all test scores # Extract all test scores
metric_df = tr.get_metric_dataframe() metric_df = tr.get_cv_results_dataframe()
if metric_df is not None: if metric_df is not None:
st.dataframe(metric_df, width="stretch", hide_index=True) st.dataframe(metric_df, width="stretch", hide_index=True)
else: else:
st.write("No test scores found in results.") st.write("No test scores found in results.")
cv_results = tr.run.cv_results
assert cv_results is not None, "CV results should not be None for non-AutoML runs"
# Show parameter space explored # Show parameter space explored
if "initial_K" in tr.results.columns: # Common parameter if "initial_K" in cv_results.columns: # Common parameter
st.write("\n**Parameter Ranges Explored:**") st.write("\n**Parameter Ranges Explored:**")
for param in ["initial_K", "eps_cl", "eps_e"]: for param in ["initial_K", "eps_cl", "eps_e"]:
if param in tr.results.columns: if param in cv_results.columns:
min_val = tr.results[param].min() min_val = cv_results[param].min()
max_val = tr.results[param].max() max_val = cv_results[param].max()
unique_vals = tr.results[param].nunique() unique_vals = cv_results[param].nunique()
st.write(f"- **{param}:** {unique_vals} values ({min_val:.2e} to {max_val:.2e})") st.write(f"- **{param}:** {unique_vals} values ({min_val:.2e} to {max_val:.2e})")
st.write("**CV Results DataFrame:**") st.write("**CV Results DataFrame:**")
st.dataframe(tr.results, width="stretch", hide_index=True) st.dataframe(cv_results, width="stretch", hide_index=True)
st.write(f"\n**Path:** `{tr.path}`") st.write(f"\n**Path:** `{tr.path}`")

View file

@ -1,5 +1,6 @@
"""Hyperparameter Space Visualization Section.""" """Hyperparameter Space Visualization Section."""
import pandas as pd
import streamlit as st import streamlit as st
from entropice.dashboard.plots.hyperparameter_space import ( from entropice.dashboard.plots.hyperparameter_space import (
@ -11,9 +12,10 @@ from entropice.dashboard.plots.hyperparameter_space import (
) )
from entropice.dashboard.utils.formatters import format_metric_name from entropice.dashboard.utils.formatters import format_metric_name
from entropice.dashboard.utils.loaders import TrainingResult from entropice.dashboard.utils.loaders import TrainingResult
from entropice.utils.training import HPOCV
def _render_performance_summary(results, refit_metric: str): def _render_performance_summary(results: pd.DataFrame, refit_metric: str):
"""Render performance summary subsection.""" """Render performance summary subsection."""
best_idx = results[f"mean_test_{refit_metric}"].idxmax() best_idx = results[f"mean_test_{refit_metric}"].idxmax()
best_row = results.loc[best_idx] best_row = results.loc[best_idx]
@ -47,7 +49,7 @@ def _render_performance_summary(results, refit_metric: str):
st.metric(param_name, formatted_value) st.metric(param_name, formatted_value)
def _render_parameter_distributions(results, param_grid: dict | None): def _render_parameter_distributions(results: pd.DataFrame, param_grid: dict | None):
"""Render parameter distributions subsection.""" """Render parameter distributions subsection."""
st.subheader("Parameter Distributions") st.subheader("Parameter Distributions")
st.caption("Distribution of hyperparameter values explored during random search") st.caption("Distribution of hyperparameter values explored during random search")
@ -73,7 +75,7 @@ def _render_parameter_distributions(results, param_grid: dict | None):
st.plotly_chart(param_charts[param_name], width="stretch") st.plotly_chart(param_charts[param_name], width="stretch")
def _render_score_evolution(results, selected_metric: str): def _render_score_evolution(results: pd.DataFrame, selected_metric: str):
"""Render score evolution subsection.""" """Render score evolution subsection."""
st.subheader("Score Evolution Over Iterations") st.subheader("Score Evolution Over Iterations")
st.caption(f"How {format_metric_name(selected_metric)} evolved during the random search") st.caption(f"How {format_metric_name(selected_metric)} evolved during the random search")
@ -85,7 +87,7 @@ def _render_score_evolution(results, selected_metric: str):
st.warning(f"Score evolution not available for metric: {selected_metric}") st.warning(f"Score evolution not available for metric: {selected_metric}")
def _render_score_vs_parameters(results, selected_metric: str, param_grid: dict | None): def _render_score_vs_parameters(results: pd.DataFrame, selected_metric: str, param_grid: dict | None):
"""Render score vs parameters subsection.""" """Render score vs parameters subsection."""
st.subheader("Score vs Individual Parameters") st.subheader("Score vs Individual Parameters")
st.caption(f"Relationship between {format_metric_name(selected_metric)} and each hyperparameter") st.caption(f"Relationship between {format_metric_name(selected_metric)} and each hyperparameter")
@ -110,7 +112,7 @@ def _render_score_vs_parameters(results, selected_metric: str, param_grid: dict
st.plotly_chart(score_vs_param_charts[param_name], width="stretch") st.plotly_chart(score_vs_param_charts[param_name], width="stretch")
def _render_parameter_correlations(results, selected_metric: str): def _render_parameter_correlations(results: pd.DataFrame, selected_metric: str):
"""Render parameter correlations subsection.""" """Render parameter correlations subsection."""
st.subheader("Parameter-Score Correlations") st.subheader("Parameter-Score Correlations")
st.caption(f"Correlation between numeric parameters and {format_metric_name(selected_metric)}") st.caption(f"Correlation between numeric parameters and {format_metric_name(selected_metric)}")
@ -122,7 +124,7 @@ def _render_parameter_correlations(results, selected_metric: str):
st.info("No numeric parameters found for correlation analysis.") st.info("No numeric parameters found for correlation analysis.")
def _render_parameter_interactions(results, selected_metric: str, param_grid: dict | None): def _render_parameter_interactions(results: pd.DataFrame, selected_metric: str, param_grid: dict | None):
"""Render parameter interactions subsection.""" """Render parameter interactions subsection."""
st.subheader("Parameter Interactions") st.subheader("Parameter Interactions")
st.caption(f"Interaction between parameter pairs and their effect on {format_metric_name(selected_metric)}") st.caption(f"Interaction between parameter pairs and their effect on {format_metric_name(selected_metric)}")
@ -154,19 +156,24 @@ def render_hparam_space_section(selected_result: TrainingResult, selected_metric
""" """
st.header("🧩 Hyperparameter Space Exploration") st.header("🧩 Hyperparameter Space Exploration")
if selected_result.run.method_type != "HPOCV":
st.warning("Hyperparameter space visualization is only available for RandomSearchCV runs.")
return
assert isinstance(selected_result.run.method, HPOCV), "Expected method to be HPOCV for HPOCV runs"
results = selected_result.results cv_results = selected_result.run.cv_results
assert cv_results is not None, "CV results should not be None for HPOCV runs"
refit_metric = selected_result._get_best_metric_name() refit_metric = selected_result._get_best_metric_name()
param_grid = selected_result.settings.param_grid param_grid = selected_result.run.method.hpconfig
_render_performance_summary(results, refit_metric) _render_performance_summary(cv_results, refit_metric)
_render_parameter_distributions(results, param_grid) _render_parameter_distributions(cv_results, param_grid)
_render_score_evolution(results, selected_metric) _render_score_evolution(cv_results, selected_metric)
_render_score_vs_parameters(results, selected_metric, param_grid) _render_score_vs_parameters(cv_results, selected_metric, param_grid)
_render_parameter_correlations(results, selected_metric) _render_parameter_correlations(cv_results, selected_metric)
_render_parameter_interactions(results, selected_metric, param_grid) _render_parameter_interactions(cv_results, selected_metric, param_grid)

View file

@ -17,7 +17,7 @@ def render_regression_analysis(selected_result: TrainingResult):
st.header("📊 Regression Analysis") st.header("📊 Regression Analysis")
# Check if this is a regression task # Check if this is a regression task
if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]: if selected_result.run.task in ["binary", "count_regimes", "density_regimes"]:
st.info("📈 Regression analysis is only available for regression tasks (count, density).") st.info("📈 Regression analysis is only available for regression tasks (count, density).")
return return
@ -30,19 +30,18 @@ def render_regression_analysis(selected_result: TrainingResult):
# Create DatasetEnsemble from settings # Create DatasetEnsemble from settings
with st.spinner("Loading training data to get true values..."): with st.spinner("Loading training data to get true values..."):
ensemble = DatasetEnsemble( ensemble = DatasetEnsemble(
grid=selected_result.settings.grid, grid=selected_result.run.dataset.grid,
level=selected_result.settings.level, level=selected_result.run.dataset.level,
members=selected_result.settings.members, members=selected_result.run.dataset.members,
temporal_mode=selected_result.settings.temporal_mode, temporal_mode=selected_result.run.dataset.temporal_mode,
dimension_filters=selected_result.settings.dimension_filters, dimension_filters=selected_result.run.dataset.dimension_filters,
variable_filters=selected_result.settings.variable_filters, variable_filters=selected_result.run.dataset.variable_filters,
add_lonlat=selected_result.settings.add_lonlat,
) )
# Create training set to get true values # Create training set to get true values
training_set = ensemble.create_training_set( training_set = ensemble.create_training_set(
task=selected_result.settings.task, task=selected_result.run.task,
target=selected_result.settings.target, target=selected_result.run.target,
device="cpu", device="cpu",
cache_mode="read", cache_mode="read",
) )
@ -59,7 +58,7 @@ def render_regression_analysis(selected_result: TrainingResult):
merged = predictions_df.merge(true_values, on="cell_id", how="inner") merged = predictions_df.merge(true_values, on="cell_id", how="inner")
merged["split"] = split_series.reindex(merged["cell_id"]).values merged["split"] = split_series.reindex(merged["cell_id"]).values
# Get train, test, and combined data # Get train, test, and complete data
train_data = merged[merged["split"] == "train"] train_data = merged[merged["split"] == "train"]
test_data = merged[merged["split"] == "test"] test_data = merged[merged["split"] == "test"]
@ -94,14 +93,14 @@ def render_regression_analysis(selected_result: TrainingResult):
st.plotly_chart(fig_train, use_container_width=True) st.plotly_chart(fig_train, use_container_width=True)
with cols[2]: with cols[2]:
st.markdown("#### Combined") st.markdown("#### Combplete")
st.caption("Train + Test sets") st.caption("Train + Test sets")
fig_combined = plot_regression_scatter( fig_complete = plot_regression_scatter(
merged["y"], merged["y"],
merged["predicted"], merged["predicted"],
title="Combined", title="Complete",
) )
st.plotly_chart(fig_combined, use_container_width=True) st.plotly_chart(fig_complete, use_container_width=True)
# Display residual plots # Display residual plots
st.subheader("Residual Analysis") st.subheader("Residual Analysis")
@ -118,5 +117,5 @@ def render_regression_analysis(selected_result: TrainingResult):
st.plotly_chart(fig_train_res, use_container_width=True) st.plotly_chart(fig_train_res, use_container_width=True)
with cols[2]: with cols[2]:
fig_combined_res = plot_residuals(merged["y"], merged["predicted"], title="Combined Residuals") fig_complete_res = plot_residuals(merged["y"], merged["predicted"], title="Complete Residuals")
st.plotly_chart(fig_combined_res, use_container_width=True) st.plotly_chart(fig_complete_res, use_container_width=True)

View file

@ -3,24 +3,24 @@
import json import json
import pickle import pickle
import subprocess import subprocess
from collections import defaultdict
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Literal
import antimeridian import antimeridian
import geopandas as gpd import geopandas as gpd
import pandas as pd import pandas as pd
import streamlit as st import streamlit as st
import toml
import xarray as xr import xarray as xr
from shapely.geometry import shape from shapely.geometry import shape
import entropice.spatial.grids import entropice.spatial.grids
import entropice.utils.paths import entropice.utils.paths
from entropice.dashboard.utils.formatters import TrainingResultDisplayInfo from entropice.dashboard.utils.formatters import TrainingResultDisplayInfo
from entropice.ml.autogluon_training import AutoGluonTrainingSettings
from entropice.ml.dataset import DatasetEnsemble, TrainingSet from entropice.ml.dataset import DatasetEnsemble, TrainingSet
from entropice.ml.randomsearch import TrainingSettings from entropice.utils.training import Training
from entropice.utils.types import GridConfig, TargetDataset, Task, all_target_datasets, all_tasks from entropice.utils.types import GridConfig, TargetDataset, Task, all_target_datasets, all_tasks
@ -39,12 +39,7 @@ class TrainingResult:
path: Path path: Path
experiment: str experiment: str
settings: TrainingSettings run: Training
results: pd.DataFrame
train_metrics: dict[str, float]
test_metrics: dict[str, float]
combined_metrics: dict[str, float]
confusion_matrix: xr.Dataset | None
created_at: float created_at: float
available_metrics: list[str] available_metrics: list[str]
files: list[Path] files: list[Path]
@ -52,52 +47,16 @@ class TrainingResult:
@classmethod @classmethod
def from_path(cls, result_path: Path, experiment_name: str | None = None) -> "TrainingResult": def from_path(cls, result_path: Path, experiment_name: str | None = None) -> "TrainingResult":
"""Load a TrainingResult from a given result directory path.""" """Load a TrainingResult from a given result directory path."""
result_file = result_path / "search_results.parquet"
preds_file = result_path / "predicted_probabilities.parquet"
settings_file = result_path / "search_settings.toml"
metrics_file = result_path / "metrics.toml"
confusion_matrix_file = result_path / "confusion_matrix.nc"
all_files = list(result_path.iterdir()) all_files = list(result_path.iterdir())
if not result_file.exists(): run = Training.load(result_path)
raise FileNotFoundError(f"Missing results file in {result_path}")
if not settings_file.exists():
raise FileNotFoundError(f"Missing settings file in {result_path}")
if not preds_file.exists():
raise FileNotFoundError(f"Missing predictions file in {result_path}")
if not metrics_file.exists():
raise FileNotFoundError(f"Missing metrics file in {result_path}")
created_at = result_path.stat().st_ctime created_at = result_path.stat().st_ctime
settings_dict = toml.load(settings_file)["settings"] available_metrics = [str(v) for v in run.metrics["metric"].unique()]
# Handle backward compatibility: add missing fields with defaults
if "classes" not in settings_dict:
settings_dict["classes"] = None
if "param_grid" not in settings_dict:
settings_dict["param_grid"] = {}
if "cv_splits" not in settings_dict:
settings_dict["cv_splits"] = 5
if "metrics" not in settings_dict:
settings_dict["metrics"] = []
settings = TrainingSettings(**settings_dict)
results = pd.read_parquet(result_file)
metrics = toml.load(metrics_file)
if not confusion_matrix_file.exists():
confusion_matrix = None
else:
confusion_matrix = xr.open_dataset(confusion_matrix_file, engine="h5netcdf")
available_metrics = [col.replace("mean_test_", "") for col in results.columns if col.startswith("mean_test_")]
return cls( return cls(
path=result_path, path=result_path,
experiment=experiment_name or "N/A", experiment=experiment_name or "N/A",
settings=settings, run=run,
results=results,
train_metrics=metrics["train_metrics"],
test_metrics=metrics["test_metrics"],
combined_metrics=metrics["combined_metrics"],
confusion_matrix=confusion_matrix,
created_at=created_at, created_at=created_at,
available_metrics=available_metrics, available_metrics=available_metrics,
files=all_files, files=all_files,
@ -107,11 +66,11 @@ class TrainingResult:
def display_info(self) -> TrainingResultDisplayInfo: def display_info(self) -> TrainingResultDisplayInfo:
"""Get display information for the training result.""" """Get display information for the training result."""
return TrainingResultDisplayInfo( return TrainingResultDisplayInfo(
task=self.settings.task, task=self.run.task,
target=self.settings.target, target=self.run.target,
model=self.settings.model, model=self.run.model_type,
grid=self.settings.grid, grid=self.run.dataset.grid,
level=self.settings.level, level=self.run.dataset.level,
timestamp=datetime.fromtimestamp(self.created_at), timestamp=datetime.fromtimestamp(self.created_at),
) )
@ -155,18 +114,21 @@ class TrainingResult:
st.error(f"Error loading predictions: {e}") st.error(f"Error loading predictions: {e}")
return None return None
def get_metric_dataframe(self) -> pd.DataFrame | None: def get_cv_results_dataframe(self) -> pd.DataFrame | None:
"""Get a DataFrame of available metrics for this training result.""" """Get a DataFrame of available metrics for this training result."""
metric_cols = [col for col in self.results.columns if col.startswith("mean_test_")] results = self.run.cv_results
if results is None:
return None
metric_cols = [col for col in results.columns if col.startswith("mean_test_")]
if not metric_cols: if not metric_cols:
return None return None
metric_data = [] metric_data = []
for col in metric_cols: for col in metric_cols:
metric_name = col.replace("mean_test_", "").replace("neg_", "").title() metric_name = col.replace("mean_test_", "").replace("neg_", "").title()
metrics = self.results[col] metrics = results[col]
# Check if the metric is negative # Check if the metric is negative
if col.startswith("mean_test_neg_"): if col.startswith("mean_test_neg_"):
task_multiplier = 1 if self.settings.task != "density" else 100 task_multiplier = 1 if self.run.task != "density" else 100
task_multiplier *= -1 task_multiplier *= -1
metrics = metrics * task_multiplier metrics = metrics * task_multiplier
@ -184,7 +146,7 @@ class TrainingResult:
def _get_best_metric_name(self) -> str: def _get_best_metric_name(self) -> str:
"""Get the primary metric name for a given task.""" """Get the primary metric name for a given task."""
match self.settings.task: match self.run.task:
case "binary": case "binary":
return "f1" return "f1"
case "count_regimes" | "density_regimes": case "count_regimes" | "density_regimes":
@ -192,6 +154,16 @@ class TrainingResult:
case _: # regression tasks case _: # regression tasks
return "r2" return "r2"
def _get_best_score(self, split: Literal["train", "test", "complete"]) -> float:
"""Get the best score for the primary metric on a given split."""
metric = self._get_best_metric_name()
scores = self.run.metrics[(self.run.metrics["metric"] == metric) & (self.run.metrics["split"] == split)]
# Should leave a single row
assert len(scores) == 1, (
f"Expected exactly one score for metric {metric} and split {split}, but found {len(scores)}: {scores}"
)
return float(scores["score"].iloc[0])
@staticmethod @staticmethod
def to_dataframe(training_results: list["TrainingResult"]) -> pd.DataFrame: def to_dataframe(training_results: list["TrainingResult"]) -> pd.DataFrame:
"""Convert a list of TrainingResult objects to a DataFrame for display.""" """Convert a list of TrainingResult objects to a DataFrame for display."""
@ -199,6 +171,8 @@ class TrainingResult:
for tr in training_results: for tr in training_results:
info = tr.display_info info = tr.display_info
best_metric_name = tr._get_best_metric_name() best_metric_name = tr._get_best_metric_name()
best_train_score = tr._get_best_score("train")
best_test_score = tr._get_best_score("test")
record = { record = {
"Experiment": tr.experiment if tr.experiment else "N/A", "Experiment": tr.experiment if tr.experiment else "N/A",
@ -208,9 +182,9 @@ class TrainingResult:
"Grid": GridConfig.from_grid_level((info.grid, info.level)).display_name, "Grid": GridConfig.from_grid_level((info.grid, info.level)).display_name,
"Created At": info.timestamp.strftime("%Y-%m-%d %H:%M"), "Created At": info.timestamp.strftime("%Y-%m-%d %H:%M"),
"Score-Metric": best_metric_name.title(), "Score-Metric": best_metric_name.title(),
"Best Models Score (Train-Set)": tr.train_metrics.get(best_metric_name), "Best Models Score (Train-Set)": best_train_score,
"Best Models Score (Test-Set)": tr.test_metrics.get(best_metric_name), "Best Models Score (Test-Set)": best_test_score,
"Trials": len(tr.results), "Trials": tr.run.n_trials or "N/A",
"Path": str(tr.path.name), "Path": str(tr.path.name),
} }
records.append(record) records.append(record)
@ -219,11 +193,15 @@ class TrainingResult:
@staticmethod @staticmethod
def calculate_inference_maps(training_results: list["TrainingResult"]) -> gpd.GeoDataFrame: def calculate_inference_maps(training_results: list["TrainingResult"]) -> gpd.GeoDataFrame:
"""Calculate the mean and standard deviation of inference maps across multiple training results.""" """Calculate the mean and standard deviation of inference maps across multiple training results."""
assert len({tr.settings.grid for tr in training_results}) == 1, "All training results must have the same grid" assert len({tr.run.dataset.grid for tr in training_results}) == 1, (
assert len({tr.settings.level for tr in training_results}) == 1, "All training results must have the same level" "All training results must have the same grid"
)
assert len({tr.run.dataset.level for tr in training_results}) == 1, (
"All training results must have the same level"
)
grid = training_results[0].settings.grid grid = training_results[0].run.dataset.grid
level = training_results[0].settings.level level = training_results[0].run.dataset.level
gridfile = entropice.utils.paths.get_grid_file(grid, level) gridfile = entropice.utils.paths.get_grid_file(grid, level)
cells = gpd.read_parquet(gridfile, columns=["cell_id", "geometry"]) cells = gpd.read_parquet(gridfile, columns=["cell_id", "geometry"])
if grid == "hex": if grid == "hex":
@ -232,11 +210,9 @@ class TrainingResult:
vals = [] vals = []
for tr in training_results: for tr in training_results:
preds_file = tr.path / "predicted_probabilities.parquet" preds = tr.run.predictions.set_index("cell_id")[["predicted"]]
if not preds_file.exists():
continue
preds = pd.read_parquet(preds_file, columns=["cell_id", "predicted"]).set_index("cell_id")
if preds["predicted"].dtype == "category": if preds["predicted"].dtype == "category":
# We can do this because the categories are ordered
preds["predicted"] = preds["predicted"].cat.codes preds["predicted"] = preds["predicted"].cat.codes
vals.append(preds) vals.append(preds)
all_preds = pd.concat(vals, axis=1) all_preds = pd.concat(vals, axis=1)
@ -257,9 +233,6 @@ def load_all_training_results() -> list[TrainingResult]:
for result_path in results_dir.iterdir(): for result_path in results_dir.iterdir():
if not result_path.is_dir(): if not result_path.is_dir():
continue continue
# Skip AutoGluon results directory
if "autogluon" in result_path.name.lower():
continue
try: try:
training_result = TrainingResult.from_path(result_path) training_result = TrainingResult.from_path(result_path)
training_results.append(training_result) training_results.append(training_result)
@ -288,155 +261,10 @@ def load_all_training_results() -> list[TrainingResult]:
return training_results return training_results
@dataclass
class AutogluonTrainingResult:
"""Wrapper for training result data and metadata."""
path: Path
experiment: str
settings: AutoGluonTrainingSettings
test_metrics: dict[str, float | dict | pd.DataFrame]
leaderboard: pd.DataFrame
feature_importance: pd.DataFrame | None
created_at: float
files: list[Path]
@classmethod
def from_path(cls, result_path: Path, experiment_name: str | None = None) -> "AutogluonTrainingResult":
"""Load an AutogluonTrainingResult from a given result directory path."""
settings_file = result_path / "training_settings.toml"
metrics_file = result_path / "test_metrics.pickle"
leaderboard_file = result_path / "leaderboard.parquet"
feature_importance_file = result_path / "feature_importance.parquet"
all_files = list(result_path.iterdir())
if not settings_file.exists():
raise FileNotFoundError(f"Missing settings file in {result_path}")
if not metrics_file.exists():
raise FileNotFoundError(f"Missing metrics file in {result_path}")
if not leaderboard_file.exists():
raise FileNotFoundError(f"Missing leaderboard file in {result_path}")
created_at = result_path.stat().st_ctime
settings_dict = toml.load(settings_file)["settings"]
settings = AutoGluonTrainingSettings(**settings_dict)
with open(metrics_file, "rb") as f:
metrics = pickle.load(f)
leaderboard = pd.read_parquet(leaderboard_file)
if feature_importance_file.exists():
feature_importance = pd.read_parquet(feature_importance_file)
else:
feature_importance = None
return cls(
path=result_path,
experiment=experiment_name or "N/A",
settings=settings,
test_metrics=metrics,
leaderboard=leaderboard,
feature_importance=feature_importance,
created_at=created_at,
files=all_files,
)
@property
def test_confusion_matrix(self) -> pd.DataFrame | None:
"""Get the test confusion matrix."""
if "confusion_matrix" not in self.test_metrics:
return None
assert isinstance(self.test_metrics["confusion_matrix"], pd.DataFrame)
return self.test_metrics["confusion_matrix"]
@property
def display_info(self) -> TrainingResultDisplayInfo:
"""Get display information for the training result."""
return TrainingResultDisplayInfo(
task=self.settings.task,
target=self.settings.target,
model="autogluon",
grid=self.settings.grid,
level=self.settings.level,
timestamp=datetime.fromtimestamp(self.created_at),
)
def _get_best_metric_name(self) -> str:
"""Get the primary metric name for a given task."""
match self.settings.task:
case "binary":
return "f1"
case "count_regimes" | "density_regimes":
return "f1_weighted"
case _: # regression tasks
return "root_mean_squared_error"
@staticmethod
def to_dataframe(training_results: list["AutogluonTrainingResult"]) -> pd.DataFrame:
"""Convert a list of AutogluonTrainingResult objects to a DataFrame for display."""
records = []
for tr in training_results:
info = tr.display_info
best_metric_name = tr._get_best_metric_name()
record = {
"Experiment": tr.experiment if tr.experiment else "N/A",
"Task": info.task,
"Target": info.target,
"Model": info.model,
"Grid": GridConfig.from_grid_level((info.grid, info.level)).display_name,
"Created At": info.timestamp.strftime("%Y-%m-%d %H:%M"),
"Score-Metric": best_metric_name.title(),
"Best Models Score (Test-Set)": tr.test_metrics.get(best_metric_name),
"Path": str(tr.path.name),
}
records.append(record)
return pd.DataFrame.from_records(records)
@st.cache_data(ttl=300) # Cache for 5 minutes
def load_all_autogluon_training_results() -> list[AutogluonTrainingResult]:
"""Load all training results from the results directory."""
results_dir = entropice.utils.paths.RESULTS_DIR
training_results: list[AutogluonTrainingResult] = []
incomplete_results: list[tuple[Path, Exception]] = []
for result_path in results_dir.iterdir():
if not result_path.is_dir():
continue
# Skip AutoGluon results directory
if "autogluon" not in result_path.name.lower():
continue
try:
training_result = AutogluonTrainingResult.from_path(result_path)
training_results.append(training_result)
except FileNotFoundError as e:
is_experiment_dir = False
for experiment_path in result_path.iterdir():
if not experiment_path.is_dir():
continue
try:
experiment_name = experiment_path.parent.name
training_result = AutogluonTrainingResult.from_path(experiment_path, experiment_name)
training_results.append(training_result)
is_experiment_dir = True
except FileNotFoundError as e2:
incomplete_results.append((experiment_path, e2))
if not is_experiment_dir:
incomplete_results.append((result_path, e))
if len(incomplete_results) > 0:
st.warning(
f"Found {len(incomplete_results)} incomplete autogluon training results that were skipped:\n - "
+ "\n - ".join(f"{p}: {e}" for p, e in incomplete_results)
)
# Sort by creation time (most recent first)
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
return training_results
def load_training_sets(ensemble: DatasetEnsemble) -> dict[TargetDataset, dict[Task, TrainingSet]]: def load_training_sets(ensemble: DatasetEnsemble) -> dict[TargetDataset, dict[Task, TrainingSet]]:
"""Load training sets for all target-task combinations in the ensemble.""" """Load training sets for all target-task combinations in the ensemble."""
train_data_dict: dict[TargetDataset, dict[Task, TrainingSet]] = {} train_data_dict: dict[TargetDataset, dict[Task, TrainingSet]] = defaultdict(dict)
for target in all_target_datasets: for target in all_target_datasets:
train_data_dict[target] = {}
for task in all_tasks: for task in all_tasks:
train_data_dict[target][task] = ensemble.create_training_set(target=target, task=task) train_data_dict[target][task] = ensemble.create_training_set(target=target, task=task)
return train_data_dict return train_data_dict
@ -490,10 +318,6 @@ def load_experiment_training_results(experiment_name: str) -> list[TrainingResul
for result_path in experiment_dir.iterdir(): for result_path in experiment_dir.iterdir():
if not result_path.is_dir(): if not result_path.is_dir():
continue continue
# Skip AutoGluon results
if "autogluon" in result_path.name.lower():
continue
try: try:
training_result = TrainingResult.from_path(result_path, experiment_name) training_result = TrainingResult.from_path(result_path, experiment_name)
training_results.append(training_result) training_results.append(training_result)
@ -505,47 +329,11 @@ def load_experiment_training_results(experiment_name: str) -> list[TrainingResul
return training_results return training_results
def load_experiment_autogluon_results(experiment_name: str) -> list[AutogluonTrainingResult]: def create_experiment_summary_df(training_results: list[TrainingResult]) -> pd.DataFrame:
"""Load all AutoGluon training results for a specific experiment.
Args:
experiment_name: Name of the experiment directory
Returns:
List of AutogluonTrainingResult objects for the experiment
"""
experiment_dir = entropice.utils.paths.RESULTS_DIR / experiment_name
if not experiment_dir.exists():
return []
training_results: list[AutogluonTrainingResult] = []
for result_path in experiment_dir.iterdir():
if not result_path.is_dir():
continue
# Only include AutoGluon results
if "autogluon" not in result_path.name.lower():
continue
try:
training_result = AutogluonTrainingResult.from_path(result_path, experiment_name)
training_results.append(training_result)
except FileNotFoundError:
pass # Skip incomplete results
# Sort by creation time (most recent first)
training_results.sort(key=lambda tr: tr.created_at, reverse=True)
return training_results
def create_experiment_summary_df(
training_results: list[TrainingResult], autogluon_results: list[AutogluonTrainingResult]
) -> pd.DataFrame:
"""Create a summary DataFrame for all results in an experiment. """Create a summary DataFrame for all results in an experiment.
Args: Args:
training_results: List of TrainingResult objects training_results: List of TrainingResult objects
autogluon_results: List of AutogluonTrainingResult objects
Returns: Returns:
DataFrame with summary statistics for the experiment DataFrame with summary statistics for the experiment
@ -566,55 +354,26 @@ def create_experiment_summary_df(
"grid": info.grid, "grid": info.grid,
"level": info.level, "level": info.level,
"grid_level": f"{info.grid}_{info.level}", "grid_level": f"{info.grid}_{info.level}",
"train_score": tr.train_metrics.get(best_metric_name, float("nan")), "train_score": tr._get_best_score("train"),
"test_score": tr.test_metrics.get(best_metric_name, float("nan")), "test_score": tr._get_best_score("test"),
"combined_score": tr.combined_metrics.get(best_metric_name, float("nan")), "complete_score": tr._get_best_score("complete"),
"best_metric": best_metric_name, "best_metric": best_metric_name,
"n_trials": len(tr.results), "n_trials": tr.run.n_trials or "N/A",
"created_at": tr.created_at, "created_at": tr.created_at,
"path": tr.path, "path": tr.path,
} }
# Add all train metrics # Add all train metrics
for metric, value in tr.train_metrics.items(): for metric, value in tr.run.get_metrics_from_split("train").items():
record[f"train_{metric}"] = value record[f"train_{metric}"] = value
# Add all test metrics # Add all test metrics
for metric, value in tr.test_metrics.items(): for metric, value in tr.run.get_metrics_from_split("test").items():
record[f"test_{metric}"] = value record[f"test_{metric}"] = value
# Add all combined metrics # Add all complete metrics
for metric, value in tr.combined_metrics.items(): for metric, value in tr.run.get_metrics_from_split("complete").items():
record[f"combined_{metric}"] = value record[f"complete_{metric}"] = value
records.append(record)
# Add AutoGluon results
for ag in autogluon_results:
info = ag.display_info
best_metric_name = ag._get_best_metric_name()
record = {
"method": "AutoGluon",
"task": info.task,
"target": info.target,
"model": "ensemble", # AutoGluon is an ensemble
"grid": info.grid,
"level": info.level,
"grid_level": f"{info.grid}_{info.level}",
"train_score": float("nan"), # AutoGluon doesn't separate train scores
"test_score": ag.test_metrics.get(best_metric_name, float("nan")),
"combined_score": float("nan"),
"best_metric": best_metric_name,
"n_trials": len(ag.leaderboard),
"created_at": ag.created_at,
"path": ag.path,
}
# Add test metrics
for metric, value in ag.test_metrics.items():
if isinstance(value, (int, float)):
record[f"test_{metric}"] = value
records.append(record) records.append(record)

View file

@ -278,11 +278,11 @@ def load_all_default_dataset_statistics() -> dict[GridLevel, dict[TemporalMode,
dataset_stats: dict[GridLevel, dict[TemporalMode, DatasetStatistics]] = {} dataset_stats: dict[GridLevel, dict[TemporalMode, DatasetStatistics]] = {}
for grid_config in grid_configs: for grid_config in grid_configs:
dataset_stats[grid_config.id] = {} dataset_stats[grid_config.id] = {}
with stopwatch(f"Loading statistics for grid={grid_config.grid}, level={grid_config.level}"): grid_gdf = entropice.spatial.grids.open(grid_config.grid, grid_config.level) # Ensure grid is registered
grid_gdf = entropice.spatial.grids.open(grid_config.grid, grid_config.level) # Ensure grid is registered total_cells = len(grid_gdf)
total_cells = len(grid_gdf) assert total_cells > 0, "Grid must contain at least one cell."
assert total_cells > 0, "Grid must contain at least one cell." for temporal_mode in all_temporal_modes:
for temporal_mode in all_temporal_modes: with stopwatch(f"Loading statistics for {grid_config.grid=}, {grid_config.level=}, {temporal_mode=}"):
e = DatasetEnsemble(grid=grid_config.grid, level=grid_config.level, temporal_mode=temporal_mode) e = DatasetEnsemble(grid=grid_config.grid, level=grid_config.level, temporal_mode=temporal_mode)
target_statistics = {} target_statistics = {}
for target in all_target_datasets: for target in all_target_datasets:
@ -437,23 +437,27 @@ class CVMetricStatistics:
mean_cv_std: float | None mean_cv_std: float | None
@classmethod @classmethod
def compute(cls, result: TrainingResult, metric: str) -> "CVMetricStatistics": def compute(cls, result: TrainingResult, metric: str) -> "CVMetricStatistics | None":
"""Get cross-validation statistics for a metric.""" """Get cross-validation statistics for a metric."""
score_col = f"mean_test_{metric}" score_col = f"mean_test_{metric}"
std_col = f"std_test_{metric}" std_col = f"std_test_{metric}"
if score_col not in result.results.columns: cv_results = result.run.cv_results
if cv_results is None:
return None
if score_col not in cv_results.columns:
raise ValueError(f"Metric {metric} not found in results.") raise ValueError(f"Metric {metric} not found in results.")
best_score = result.results[score_col].max() best_score = cv_results[score_col].max()
mean_score = result.results[score_col].mean() mean_score = cv_results[score_col].mean()
std_score = result.results[score_col].std() std_score = cv_results[score_col].std()
worst_score = result.results[score_col].min() worst_score = cv_results[score_col].min()
median_score = result.results[score_col].median() median_score = cv_results[score_col].median()
mean_cv_std = None mean_cv_std = None
if std_col in result.results.columns: if std_col in cv_results.columns:
mean_cv_std = result.results[std_col].mean() mean_cv_std = cv_results[std_col].mean()
return CVMetricStatistics( return CVMetricStatistics(
best_score=best_score, best_score=best_score,
@ -477,10 +481,12 @@ class ParameterSpaceSummary:
unique_values: int unique_values: int
@classmethod @classmethod
def compute(cls, result: TrainingResult, param_col: str) -> "ParameterSpaceSummary": def compute(cls, result: TrainingResult, param_col: str) -> "ParameterSpaceSummary | None":
"""Get cross-validation statistics for a metric.""" """Get cross-validation statistics for a metric."""
if result.run.cv_results is None:
return None
param_name = param_col.replace("param_", "") param_name = param_col.replace("param_", "")
param_values = result.results[param_col].dropna() param_values = result.run.cv_results[param_col].dropna()
if pd.api.types.is_numeric_dtype(param_values): if pd.api.types.is_numeric_dtype(param_values):
return ParameterSpaceSummary( return ParameterSpaceSummary(
@ -511,14 +517,19 @@ class CVResultsStatistics:
parameter_summary: list[ParameterSpaceSummary] parameter_summary: list[ParameterSpaceSummary]
@classmethod @classmethod
def compute(cls, result: TrainingResult) -> "CVResultsStatistics": def compute(cls, result: TrainingResult) -> "CVResultsStatistics | None":
"""Get cross-validation statistics for a metric.""" """Get cross-validation statistics for a metric."""
if result.run.cv_results is None:
return None
metrics = result.available_metrics metrics = result.available_metrics
metric_stats: dict[str, CVMetricStatistics] = {} metric_stats: dict[str, CVMetricStatistics] = {}
for metric in metrics: for metric in metrics:
metric_stats[metric] = CVMetricStatistics.compute(result, metric) stats = CVMetricStatistics.compute(result, metric)
if stats is None:
continue
metric_stats[metric] = stats
param_cols = [col for col in result.results.columns if col.startswith("param_") and col != "params"] param_cols = [col for col in result.run.cv_results.columns if col.startswith("param_") and col != "params"]
summary_data = [] summary_data = []
for param_col in param_cols: for param_col in param_cols:
summary_data.append(ParameterSpaceSummary.compute(result, param_col)) summary_data.append(ParameterSpaceSummary.compute(result, param_col))

View file

@ -34,7 +34,7 @@ def render_dataset_configuration_sidebar() -> DatasetEnsemble:
# Grid selection # Grid selection
grid_options = [gc.display_name for gc in grid_configs] grid_options = [gc.display_name for gc in grid_configs]
grid_level_combined = st.selectbox( grid_level_complete = st.selectbox(
"Grid Configuration", "Grid Configuration",
options=grid_options, options=grid_options,
index=0, index=0,
@ -43,7 +43,7 @@ def render_dataset_configuration_sidebar() -> DatasetEnsemble:
) )
# Find the selected grid config # Find the selected grid config
selected_grid_config: GridConfig = next(gc for gc in grid_configs if gc.display_name == grid_level_combined) selected_grid_config: GridConfig = next(gc for gc in grid_configs if gc.display_name == grid_level_complete)
# Temporal mode selection # Temporal mode selection
temporal_mode = st.selectbox( temporal_mode = st.selectbox(

View file

@ -12,7 +12,6 @@ from entropice.dashboard.sections.experiment_overview import (
) )
from entropice.dashboard.utils.loaders import ( from entropice.dashboard.utils.loaders import (
create_experiment_summary_df, create_experiment_summary_df,
load_experiment_autogluon_results,
load_experiment_training_results, load_experiment_training_results,
) )
@ -43,14 +42,13 @@ def render_experiment_analysis_page():
# Load experiment results # Load experiment results
with st.spinner(f"Loading results for experiment: {selected_experiment}..."): with st.spinner(f"Loading results for experiment: {selected_experiment}..."):
training_results = load_experiment_training_results(selected_experiment) training_results = load_experiment_training_results(selected_experiment)
autogluon_results = load_experiment_autogluon_results(selected_experiment)
if not training_results and not autogluon_results: if not training_results:
st.warning(f"No training results found in experiment: {selected_experiment}") st.warning(f"No training results found in experiment: {selected_experiment}")
st.stop() st.stop()
# Create summary DataFrame # Create summary DataFrame
summary_df = create_experiment_summary_df(training_results, autogluon_results) summary_df = create_experiment_summary_df(training_results)
# Get available metrics # Get available metrics
metric_columns = [col for col in summary_df.columns if col.startswith("test_")] metric_columns = [col for col in summary_df.columns if col.startswith("test_")]
@ -61,7 +59,7 @@ def render_experiment_analysis_page():
st.stop() st.stop()
# Render analysis sections # Render analysis sections
render_experiment_overview(selected_experiment, training_results, autogluon_results, summary_df) render_experiment_overview(selected_experiment, training_results, summary_df)
st.divider() st.divider()
@ -73,7 +71,7 @@ def render_experiment_analysis_page():
st.divider() st.divider()
render_feature_importance_analysis(training_results, autogluon_results) render_feature_importance_analysis(training_results)
st.divider() st.divider()

View file

@ -1,16 +1,8 @@
"""Inference page: Visualization of model inference results across the study region.""" """Inference page: Visualization of model inference results across the study region."""
import geopandas as gpd
import streamlit as st import streamlit as st
from stopuhr import stopwatch from stopuhr import stopwatch
from entropice.dashboard.plots.inference import (
render_class_comparison,
render_class_distribution_histogram,
render_inference_map,
render_inference_statistics,
render_spatial_distribution_stats,
)
from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results
@ -27,7 +19,9 @@ def render_sidebar_selection(training_results: list[TrainingResult]) -> Training
st.header("Select Training Run") st.header("Select Training Run")
# Create selection options with task-first naming # Create selection options with task-first naming
training_options = {tr.display_info.get_display_name("task_first"): tr for tr in training_results} training_options: dict[str, TrainingResult] = {
tr.display_info.get_display_name("task_first"): tr for tr in training_results
}
selected_name = st.selectbox( selected_name = st.selectbox(
"Training Run", "Training Run",
@ -44,113 +38,14 @@ def render_sidebar_selection(training_results: list[TrainingResult]) -> Training
# Show run information in sidebar # Show run information in sidebar
st.subheader("Run Information") st.subheader("Run Information")
st.markdown(f"**Task:** {selected_result.settings.task.capitalize()}") st.markdown(f"**Task:** {selected_result.run.task.capitalize()}")
st.markdown(f"**Model:** {selected_result.settings.model.upper()}") st.markdown(f"**Model:** {selected_result.run.model_type.upper()}")
st.markdown(f"**Grid:** {selected_result.settings.grid.capitalize()}") st.markdown(f"**Grid:** {selected_result.run.dataset.grid.capitalize()}")
st.markdown(f"**Level:** {selected_result.settings.level}") st.markdown(f"**Level:** {selected_result.run.dataset.level}")
st.markdown(f"**Target:** {selected_result.settings.target.replace('darts_', '')}") st.markdown(f"**Target:** {selected_result.run.target.replace('darts_', '')}")
return selected_result return selected_result
def render_run_information(selected_result: TrainingResult):
"""Render training run configuration overview.
Args:
selected_result: The selected TrainingResult object.
"""
st.header("📋 Run Configuration")
col1, col2, col3, col4, col5 = st.columns(5)
with col1:
st.metric("Task", selected_result.settings.task.capitalize())
with col2:
st.metric("Model", selected_result.settings.model.upper())
with col3:
st.metric("Grid", selected_result.settings.grid.capitalize())
with col4:
st.metric("Level", selected_result.settings.level)
with col5:
st.metric("Target", selected_result.settings.target.replace("darts_", ""))
def render_inference_statistics_section(predictions_gdf: gpd.GeoDataFrame, task: str):
"""Render inference summary statistics section.
Args:
predictions_gdf: GeoDataFrame with predictions.
task: Task type ('binary', 'count_regimes', 'density_regimes', 'count', 'density').
"""
st.header("📊 Inference Summary")
render_inference_statistics(predictions_gdf, task)
def render_spatial_coverage_section(predictions_gdf: gpd.GeoDataFrame):
"""Render spatial coverage statistics section.
Args:
predictions_gdf: GeoDataFrame with predictions.
"""
st.header("🌍 Spatial Coverage")
render_spatial_distribution_stats(predictions_gdf)
def render_map_visualization_section(selected_result: TrainingResult):
"""Render 3D map visualization section.
Args:
selected_result: The selected TrainingResult object.
"""
st.header("🗺️ Interactive Prediction Map")
st.markdown(
"""
3D visualization of predictions across the study region. The map shows predicted
classes with color coding and spatial distribution of model outputs.
"""
)
render_inference_map(selected_result)
def render_class_distribution_section(predictions_gdf: gpd.GeoDataFrame, task: str):
"""Render class distribution histogram section.
Args:
predictions_gdf: GeoDataFrame with predictions.
task: Task type ('binary', 'count_regimes', 'density_regimes', 'count', 'density').
"""
st.header("📈 Class Distribution")
st.markdown("Distribution of predicted classes across all inference cells.")
render_class_distribution_histogram(predictions_gdf, task)
def render_class_comparison_section(predictions_gdf: gpd.GeoDataFrame, task: str):
"""Render class comparison analysis section.
Args:
predictions_gdf: GeoDataFrame with predictions.
task: Task type ('binary', 'count_regimes', 'density_regimes', 'count', 'density').
"""
st.header("🔍 Class Comparison Analysis")
st.markdown(
"""
Detailed comparison of predicted classes showing probability distributions
and confidence metrics for different class predictions.
"""
)
render_class_comparison(predictions_gdf, task)
def render_inference_page(): def render_inference_page():
"""Render the Inference page of the dashboard.""" """Render the Inference page of the dashboard."""
st.title("🗺️ Inference Results") st.title("🗺️ Inference Results")
@ -179,45 +74,18 @@ def render_inference_page():
with st.sidebar: with st.sidebar:
selected_result = render_sidebar_selection(training_results) selected_result = render_sidebar_selection(training_results)
# Main content area - Run Information # Main content area
render_run_information(selected_result)
st.divider()
# Check if predictions file exists
preds_file = selected_result.path / "predicted_probabilities.parquet"
if not preds_file.exists():
st.error("No inference results found for this training run.")
st.info("Inference results are generated automatically during training.")
return
# Load predictions # Load predictions
with st.spinner("Loading inference results..."): with st.spinner("Loading inference results..."):
predictions_gdf = gpd.read_parquet(preds_file) # Columns: Index(['cell_id', 'predicted', 'geometry'], dtype='object')
task = selected_result.settings.task predictions_gdf = selected_result.run.predictions
task = selected_result.run.task
# Inference Statistics Section # TODO: Implement the sections
render_inference_statistics_section(predictions_gdf, task) # Map, optionally 3D
# Some statistics about the predictions
st.divider() # Class Distribution for classification tasks
# Distribution of predicted values for regression tasks
# Spatial Coverage Section
render_spatial_coverage_section(predictions_gdf)
st.divider()
# 3D Map Visualization Section
render_map_visualization_section(selected_result)
st.divider()
# Class Distribution Section
render_class_distribution_section(predictions_gdf, task)
st.divider()
# Class Comparison Section
render_class_comparison_section(predictions_gdf, task)
st.balloons() st.balloons()
stopwatch.summary() stopwatch.summary()

View file

@ -1,919 +0,0 @@
"""Model State page: Visualization of model internal state and feature importance."""
import streamlit as st
import xarray as xr
from stopuhr import stopwatch
from entropice.dashboard.plots.model_state import (
plot_arcticdem_heatmap,
plot_arcticdem_summary,
plot_box_assignment_bars,
plot_box_assignments,
plot_common_features,
plot_embedding_aggregation_summary,
plot_embedding_heatmap,
plot_era5_heatmap,
plot_era5_summary,
plot_era5_time_heatmap,
plot_top_features,
)
from entropice.dashboard.utils.colors import generate_unified_colormap
from entropice.dashboard.utils.loaders import TrainingResult, load_all_training_results
from entropice.dashboard.utils.unsembler import (
extract_arcticdem_features,
extract_common_features,
extract_embedding_features,
extract_era5_features,
)
from entropice.utils.types import L2SourceDataset
def get_members_from_settings(settings) -> list[L2SourceDataset]:
"""Extract dataset members from training settings.
Args:
settings: TrainingSettings object containing dataset configuration.
Returns:
List of L2SourceDataset members used in training.
"""
return settings.members
def render_sidebar_selection(training_results: list[TrainingResult]) -> TrainingResult:
"""Render sidebar for training run selection.
Args:
training_results: List of available TrainingResult objects.
Returns:
Selected TrainingResult object.
"""
st.header("Select Training Run")
# Result selection with task-first naming
result_options = {tr.display_info.get_display_name("task_first"): tr for tr in training_results}
selected_name = st.selectbox(
"Training Run",
options=list(result_options.keys()),
index=0,
help="Choose a training result to visualize model state",
key="model_state_training_run_select",
)
selected_result = result_options[selected_name]
return selected_result
def render_model_info(model_state: xr.Dataset, model_type: str):
"""Render basic model state information.
Args:
model_state: Xarray dataset containing model state.
model_type: Type of model (espa, xgboost, rf, knn).
"""
with st.expander("Model State Information", expanded=False):
st.write(f"**Model Type:** {model_type.upper()}")
st.write(f"**Variables:** {list(model_state.data_vars)}")
st.write(f"**Dimensions:** {dict(model_state.sizes)}")
st.write(f"**Coordinates:** {list(model_state.coords)}")
st.write(f"**Attributes:** {dict(model_state.attrs)}")
def render_training_data_summary(members: list[L2SourceDataset]):
"""Render summary of training data sources.
Args:
members: List of dataset members used in training.
"""
st.header("📊 Training Data Summary")
st.markdown(
f"""
**Dataset Members Used in Training:** {len(members)}
The following data sources were used to train this model:
"""
)
# Create a nice display of members with emojis
member_display = {
"AlphaEarth": "🛰️ AlphaEarth (Satellite Embeddings)",
"ArcticDEM": "🏔️ ArcticDEM (Topography)",
"ERA5-yearly": "⛅ ERA5 Yearly (Climate)",
"ERA5-seasonal": "⛅ ERA5 Seasonal (Summer/Winter)",
"ERA5-shoulder": "⛅ ERA5 Shoulder Seasons (JFM/AMJ/JAS/OND)",
}
cols = st.columns(min(len(members), 3))
for idx, member in enumerate(members):
with cols[idx % 3]:
display_name = member_display.get(member, f"📁 {member}")
st.info(display_name)
def render_model_state_page():
"""Render the Model State page of the dashboard."""
st.title("🔬 Model State")
st.markdown(
"""
Comprehensive visualization of the best model's internal state and feature importance.
Select a training run from the sidebar to explore model parameters, feature weights,
and data source contributions.
"""
)
# Load available training results
training_results = load_all_training_results()
if not training_results:
st.warning("No training results found. Please run some training experiments first.")
st.info("Run training using: `pixi run python -m entropice.ml.training`")
return
st.success(f"Found **{len(training_results)}** training result(s)")
st.divider()
# Sidebar: Training run selection
with st.sidebar:
selected_result = render_sidebar_selection(training_results)
# Get the model type from settings
model_type = selected_result.settings.model
# Load model state
with st.spinner("Loading model state..."):
model_state = selected_result.load_model_state()
if model_state is None:
st.error("Could not load model state for this result.")
st.info("The model state file (best_estimator_state.nc) may be missing from the training results.")
return
# Display basic model state info
render_model_info(model_state, model_type)
# Display dataset members summary
members = get_members_from_settings(selected_result.settings)
render_training_data_summary(members)
st.divider()
# Render model-specific visualizations
if model_type == "espa":
render_espa_model_state(model_state, selected_result)
elif model_type == "xgboost":
render_xgboost_model_state(model_state, selected_result)
elif model_type == "rf":
render_rf_model_state(model_state, selected_result)
elif model_type == "knn":
render_knn_model_state(model_state, selected_result)
else:
st.warning(f"Visualization for model type '{model_type}' is not yet implemented.")
st.balloons()
stopwatch.summary()
def render_espa_model_state(model_state: xr.Dataset, selected_result: TrainingResult):
    """Render visualizations for ESPA model.

    Shows learned feature weights, box-to-label assignments, and per-source
    feature breakdowns (AlphaEarth embeddings, ERA5, ArcticDEM, common features).

    Args:
        model_state: Xarray dataset containing ESPA model state.
        selected_result: TrainingResult object containing training configuration.
    """
    # Work on a copy so the caller's (possibly cached) dataset is not mutated.
    # Streamlit reruns this function on every interaction; scaling the cached
    # weights in place with `*=` would compound the factor on each rerun.
    model_state = model_state.copy()
    # Scale feature weights by number of features
    n_features = model_state.sizes["feature"]
    model_state["feature_weights"] = model_state["feature_weights"] * n_features
    # Get members used in training
    members = get_members_from_settings(selected_result.settings)
    # Extract different feature types based on what was used in training
    embedding_feature_array = None
    if "AlphaEarth" in members:
        embedding_feature_array = extract_embedding_features(model_state)
    era5_yearly_array = None
    era5_seasonal_array = None
    era5_shoulder_array = None
    if "ERA5-yearly" in members:
        era5_yearly_array = extract_era5_features(model_state, temporal_group="yearly")
    if "ERA5-seasonal" in members:
        era5_seasonal_array = extract_era5_features(model_state, temporal_group="seasonal")
    if "ERA5-shoulder" in members:
        era5_shoulder_array = extract_era5_features(model_state, temporal_group="shoulder")
    arcticdem_feature_array = None
    if "ArcticDEM" in members:
        arcticdem_feature_array = extract_arcticdem_features(model_state)
    common_feature_array = extract_common_features(model_state)
    # Generate unified colormaps (convert dataclass to dict)
    settings_dict = {"task": selected_result.settings.task, "classes": selected_result.settings.classes}
    _, _, altair_colors = generate_unified_colormap(settings_dict)
    # Feature importance section
    st.header("Feature Importance")
    st.markdown("The most important features based on learned feature weights from the best estimator.")

    # Fragment so moving the slider only reruns this sub-section, not the page.
    @st.fragment
    def render_feature_importance():
        # Slider to control number of features to display
        top_n = st.slider(
            "Number of top features to display",
            min_value=5,
            max_value=50,
            value=10,
            step=5,
            help="Select how many of the most important features to visualize",
        )
        with st.spinner("Generating feature importance plot..."):
            feature_chart = plot_top_features(model_state, top_n=top_n)
            st.altair_chart(feature_chart, width="stretch")
        st.markdown(
            """
            **Interpretation:**
            - **Magnitude**: Larger absolute values indicate more important features
            - **Color**: Blue bars indicate positive weights, coral bars indicate negative weights
            """
        )

    render_feature_importance()
    # Box-to-Label Assignment Visualization
    st.header("Box-to-Label Assignments")
    st.markdown(
        """
        This visualization shows how the learned boxes (prototypes in feature space) are
        assigned to different class labels. The ESPA classifier learns K boxes and assigns
        them to classes through the Lambda matrix. Higher values indicate stronger assignment
        of a box to a particular class.
        """
    )
    with st.spinner("Generating box assignment visualizations..."):
        col1, col2 = st.columns([0.7, 0.3])
        with col1:
            st.markdown("### Assignment Heatmap")
            box_assignment_heatmap = plot_box_assignments(model_state)
            st.altair_chart(box_assignment_heatmap, width="stretch")
        with col2:
            st.markdown("### Box Count by Class")
            box_assignment_bars = plot_box_assignment_bars(model_state, altair_colors)
            st.altair_chart(box_assignment_bars, width="stretch")
    # Show statistics
    with st.expander("Box Assignment Statistics"):
        box_assignments = model_state["box_assignments"].to_pandas()
        st.write("**Assignment Matrix Statistics:**")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Boxes", len(box_assignments.columns))
        with col2:
            st.metric("Number of Classes", len(box_assignments.index))
        with col3:
            st.metric("Mean Assignment", f"{box_assignments.to_numpy().mean():.4f}")
        with col4:
            st.metric("Max Assignment", f"{box_assignments.to_numpy().max():.4f}")
        # Show which boxes are most strongly assigned to each class
        st.write("**Top Box Assignments per Class:**")
        for class_label in box_assignments.index:
            top_boxes = box_assignments.loc[class_label].nlargest(5)
            st.write(
                f"**Class {class_label}:** Boxes {', '.join(map(str, top_boxes.index.tolist()))} "
                f"(strengths: {', '.join(f'{v:.3f}' for v in top_boxes.to_numpy())})"
            )
    st.markdown(
        """
        **Interpretation:**
        - Each box can be assigned to multiple classes with different strengths
        - Boxes with higher assignment values for a class contribute more to that class's predictions
        - The distribution shows how the model partitions the feature space for classification
        """
    )
    # Embedding features analysis (if present)
    if embedding_feature_array is not None:
        render_embedding_features(embedding_feature_array)
    # ERA5 features analysis (if present) - split by temporal group
    if era5_yearly_array is not None:
        render_era5_features(era5_yearly_array, temporal_group="Yearly")
    if era5_seasonal_array is not None:
        render_era5_features(era5_seasonal_array, temporal_group="Seasonal")
    if era5_shoulder_array is not None:
        render_era5_features(era5_shoulder_array, temporal_group="Shoulder")
    # ArcticDEM features analysis (if present)
    if arcticdem_feature_array is not None:
        render_arcticdem_features(arcticdem_feature_array)
    # Common features analysis (if present)
    if common_feature_array is not None:
        render_common_features(common_feature_array)
def render_xgboost_model_state(model_state: xr.Dataset, selected_result: TrainingResult):
    """Render visualizations for XGBoost model.

    Shows feature importance for a user-selected XGBoost importance metric,
    a cross-metric comparison, basic model statistics, and a per-data-source
    breakdown of feature importance.

    Args:
        model_state: Xarray dataset containing XGBoost model state.
        selected_result: TrainingResult object containing training configuration.
    """
    # Local import keeps the plotting module off the dashboard's hot import path.
    from entropice.dashboard.plots.model_state import (
        plot_xgboost_feature_importance,
        plot_xgboost_importance_comparison,
    )

    st.header("🌲 XGBoost Model Analysis")
    st.markdown(
        f"""
        XGBoost gradient boosted tree model with **{model_state.attrs.get("n_trees", "N/A")} trees**.
        **Objective:** {model_state.attrs.get("objective", "N/A")}
        """
    )
    # Feature importance with different types
    st.subheader("Feature Importance Analysis")
    st.markdown(
        """
        XGBoost provides multiple ways to measure feature importance:
        - **Weight**: Number of times a feature is used to split the data
        - **Gain**: Average gain across all splits using the feature
        - **Cover**: Average coverage across all splits using the feature
        - **Total Gain**: Total gain across all splits
        - **Total Cover**: Total coverage across all splits
        """
    )
    # Importance type selector
    importance_type = st.selectbox(
        "Select Importance Type",
        options=["gain", "weight", "cover", "total_gain", "total_cover"],
        index=0,
        help="Choose which importance metric to visualize",
        key="model_state_importance_type",
    )
    # Top N slider
    top_n = st.slider(
        "Number of top features to display",
        min_value=5,
        max_value=50,
        value=20,
        step=5,
        help="Select how many of the most important features to visualize",
    )
    with st.spinner("Generating feature importance plot..."):
        importance_chart = plot_xgboost_feature_importance(model_state, importance_type=importance_type, top_n=top_n)
        st.altair_chart(importance_chart, width="stretch")
    # Comparison of importance types
    st.subheader("Importance Type Comparison")
    st.markdown("Compare the top features across different importance metrics.")
    with st.spinner("Generating importance comparison..."):
        comparison_chart = plot_xgboost_importance_comparison(model_state, top_n=15)
        st.altair_chart(comparison_chart, width="stretch")
    # Statistics
    with st.expander("Model Statistics"):
        st.write("**Overall Statistics:**")
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Number of Trees", model_state.attrs.get("n_trees", "N/A"))
        with col2:
            st.metric("Total Features", model_state.sizes.get("feature", "N/A"))
    # Feature source analysis
    st.subheader("Feature Importance by Data Source")
    st.markdown(
        """
        Breakdown of feature importance by data source (AlphaEarth embeddings, ERA5 climate,
        ArcticDEM topography, and common features).
        """
    )
    # Get members used in training
    members = get_members_from_settings(selected_result.settings)
    # Extract features by source using the selected importance type
    # NOTE(review): assumes the dataset stores one variable per metric named
    # "feature_importance_<type>" — confirm against the extract_* helpers.
    importance_var = f"feature_importance_{importance_type}"
    embedding_feature_array = None
    if "AlphaEarth" in members:
        embedding_feature_array = extract_embedding_features(model_state, importance_type=importance_var)
    era5_yearly_array = None
    era5_seasonal_array = None
    era5_shoulder_array = None
    if "ERA5-yearly" in members:
        era5_yearly_array = extract_era5_features(model_state, importance_type=importance_var, temporal_group="yearly")
    if "ERA5-seasonal" in members:
        era5_seasonal_array = extract_era5_features(
            model_state, importance_type=importance_var, temporal_group="seasonal"
        )
    if "ERA5-shoulder" in members:
        era5_shoulder_array = extract_era5_features(
            model_state, importance_type=importance_var, temporal_group="shoulder"
        )
    arcticdem_feature_array = None
    if "ArcticDEM" in members:
        arcticdem_feature_array = extract_arcticdem_features(model_state, importance_type=importance_var)
    common_feature_array = extract_common_features(model_state, importance_type=importance_var)
    # Render each source's features if present
    if embedding_feature_array is not None:
        render_embedding_features(embedding_feature_array)
    if era5_yearly_array is not None:
        render_era5_features(era5_yearly_array, temporal_group="Yearly")
    if era5_seasonal_array is not None:
        render_era5_features(era5_seasonal_array, temporal_group="Seasonal")
    if era5_shoulder_array is not None:
        render_era5_features(era5_shoulder_array, temporal_group="Shoulder")
    if arcticdem_feature_array is not None:
        render_arcticdem_features(arcticdem_feature_array)
    if common_feature_array is not None:
        render_common_features(common_feature_array)
def render_rf_model_state(model_state: xr.Dataset, selected_result: TrainingResult):
    """Render visualizations for Random Forest model.

    Shows Gini feature importance, per-tree structure statistics (sklearn
    backend only), and a per-data-source breakdown of feature importance.

    Args:
        model_state: Xarray dataset containing Random Forest model state.
        selected_result: TrainingResult object containing training configuration.
    """
    # Local import keeps the plotting module off the dashboard's hot import path.
    from entropice.dashboard.plots.model_state import plot_rf_feature_importance

    st.header("🌳 Random Forest Model Analysis")
    # Check if using cuML (which doesn't provide tree statistics)
    # NOTE(review): detection relies on "cuML" appearing in the `description`
    # attribute written at training time — confirm the writer keeps that marker.
    is_cuml = "cuML" in model_state.attrs.get("description", "")
    st.markdown(
        f"""
        Random Forest ensemble with **{model_state.attrs.get("n_estimators", "N/A")} trees**
        (max depth: {model_state.attrs.get("max_depth", "N/A")}).
        """
    )
    if is_cuml:
        st.info(" Using cuML GPU-accelerated Random Forest. Individual tree statistics are not available.")
    # Display OOB score if available
    oob_score = model_state.attrs.get("oob_score")
    if oob_score is not None:
        st.info(f"**Out-of-Bag Score:** {oob_score:.4f}")
    # Feature importance
    st.subheader("Feature Importance (Gini Importance)")
    st.markdown(
        """
        Random Forest uses Gini impurity to measure feature importance. Features with higher
        importance values contribute more to the model's predictions.
        """
    )
    # Top N slider
    top_n = st.slider(
        "Number of top features to display",
        min_value=5,
        max_value=50,
        value=20,
        step=5,
        help="Select how many of the most important features to visualize",
    )
    with st.spinner("Generating feature importance plot..."):
        importance_chart = plot_rf_feature_importance(model_state, top_n=top_n)
        st.altair_chart(importance_chart, width="stretch")
    # Tree statistics (only if available - sklearn RF has them, cuML RF doesn't)
    if not is_cuml and "tree_depths" in model_state:
        from entropice.dashboard.plots.model_state import plot_rf_tree_statistics

        st.subheader("Tree Structure Statistics")
        st.markdown("Distribution of tree properties across the forest.")
        with st.spinner("Generating tree statistics..."):
            chart_depths, chart_leaves, chart_nodes = plot_rf_tree_statistics(model_state)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.altair_chart(chart_depths, width="stretch")
            with col2:
                st.altair_chart(chart_leaves, width="stretch")
            with col3:
                st.altair_chart(chart_nodes, width="stretch")
        # Statistics
        with st.expander("Forest Statistics"):
            st.write("**Overall Statistics:**")
            depths = model_state["tree_depths"].to_pandas()
            leaves = model_state["tree_n_leaves"].to_pandas()
            nodes = model_state["tree_n_nodes"].to_pandas()
            col1, col2, col3 = st.columns(3)
            with col1:
                st.write("**Tree Depths:**")
                st.metric("Mean Depth", f"{depths.mean():.2f}")
                st.metric("Max Depth", f"{depths.max()}")
                st.metric("Min Depth", f"{depths.min()}")
            with col2:
                st.write("**Leaf Counts:**")
                st.metric("Mean Leaves", f"{leaves.mean():.2f}")
                st.metric("Max Leaves", f"{leaves.max()}")
                st.metric("Min Leaves", f"{leaves.min()}")
            with col3:
                st.write("**Node Counts:**")
                st.metric("Mean Nodes", f"{nodes.mean():.2f}")
                st.metric("Max Nodes", f"{nodes.max()}")
                st.metric("Min Nodes", f"{nodes.min()}")
    # Feature source analysis
    st.subheader("Feature Importance by Data Source")
    st.markdown(
        """
        Breakdown of feature importance by data source (AlphaEarth embeddings, ERA5 climate,
        ArcticDEM topography, and common features).
        """
    )
    # Get members used in training
    members = get_members_from_settings(selected_result.settings)
    # Extract features by source (RF exposes a single "feature_importance" variable)
    embedding_feature_array = None
    if "AlphaEarth" in members:
        embedding_feature_array = extract_embedding_features(model_state, importance_type="feature_importance")
    era5_yearly_array = None
    era5_seasonal_array = None
    era5_shoulder_array = None
    if "ERA5-yearly" in members:
        era5_yearly_array = extract_era5_features(
            model_state, importance_type="feature_importance", temporal_group="yearly"
        )
    if "ERA5-seasonal" in members:
        era5_seasonal_array = extract_era5_features(
            model_state, importance_type="feature_importance", temporal_group="seasonal"
        )
    if "ERA5-shoulder" in members:
        era5_shoulder_array = extract_era5_features(
            model_state, importance_type="feature_importance", temporal_group="shoulder"
        )
    arcticdem_feature_array = None
    if "ArcticDEM" in members:
        arcticdem_feature_array = extract_arcticdem_features(model_state, importance_type="feature_importance")
    common_feature_array = extract_common_features(model_state, importance_type="feature_importance")
    # Render each source's features if present
    if embedding_feature_array is not None:
        render_embedding_features(embedding_feature_array)
    if era5_yearly_array is not None:
        render_era5_features(era5_yearly_array, temporal_group="Yearly")
    if era5_seasonal_array is not None:
        render_era5_features(era5_seasonal_array, temporal_group="Seasonal")
    if era5_shoulder_array is not None:
        render_era5_features(era5_shoulder_array, temporal_group="Shoulder")
    if arcticdem_feature_array is not None:
        render_arcticdem_features(arcticdem_feature_array)
    if common_feature_array is not None:
        render_common_features(common_feature_array)
def render_knn_model_state(model_state: xr.Dataset, selected_result: TrainingResult):
    """Render visualizations for KNN model.

    KNN has no learned weights or structure to plot, so this section only
    displays the stored model configuration plus an explanatory note.

    Args:
        model_state: Xarray dataset containing KNN model state.
        selected_result: TrainingResult object containing training configuration.
    """
    attrs = model_state.attrs
    st.header("🔍 K-Nearest Neighbors Model Analysis")
    st.markdown(
        """
        K-Nearest Neighbors is a non-parametric, instance-based learning algorithm.
        Unlike tree-based or parametric models, KNN doesn't learn feature weights or build
        a model structure. Instead, it memorizes the training data and makes predictions
        based on the k nearest neighbors.
        """
    )
    # Display model metadata, grouped into three columns of related settings.
    st.subheader("Model Configuration")
    metric_columns = [
        [("Number of Neighbors (k)", "n_neighbors"), ("Training Samples", "n_samples_fit")],
        [("Weights", "weights"), ("Algorithm", "algorithm")],
        [("Metric", "metric")],
    ]
    for column, group in zip(st.columns(3), metric_columns):
        with column:
            for label, attr_key in group:
                st.metric(label, attrs.get(attr_key, "N/A"))
    st.info(
        """
        **Note:** KNN doesn't have traditional feature importance or model parameters to visualize.
        The model's behavior depends entirely on:
        - The number of neighbors (k)
        - The distance metric used
        - The weighting scheme for neighbors
        To understand the model better, consider visualizing the decision boundaries on a
        reduced-dimensional representation of your data (e.g., using PCA or t-SNE).
        """
    )
# Helper functions for embedding/era5/common features
def render_embedding_features(embedding_feature_array: xr.DataArray):
    """Render embedding feature visualizations.

    Args:
        embedding_feature_array: DataArray containing AlphaEarth embedding feature
            weights, indexed by aggregation ("agg"), "band" and "year" (these
            coordinate names are relied on for the top-features table below).
    """
    with st.container(border=True):
        st.header("🛰️ Embedding Feature Analysis")
        st.markdown(
            """
            Analysis of embedding features showing which aggregations, bands, and years
            are most important for the model predictions.
            """
        )
        # Summary bar charts
        st.markdown("### Importance by Dimension")
        with st.spinner("Generating dimension summaries..."):
            chart_agg, chart_band, chart_year = plot_embedding_aggregation_summary(embedding_feature_array)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.altair_chart(chart_agg, width="stretch")
            with col2:
                st.altair_chart(chart_band, width="stretch")
            with col3:
                st.altair_chart(chart_year, width="stretch")
        # Detailed heatmap
        st.markdown("### Detailed Heatmap by Aggregation")
        st.markdown("Shows the weight of each band-year combination for each aggregation type.")
        with st.spinner("Generating heatmap..."):
            heatmap_chart = plot_embedding_heatmap(embedding_feature_array)
            st.altair_chart(heatmap_chart, width="stretch")
        # Statistics
        with st.expander("Embedding Feature Statistics"):
            st.write("**Overall Statistics:**")
            n_emb_features = embedding_feature_array.size
            mean_weight = float(embedding_feature_array.mean().values)
            max_weight = float(embedding_feature_array.max().values)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Embedding Features", n_emb_features)
            with col2:
                st.metric("Mean Weight", f"{mean_weight:.4f}")
            with col3:
                st.metric("Max Weight", f"{max_weight:.4f}")
            # Show top embedding features
            st.write("**Top 10 Embedding Features:**")
            emb_df = embedding_feature_array.to_dataframe(name="weight").reset_index()
            top_emb = emb_df.nlargest(10, "weight")[["agg", "band", "year", "weight"]]
            st.dataframe(top_emb, width="stretch")
def render_era5_features(era5_feature_array: xr.DataArray, temporal_group: str = ""):
    """Render ERA5 feature visualizations.

    The layout adapts to the array's temporal structure: seasonal/shoulder data
    (detected via a "season" dim, or via three summary charts) gets an extra
    season axis and a time-trend heatmap; yearly data gets a simpler two-chart
    summary and a single heatmap.

    Args:
        era5_feature_array: ERA5 feature importance array.
        temporal_group: Name of the temporal grouping (e.g., "Yearly", "Seasonal", "Shoulder").
    """
    group_suffix = f" ({temporal_group})" if temporal_group else ""
    with st.container(border=True):
        st.header(f"⛅ ERA5 Feature Analysis{group_suffix}")
        temporal_suffix = f" for {temporal_group.lower()} aggregation" if temporal_group else ""
        st.markdown(
            f"""
            Analysis of ERA5 climate features{temporal_suffix} showing which variables and time periods
            are most important for the model predictions.
            """
        )
        # Summary bar charts
        st.markdown("### Importance by Dimension")
        with st.spinner("Generating ERA5 dimension summaries..."):
            charts = plot_era5_summary(era5_feature_array)
            # Check if this is seasonal/shoulder data (returns 3 charts) or yearly (returns 2 charts)
            if len(charts) == 3:
                chart_variable, chart_season, chart_year = charts
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.altair_chart(chart_variable, width="stretch")
                with col2:
                    st.altair_chart(chart_season, width="stretch")
                with col3:
                    st.altair_chart(chart_year, width="stretch")
            else:
                chart_variable, chart_time = charts
                col1, col2 = st.columns(2)
                with col1:
                    st.altair_chart(chart_variable, width="stretch")
                with col2:
                    st.altair_chart(chart_time, width="stretch")
        # Detailed heatmap
        st.markdown("### Detailed Heatmap")
        # Check if this is seasonal/shoulder data
        has_season = "season" in era5_feature_array.dims
        if has_season:
            st.markdown("Shows the weight of each variable-season-year combination.")
            with st.spinner("Generating ERA5 season heatmap..."):
                era5_heatmap_chart = plot_era5_heatmap(era5_feature_array)
                st.altair_chart(era5_heatmap_chart, width="stretch")
            # Add time-based heatmap for seasonal/shoulder
            st.markdown("### By Time Heatmap")
            st.markdown("Shows temporal trends by averaging over seasons.")
            with st.spinner("Generating ERA5 time heatmap..."):
                # plot_era5_time_heatmap may return None (no chart to show).
                era5_time_heatmap_chart = plot_era5_time_heatmap(era5_feature_array)
                if era5_time_heatmap_chart is not None:
                    st.altair_chart(era5_time_heatmap_chart, width="stretch")
        else:
            st.markdown("Shows the weight of each variable-time combination.")
            with st.spinner("Generating ERA5 heatmap..."):
                era5_heatmap_chart = plot_era5_heatmap(era5_feature_array)
                st.altair_chart(era5_heatmap_chart, width="stretch")
        # Statistics
        with st.expander("ERA5 Feature Statistics"):
            st.write("**Overall Statistics:**")
            n_era5_features = era5_feature_array.size
            mean_weight = float(era5_feature_array.mean().values)
            max_weight = float(era5_feature_array.max().values)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total ERA5 Features", n_era5_features)
            with col2:
                st.metric("Mean Weight", f"{mean_weight:.4f}")
            with col3:
                st.metric("Max Weight", f"{max_weight:.4f}")
            # Show top ERA5 features
            st.write("**Top 10 ERA5 Features:**")
            era5_df = era5_feature_array.to_dataframe(name="weight").reset_index()
            # Get all columns except 'weight' for display (dims differ per temporal group)
            display_cols = [col for col in era5_df.columns if col != "weight"] + ["weight"]
            top_era5 = era5_df.nlargest(10, "weight")[display_cols]
            st.dataframe(top_era5, width="stretch")
def render_arcticdem_features(arcticdem_feature_array: xr.DataArray):
    """Render ArcticDEM feature visualizations.

    Args:
        arcticdem_feature_array: DataArray containing ArcticDEM feature weights,
            indexed by "variable" and aggregation ("agg") — these coordinate
            names are relied on for the top-features table below.
    """
    with st.container(border=True):
        st.header("🏔️ ArcticDEM Feature Analysis")
        st.markdown(
            """
            Analysis of ArcticDEM topographic features showing which terrain variables and
            aggregations are most important for the model predictions.
            """
        )
        # Summary bar charts
        st.markdown("### Importance by Dimension")
        with st.spinner("Generating ArcticDEM dimension summaries..."):
            chart_variable, chart_agg = plot_arcticdem_summary(arcticdem_feature_array)
            col1, col2 = st.columns(2)
            with col1:
                st.altair_chart(chart_variable, width="stretch")
            with col2:
                st.altair_chart(chart_agg, width="stretch")
        # Detailed heatmap
        st.markdown("### Detailed Heatmap")
        st.markdown("Shows the weight of each variable-aggregation combination.")
        with st.spinner("Generating ArcticDEM heatmap..."):
            arcticdem_heatmap_chart = plot_arcticdem_heatmap(arcticdem_feature_array)
            st.altair_chart(arcticdem_heatmap_chart, width="stretch")
        # Statistics
        with st.expander("ArcticDEM Feature Statistics"):
            st.write("**Overall Statistics:**")
            n_arcticdem_features = arcticdem_feature_array.size
            mean_weight = float(arcticdem_feature_array.mean().values)
            max_weight = float(arcticdem_feature_array.max().values)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total ArcticDEM Features", n_arcticdem_features)
            with col2:
                st.metric("Mean Weight", f"{mean_weight:.4f}")
            with col3:
                st.metric("Max Weight", f"{max_weight:.4f}")
            # Show top ArcticDEM features
            st.write("**Top 10 ArcticDEM Features:**")
            arcticdem_df = arcticdem_feature_array.to_dataframe(name="weight").reset_index()
            top_arcticdem = arcticdem_df.nlargest(10, "weight")[["variable", "agg", "weight"]]
            st.dataframe(top_arcticdem, width="stretch")
def render_common_features(common_feature_array: xr.DataArray):
    """Render common feature visualizations.

    Args:
        common_feature_array: DataArray containing common feature weights,
            indexed by a "feature" coordinate (relied on for the table below).
    """
    with st.container(border=True):
        st.header("🗺️ Common Feature Analysis")
        st.markdown(
            """
            Analysis of common features including cell area, water area, land area, land ratio,
            longitude, and latitude. These features provide spatial and geographic context.
            """
        )
        # Bar chart showing all common feature weights
        with st.spinner("Generating common features chart..."):
            common_chart = plot_common_features(common_feature_array)
            st.altair_chart(common_chart, width="stretch")
        # Statistics
        with st.expander("Common Feature Statistics"):
            st.write("**Overall Statistics:**")
            n_common_features = common_feature_array.size
            mean_weight = float(common_feature_array.mean().values)
            max_weight = float(common_feature_array.max().values)
            min_weight = float(common_feature_array.min().values)
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Total Common Features", n_common_features)
            with col2:
                st.metric("Mean Weight", f"{mean_weight:.4f}")
            with col3:
                st.metric("Max Weight", f"{max_weight:.4f}")
            with col4:
                st.metric("Min Weight", f"{min_weight:.4f}")
            # Show all common features sorted by importance
            # (sorted by |weight| so strong negative weights rank high too)
            st.write("**All Common Features (by absolute weight):**")
            common_df = common_feature_array.to_dataframe(name="weight").reset_index()
            common_df["abs_weight"] = common_df["weight"].abs()
            common_df = common_df.sort_values("abs_weight", ascending=False)
            st.dataframe(common_df[["feature", "weight", "abs_weight"]], width="stretch")
        st.markdown(
            """
            **Interpretation:**
            - **cell_area, water_area, land_area**: Spatial extent features that may indicate
            size-related patterns
            - **land_ratio**: Proportion of land vs water in each cell
            - **lon, lat**: Geographic coordinates that can capture spatial trends or regional patterns
            - Positive weights indicate features that increase the probability of the positive class
            - Negative weights indicate features that decrease the probability of the positive class
            """
        )

View file

@ -9,7 +9,7 @@ from entropice.dashboard.sections.experiment_results import (
render_training_results_summary, render_training_results_summary,
) )
from entropice.dashboard.sections.storage_statistics import render_storage_statistics from entropice.dashboard.sections.storage_statistics import render_storage_statistics
from entropice.dashboard.utils.loaders import load_all_autogluon_training_results, load_all_training_results from entropice.dashboard.utils.loaders import load_all_training_results
from entropice.dashboard.utils.stats import DatasetStatistics, load_all_default_dataset_statistics from entropice.dashboard.utils.stats import DatasetStatistics, load_all_default_dataset_statistics
@ -27,9 +27,6 @@ def render_overview_page():
) )
# Load training results # Load training results
training_results = load_all_training_results() training_results = load_all_training_results()
autogluon_results = load_all_autogluon_training_results()
if len(autogluon_results) > 0:
training_results.extend(autogluon_results)
if not training_results: if not training_results:
st.warning("No training results found. Please run some training experiments first.") st.warning("No training results found. Please run some training experiments first.")

View file

@ -51,9 +51,9 @@ def render_analysis_settings_sidebar(training_results: list[TrainingResult]) ->
available_metrics = selected_result.available_metrics available_metrics = selected_result.available_metrics
# Try to get refit metric from settings # Try to get refit metric from settings
if selected_result.settings.task == "binary": if selected_result.run.task == "binary":
refit_metric = "f1" refit_metric = "f1"
elif selected_result.settings.task in ["count_regimes", "density_regimes"]: elif selected_result.run.task in ["count_regimes", "density_regimes"]:
refit_metric = "f1_weighted" refit_metric = "f1_weighted"
else: else:
refit_metric = "r2" refit_metric = "r2"
@ -121,23 +121,29 @@ def render_training_analysis_page():
st.divider() st.divider()
# Render confusion matrices for classification, regression analysis for regression # Render confusion matrices for classification, regression analysis for regression
if selected_result.settings.task in ["binary", "count_regimes", "density_regimes"]: if selected_result.run.task in ["binary", "count_regimes", "density_regimes"]:
render_confusion_matrices(selected_result) render_confusion_matrices(selected_result)
else: else:
render_regression_analysis(selected_result) render_regression_analysis(selected_result)
st.divider() st.divider()
render_cv_statistics_section(cv_statistics, selected_result.test_metrics.get(selected_metric, float("nan"))) if cv_statistics is not None:
test_score = selected_result._get_best_score("test")
render_cv_statistics_section(cv_statistics, test_score)
st.divider() st.divider()
render_hparam_space_section(selected_result, selected_metric) render_hparam_space_section(selected_result, selected_metric)
st.divider() st.divider()
# List all results at the end # List all results at the end
st.header("📄 All Training Results") if selected_result.run.method_type == "HPOCV":
st.dataframe(selected_result.results) st.header("📄 All Cross-Validation Results")
st.dataframe(selected_result.run.cv_results)
elif selected_result.run.method_type == "AutoML":
st.header("📄 Model Leaderboard")
st.dataframe(selected_result.run.leaderboard)
st.balloons() st.balloons()

View file

@ -0,0 +1 @@
"""Experiments."""

View file

@ -1,4 +1,6 @@
from typing import cast """Feature Importance Experiment."""
from typing import Literal, cast
import cyclopts import cyclopts
from stopuhr import stopwatch from stopuhr import stopwatch
@ -13,17 +15,19 @@ from entropice.utils.types import Grid, Model, TargetDataset, Task
cli = cyclopts.App("entropice-feature-importance") cli = cyclopts.App("entropice-feature-importance")
EXPERIMENT_NAME = "feature_importance_era5-shoulder_arcticdem" DEV = False
# EXPERIMENT_NAME = "tobis-final-tests"
EXPERIMENT_NAME = "feature_importance_era5-shoulder_arcticdem-v2"
if DEV:
EXPERIMENT_NAME = "tobis-final-tests"
@cli.default @cli.default
def main( def main(grid: Grid, target: TargetDataset, selection: Literal["none", "cluster", "univariate"] = "none"):
grid: Grid, """Feature Importance Experiment."""
target: TargetDataset,
):
levels = [3, 4, 5, 6] if grid == "hex" else [6, 7, 8, 9, 10] levels = [3, 4, 5, 6] if grid == "hex" else [6, 7, 8, 9, 10]
levels = [3, 6] if grid == "hex" else [6, 10] if DEV:
levels = [3, 6] if grid == "hex" else [6, 10]
for level in levels: for level in levels:
print(f"Running feature importance experiment for {grid} grid at level {level}...") print(f"Running feature importance experiment for {grid} grid at level {level}...")
dimension_filters = {"ArcticDEM": {"aggregations": ["median"]}} dimension_filters = {"ArcticDEM": {"aggregations": ["median"]}}
@ -38,9 +42,11 @@ def main(
# AutoGluon # AutoGluon
time_limit = 30 * 60 # 30 minutes time_limit = 30 * 60 # 30 minutes
# time_limit = 60 if DEV:
time_limit = 2 * 60 # 2 minutes
presets = "extreme" presets = "extreme"
# presets = "medium" if DEV:
presets = "medium"
settings = AutoGluonRunSettings( settings = AutoGluonRunSettings(
time_limit=time_limit, time_limit=time_limit,
presets=presets, presets=presets,
@ -59,11 +65,12 @@ def main(
print(f"\nRunning HPOCV for model {model}...") print(f"\nRunning HPOCV for model {model}...")
n_iter = { n_iter = {
"espa": 300, "espa": 300,
"xgboost": 100, "xgboost": 300,
"rf": 40, "rf": 100, # RF is slow, so we reduce the number of iterations
"knn": 20, "knn": 40, # kNN hpspace is small, so we reduce the number of iterations
}[model] }[model]
# n_iter = 3 if DEV:
n_iter = 3
scaler = "standard" if model in ["espa", "knn"] else "none" scaler = "standard" if model in ["espa", "knn"] else "none"
normalize = scaler != "none" normalize = scaler != "none"
settings = HPOCVRunSettings( settings = HPOCVRunSettings(

View file

@ -0,0 +1,79 @@
"""Feature Importance Experiment."""

from typing import cast

import cyclopts
from stopuhr import stopwatch

from entropice.ml.autogluon import RunSettings as AutoGluonRunSettings
from entropice.ml.autogluon import train as train_autogluon
from entropice.ml.dataset import DatasetEnsemble
from entropice.ml.hpsearchcv import RunSettings as HPOCVRunSettings
from entropice.ml.hpsearchcv import hpsearch_cv
from entropice.utils.paths import RESULTS_DIR
from entropice.utils.types import Grid, Model, TargetDataset, Task

cli = cyclopts.App("entropice-feature-importance")

# Results for this experiment are written under RESULTS_DIR / EXPERIMENT_NAME.
EXPERIMENT_NAME = "feature_importance_era5-shoulder_arcticdem-v2"


@cli.default
def main(grid: Grid, target: TargetDataset):
    """Feature Importance Experiment."""
    # Sweep all levels defined for the chosen grid type.
    levels = [3, 4, 5, 6] if grid == "hex" else [6, 7, 8, 9, 10]
    for level in levels:
        print(f"Running feature importance experiment for {grid} grid at level {level}...")
        dimension_filters = {"AlphaEarth": {"agg": ["median"]}}
        dataset_ensemble = DatasetEnsemble(
            grid=grid, level=level, members=["AlphaEarth"], dimension_filters=dimension_filters
        )
        for task in cast(list[Task], ["binary", "density"]):
            print(f"\nRunning for {task}...")
            # AutoGluon
            time_limit = 30 * 60  # 30 minutes
            presets = "extreme"
            settings = AutoGluonRunSettings(
                time_limit=time_limit,
                presets=presets,
                verbosity=2,
                task=task,
                target=target,
            )
            train_autogluon(dataset_ensemble, settings, experiment=EXPERIMENT_NAME)
            # HPOCV
            splitter = "stratified_shuffle" if task == "binary" else "kfold"
            models: list[Model] = ["xgboost", "rf", "knn"]
            # ESPA is a classifier, so it only applies to the binary task.
            if task == "binary":
                models.append("espa")
            for model in models:
                print(f"\nRunning HPOCV for model {model}...")
                # Per-model search budget (random-search iterations).
                n_iter = {
                    "espa": 300,
                    "xgboost": 300,
                    "rf": 100,  # RF is slow, so we reduce the number of iterations
                    "knn": 40,  # kNN hpspace is small, so we reduce the number of iterations
                }[model]
                settings = HPOCVRunSettings(
                    n_iter=n_iter,
                    task=task,
                    target=target,
                    splitter=splitter,
                    model=model,
                    # AlphaEarth Embeddings are already normalized unit vectors
                    scaler="none",
                    normalize=False,
                )
                hpsearch_cv(dataset_ensemble, settings, experiment=EXPERIMENT_NAME)
    # Persist timing measurements next to the experiment's other outputs.
    stopwatch.summary()
    times = stopwatch.export()
    times.to_parquet(RESULTS_DIR / EXPERIMENT_NAME / f"training_times_{target}_{grid}.parquet")
    print("Done.")


if __name__ == "__main__":
    cli()

File diff suppressed because one or more lines are too long

View file

@ -18,6 +18,7 @@ from entropice.spatial import grids
from entropice.utils.paths import ( from entropice.utils.paths import (
DARTS_MLLABELS_DIR, DARTS_MLLABELS_DIR,
DARTS_V1_DIR, DARTS_V1_DIR,
DARTS_V2_DIR,
get_darts_file, get_darts_file,
) )
from entropice.utils.types import Grid from entropice.utils.types import Grid
@ -25,6 +26,8 @@ from entropice.utils.types import Grid
traceback.install() traceback.install()
pretty.install() pretty.install()
darts_v2_l2_file = DARTS_V2_DIR / "all_prediction_segments_ensemble5.parquet"
darts_v2_l2_cov_file = DARTS_V2_DIR / "all_prediction_extent_ensemble5.parquet"
darts_v1_l2_file = DARTS_V1_DIR / "DARTS_NitzeEtAl_v1-2_features_2018-2023_level2.parquet" darts_v1_l2_file = DARTS_V1_DIR / "DARTS_NitzeEtAl_v1-2_features_2018-2023_level2.parquet"
darts_v1_l2_cov_file = DARTS_V1_DIR / "DARTS_NitzeEtAl_v1-2_coverage_2018-2023_level2.parquet" darts_v1_l2_cov_file = DARTS_V1_DIR / "DARTS_NitzeEtAl_v1-2_coverage_2018-2023_level2.parquet"
darts_v1_corrections = DARTS_V1_DIR / "negative_correction.geojson" darts_v1_corrections = DARTS_V1_DIR / "negative_correction.geojson"
@ -131,6 +134,83 @@ def _process_rts_yearly_grid(
return darts return darts
@cli.command()
def extract_darts_v2(grid: Grid, level: int):
"""Extract RTS labels from DARTS-v2 Level-2 dataset.
Creates a Darts-v1 xarray Dataset on the specified grid and level.
The Dataset contains the following variables:
- count: Number of RTS in the cell
- area_km2: Total area of RTS in the cell (in km^2)
- covered_area_km2: Area of the cell covered by DARTS (in km^2)
- coverage: Fraction of the cell covered by DARTS
- density: Density of RTS area per covered area (area_km2 / covered_area_km2)
Since the DARTS-v1 Level-2 dataset contains yearly data, all variables are indexed by year as well.
Thus each variable has dimensions (cell_ids, year).
Args:
grid (Grid): The grid type to use.
level (int): The grid level to use.
"""
with stopwatch("Load data"):
grid_gdf, cell_areas = _load_grid(grid, level)
darts_l2 = gpd.read_parquet(darts_v2_l2_file).to_crs(grid_gdf.crs)
darts_cov_l2 = gpd.read_parquet(darts_v2_l2_cov_file).to_crs(grid_gdf.crs)
# Need to filter small noise pixels (I do not know where they are coming from)
darts_cov_l2 = darts_cov_l2[darts_cov_l2.geometry.area > 1e9]
with stopwatch("Assign RTS to grid"):
grid_l2 = grid_gdf.overlay(darts_l2, how="intersection")
grid_cov_l2 = grid_gdf.overlay(darts_cov_l2, how="intersection")
darts = _process_rts_yearly_grid(grid_l2, grid_cov_l2, cell_areas)
darts = _convert_xdggs(darts, grid, level)
output_path = get_darts_file(grid, level, version="v2")
with stopwatch(f"Writing Darts v2 to {output_path}"):
darts.to_zarr(output_path, consolidated=False, mode="w")
@cli.command()
def extract_darts_v2_aggregated(grid: Grid, level: int):
"""Extract RTS labels from DARTS-v2 Level-3 dataset.
Creates a Darts-v2 xarray Dataset on the specified grid and level.
The Dataset contains the following variables:
- count: Number of RTS in the cell
- area_km2: Total area of RTS in the cell (in km^2)
- covered_area_km2: Area of the cell covered by DARTS (in km^2)
- coverage: Fraction of the cell covered by DARTS
- density: Density of RTS area per covered area (area_km2 / covered_area_km2)
Since the DARTS-v2 Level-2 dataset contains yearly data, the data is dissolved then exploded to obtain Level-3 data.
Thus each variable has only the dimension (cell_ids).
Args:
grid (Grid): The grid type to use.
level (int): The grid level to use.
"""
with stopwatch("Load data"):
grid_gdf, cell_areas = _load_grid(grid, level)
darts_l2 = gpd.read_parquet(darts_v2_l2_file).to_crs(grid_gdf.crs)
darts_cov_l2 = gpd.read_parquet(darts_v2_l2_cov_file).to_crs(grid_gdf.crs)
# Need to filter small noise pixels (I do not know where they are coming from)
darts_cov_l2 = darts_cov_l2[darts_cov_l2.geometry.area > 1e9]
# Remove overlapping labels by dissolving
darts_l2 = darts_l2[["geometry"]].dissolve().explode()
darts_cov_l2 = darts_cov_l2[["geometry"]].dissolve().explode()
with stopwatch("Extract RTS labels"):
grid_l3 = grid_gdf.overlay(darts_l2, how="intersection")
grid_cov_l3 = grid_gdf.overlay(darts_cov_l2, how="intersection")
darts = _process_rts_grid(grid_l3, grid_cov_l3, cell_areas)
darts = _convert_xdggs(darts, grid, level)
output_path = get_darts_file(grid, level, version="v2-l3")
with stopwatch(f"Writing Darts v2 l3 to {output_path}"):
darts.to_zarr(output_path, consolidated=False, mode="w")
@cli.command() @cli.command()
def extract_darts_v1(grid: Grid, level: int): def extract_darts_v1(grid: Grid, level: int):
"""Extract RTS labels from DARTS-v1 Level-2 dataset. """Extract RTS labels from DARTS-v1 Level-2 dataset.
@ -176,7 +256,6 @@ def extract_darts_v1(grid: Grid, level: int):
grid_cov_l2 = grid_gdf.overlay(darts_cov_l2.to_crs(grid_gdf.crs), how="intersection") grid_cov_l2 = grid_gdf.overlay(darts_cov_l2.to_crs(grid_gdf.crs), how="intersection")
darts = _process_rts_yearly_grid(grid_l2, grid_cov_l2, cell_areas) darts = _process_rts_yearly_grid(grid_l2, grid_cov_l2, cell_areas)
darts = _convert_xdggs(darts, grid, level) darts = _convert_xdggs(darts, grid, level)
output_path = get_darts_file(grid, level, version="v1") output_path = get_darts_file(grid, level, version="v1")
with stopwatch(f"Writing Darts v1 to {output_path}"): with stopwatch(f"Writing Darts v1 to {output_path}"):
@ -206,10 +285,18 @@ def extract_darts_v1_aggregated(grid: Grid, level: int):
darts_l2 = gpd.read_parquet(darts_v1_l2_file) darts_l2 = gpd.read_parquet(darts_v1_l2_file)
darts_cov_l2 = gpd.read_parquet(darts_v1_l2_cov_file) darts_cov_l2 = gpd.read_parquet(darts_v1_l2_cov_file)
grid_gdf, cell_areas = _load_grid(grid, level) grid_gdf, cell_areas = _load_grid(grid, level)
corrections = gpd.read_file(darts_v1_corrections).to_crs(darts_l2.crs)
# Remove overlapping labels by dissolving # Remove overlapping labels by dissolving
darts_l2 = darts_l2[["geometry"]].dissolve().explode() darts_l2 = darts_l2[["geometry"]].dissolve().explode()
darts_cov_l2 = darts_cov_l2[["geometry"]].dissolve().explode() darts_cov_l2 = darts_cov_l2[["geometry"]].dissolve().explode()
with stopwatch("Apply corrections"):
# The correction file is just an area of sure negatives
# Thus, we first need to remove all RTS labels that intersect with the correction area,
darts_l2 = gpd.overlay(darts_l2, corrections, how="difference")
# then we need to add the correction area as coverage to the coverage file.
darts_cov_l2 = gpd.overlay(darts_cov_l2, corrections, how="union")
with stopwatch("Extract RTS labels"): with stopwatch("Extract RTS labels"):
grid_l3 = grid_gdf.overlay(darts_l2.to_crs(grid_gdf.crs), how="intersection") grid_l3 = grid_gdf.overlay(darts_l2.to_crs(grid_gdf.crs), how="intersection")
grid_cov_l3 = grid_gdf.overlay(darts_cov_l2.to_crs(grid_gdf.crs), how="intersection") grid_cov_l3 = grid_gdf.overlay(darts_cov_l2.to_crs(grid_gdf.crs), how="intersection")

View file

@ -7,6 +7,6 @@ This package contains modules for machine learning workflows:
- inference: Batch prediction pipeline for trained classifiers - inference: Batch prediction pipeline for trained classifiers
""" """
from . import dataset, inference, randomsearch from . import autogluon, dataset, hpsearchcv, inference
__all__ = ["dataset", "inference", "randomsearch"] __all__ = ["autogluon", "dataset", "hpsearchcv", "inference", "inference"]

View file

@ -49,7 +49,7 @@ def _compute_metrics_and_confusion_matrix( # noqa: C901
complete_scores = predictor.evaluate(complete_data, display=True, detailed_report=True) complete_scores = predictor.evaluate(complete_data, display=True, detailed_report=True)
m = [] m = []
cm = {} cm = {}
for dataset, scores in zip(["train", "test", "complete"], [train_scores, test_scores, complete_scores]): for split, scores in zip(["train", "test", "complete"], [train_scores, test_scores, complete_scores]):
for metric, score in scores.items(): for metric, score in scores.items():
if metric == "confusion_matrix": if metric == "confusion_matrix":
score = cast(pd.DataFrame, score) score = cast(pd.DataFrame, score)
@ -58,24 +58,24 @@ def _compute_metrics_and_confusion_matrix( # noqa: C901
dims=("y_true", "y_pred"), dims=("y_true", "y_pred"),
coords={"y_true": score.index.tolist(), "y_pred": score.columns.tolist()}, coords={"y_true": score.index.tolist(), "y_pred": score.columns.tolist()},
) )
cm[dataset] = confusion_matrix cm[split] = confusion_matrix
elif metric == "classification_report": elif metric == "classification_report":
score = cast(dict[str, dict[str, float]], score) score = cast(dict[str, dict[str, float]], score)
score.pop("accuracy") # Accuracy is already included as a separate metric score.pop("accuracy") # Accuracy is already included as a separate metric
macro_avg = score.pop("macro avg") macro_avg = score.pop("macro avg")
for macro_avg_metric, macro_avg_score in macro_avg.items(): for macro_avg_metric, macro_avg_score in macro_avg.items():
metric_name = f"macro_avg_{macro_avg_metric}" metric_name = f"macro_avg_{macro_avg_metric}"
m.append({"dataset": dataset, "metric": metric_name, "score": macro_avg_score}) m.append({"split": split, "metric": metric_name, "score": macro_avg_score})
weighted_avg = score.pop("weighted avg") weighted_avg = score.pop("weighted avg")
for weighted_avg_metric, weighted_avg_score in weighted_avg.items(): for weighted_avg_metric, weighted_avg_score in weighted_avg.items():
metric_name = f"weighted_avg_{weighted_avg_metric}" metric_name = f"weighted_avg_{weighted_avg_metric}"
m.append({"dataset": dataset, "metric": metric_name, "score": weighted_avg_score}) m.append({"split": split, "metric": metric_name, "score": weighted_avg_score})
for class_name, class_scores in score.items(): for class_name, class_scores in score.items():
class_name = class_name.replace(" ", "-") class_name = class_name.replace(" ", "-")
for class_metric, class_score in class_scores.items(): for class_metric, class_score in class_scores.items():
m.append({"dataset": dataset, "metric": f"{class_name}_{class_metric}", "score": class_score}) m.append({"split": split, "metric": f"{class_name}_{class_metric}", "score": class_score})
else: # Scalar metric else: # Scalar metric
m.append({"dataset": dataset, "metric": metric, "score": score}) m.append({"split": split, "metric": metric, "score": score})
if len(cm) == 0: if len(cm) == 0:
return pd.DataFrame(m), None return pd.DataFrame(m), None
elif len(cm) == 3: elif len(cm) == 3:
@ -100,8 +100,8 @@ def _compute_shap_explanation(
output_names=target_labels, output_names=target_labels,
) )
samples = test_data.drop(columns=["label"]) samples = test_data.drop(columns=["label"])
if len(samples) > 200: if len(samples) > 100:
samples = samples.sample(n=200, random_state=42) samples = samples.sample(n=100, random_state=42)
explanation = explainer(samples) explanation = explainer(samples)
return explanation return explanation
@ -161,10 +161,12 @@ def train(
feature_importance = predictor.feature_importance(test_data) feature_importance = predictor.feature_importance(test_data)
metrics, confusion_matrix = _compute_metrics_and_confusion_matrix(predictor, train_data, test_data, complete_data) metrics, confusion_matrix = _compute_metrics_and_confusion_matrix(predictor, train_data, test_data, complete_data)
with stopwatch("Explaining model predictions with SHAP..."): # ?: GPU inference is not yet implemented in AutoGluon, hence SHAP computation takes ages for large model ensembles,
explanation = _compute_shap_explanation( # as they are present in the higher quality presets. Disabling SHAP for now...
predictor, train_data, test_data, training_data.feature_names, training_data.target_labels # with stopwatch("Explaining model predictions with SHAP..."):
) # explanation = _compute_shap_explanation(
# predictor, train_data, test_data, training_data.feature_names, training_data.target_labels
# )
print("Predicting probabilities for all cells...") print("Predicting probabilities for all cells...")
preds = predict_proba(dataset_ensemble, model=predictor, task=settings.task) preds = predict_proba(dataset_ensemble, model=predictor, task=settings.task)
@ -176,12 +178,11 @@ def train(
method=AutoML(time_budget=settings.time_limit, preset=settings.presets, hpo=False), method=AutoML(time_budget=settings.time_limit, preset=settings.presets, hpo=False),
task=settings.task, task=settings.task,
target=settings.target, target=settings.target,
training_set=training_data,
model=predictor, model=predictor,
model_type="autogluon", model_type="autogluon",
metrics=metrics, metrics=metrics,
feature_importance=feature_importance, feature_importance=feature_importance,
shap_explanation=explanation, shap_explanation=None,
predictions=preds, predictions=preds,
confusion_matrix=confusion_matrix, confusion_matrix=confusion_matrix,
cv_results=None, cv_results=None,

View file

@ -1,208 +0,0 @@
"""DePRECATED!!! Training with AutoGluon TabularPredictor for automated ML."""
import pickle
from dataclasses import asdict, dataclass
import cyclopts
import pandas as pd
import toml
from autogluon.tabular import TabularDataset, TabularPredictor
from rich import pretty, traceback
from sklearn import set_config
from stopuhr import stopwatch
from entropice.ml.dataset import DatasetEnsemble
from entropice.utils.paths import get_training_results_dir
from entropice.utils.types import TargetDataset, Task
traceback.install()
pretty.install()
set_config(array_api_dispatch=False)
cli = cyclopts.App("entropice-autogluon")
@cyclopts.Parameter("*")
@dataclass(frozen=True, kw_only=True)
class AutoGluonSettings:
"""AutoGluon training settings."""
task: Task = "binary"
target: TargetDataset = "darts_v1"
time_limit: int = 3600 # Time limit in seconds (1 hour default)
presets: str = "best" # AutoGluon preset: 'best', 'high', 'good', 'medium'
eval_metric: str | None = None # Evaluation metric, None for auto-detect
num_bag_folds: int = 5 # Number of folds for bagging
num_bag_sets: int = 1 # Number of bagging sets
num_stack_levels: int = 1 # Number of stacking levels
num_gpus: int = 1 # Number of GPUs to use
verbosity: int = 2 # Verbosity level (0-4)
@dataclass(frozen=True, kw_only=True)
class AutoGluonTrainingSettings(DatasetEnsemble, AutoGluonSettings):
    """Combined settings for AutoGluon training.

    Merges the dataset-ensemble configuration with the AutoGluon run
    settings; used as the persisted record written to ``training_settings.toml``.
    """

    classes: list[str] | None = None  # Target class labels of the training set
    problem_type: str = "binary"  # AutoGluon problem type derived from the task
def _determine_problem_type_and_metric(task: Task) -> tuple[str, str]:
"""Determine AutoGluon problem type and appropriate evaluation metric.
Args:
task: The training task type
Returns:
Tuple of (problem_type, eval_metric)
"""
if task == "binary":
return ("binary", "balanced_accuracy") # Good for imbalanced datasets
elif task in ["count_regimes", "density_regimes"]:
return ("multiclass", "f1_weighted") # Weighted F1 for multiclass
elif task in ["count", "density"]:
return ("regression", "mean_absolute_error")
else:
raise ValueError(f"Unknown task: {task}")
@cli.default
def autogluon_train(
    dataset_ensemble: DatasetEnsemble,
    settings: AutoGluonSettings = AutoGluonSettings(),
    experiment: str | None = None,
):
    """Train models using AutoGluon TabularPredictor.

    End-to-end pipeline: builds the training set from the ensemble, fits an
    AutoGluon predictor, evaluates it, and persists leaderboard, feature
    importance, settings, metrics, and the predictor itself under the
    experiment's results directory.

    Args:
        dataset_ensemble: Dataset ensemble configuration
        settings: AutoGluon training settings
        experiment: Optional experiment name for organizing results
    """
    training_data = dataset_ensemble.create_training_set(task=settings.task, target=settings.target)
    # Convert to AutoGluon TabularDataset
    train_data: pd.DataFrame = TabularDataset(training_data.to_dataframe("train"))  # ty:ignore[invalid-assignment]
    test_data: pd.DataFrame = TabularDataset(training_data.to_dataframe("test"))  # ty:ignore[invalid-assignment]
    print(f"\nTraining data: {len(train_data)} samples")
    print(f"Test data: {len(test_data)} samples")
    print(f"Features: {len(training_data.feature_names)}")
    print(f"Classes: {training_data.target_labels}")
    # Determine problem type and metric; an explicit eval_metric setting
    # overrides the task-derived default.
    problem_type, default_metric = _determine_problem_type_and_metric(settings.task)
    eval_metric = settings.eval_metric or default_metric
    print(f"\n🎯 Problem type: {problem_type}")
    print(f"📈 Evaluation metric: {eval_metric}")
    # Create results directory
    results_dir = get_training_results_dir(
        experiment=experiment,
        grid=dataset_ensemble.grid,
        level=dataset_ensemble.level,
        task=settings.task,
        target=settings.target,
        name="autogluon",
    )
    print(f"\n💾 Results directory: {results_dir}")
    # Initialize TabularPredictor; models are stored under results_dir/models
    print(f"\n🚀 Initializing AutoGluon TabularPredictor (preset='{settings.presets}')...")
    predictor = TabularPredictor(
        label="label",
        problem_type=problem_type,
        eval_metric=eval_metric,
        path=str(results_dir / "models"),
        verbosity=settings.verbosity,
    )
    # Train models
    print(f"\n⚡ Training models (time_limit={settings.time_limit}s, num_gpus={settings.num_gpus})...")
    with stopwatch("AutoGluon training"):
        predictor.fit(
            train_data=train_data,
            time_limit=settings.time_limit,
            presets=settings.presets,
            num_bag_folds=settings.num_bag_folds,
            num_bag_sets=settings.num_bag_sets,
            num_stack_levels=settings.num_stack_levels,
            num_gpus=settings.num_gpus,
            ag_args_fit={"num_gpus": settings.num_gpus} if settings.num_gpus > 0 else None,
        )
    # Evaluate on test data
    print("\n📊 Evaluating on test data...")
    test_score = predictor.evaluate(test_data, silent=True, detailed_report=True)
    print(f"Test {eval_metric}: {test_score[eval_metric]:.4f}")
    # Get leaderboard
    print("\n🏆 Model Leaderboard:")
    leaderboard = predictor.leaderboard(test_data, silent=True)
    print(leaderboard[["model", "score_test", "score_val", "pred_time_test", "fit_time"]].head(10))
    # Save leaderboard
    leaderboard_file = results_dir / "leaderboard.parquet"
    print(f"\n💾 Saving leaderboard to {leaderboard_file}")
    leaderboard.to_parquet(leaderboard_file)
    # Get feature importance (best-effort: failures are reported, not raised)
    print("\n🔍 Computing feature importance...")
    with stopwatch("Feature importance"):
        try:
            # Compute feature importance with reduced repeats
            feature_importance = predictor.feature_importance(
                test_data,
                num_shuffle_sets=3,
                subsample_size=min(500, len(test_data)),  # Further subsample if needed
            )
            fi_file = results_dir / "feature_importance.parquet"
            print(f"💾 Saving feature importance to {fi_file}")
            feature_importance.to_parquet(fi_file)
        except Exception as e:
            print(f"⚠️ Could not compute feature importance: {e}")
    # Save training settings (ensemble + run settings merged into one record)
    print("\n💾 Saving training settings...")
    combined_settings = AutoGluonTrainingSettings(
        **asdict(settings),
        **asdict(dataset_ensemble),
        classes=training_data.target_labels,
        problem_type=problem_type,
    )
    settings_file = results_dir / "training_settings.toml"
    with open(settings_file, "w") as f:
        toml.dump({"settings": asdict(combined_settings)}, f)
    # Save test metrics
    # We need to use pickle here, because the confusion matrix is stored as a dataframe
    # This only matters for classification tasks
    test_metrics_file = results_dir / "test_metrics.pickle"
    print(f"💾 Saving test metrics to {test_metrics_file}")
    with open(test_metrics_file, "wb") as f:
        pickle.dump(test_score, f, protocol=pickle.HIGHEST_PROTOCOL)
    # Save the predictor
    # NOTE(review): predictor.save() writes to the predictor's configured path
    # (results_dir / "models"), not to predictor_file — the printed path looks
    # misleading; confirm intent.
    predictor_file = results_dir / "tabular_predictor.pkl"
    print(f"💾 Saving TabularPredictor to {predictor_file}")
    predictor.save()
    # Print summary
    print("\n" + "=" * 80)
    print("✅ AutoGluon Training Complete!")
    print("=" * 80)
    print(f"\n📂 Results saved to: {results_dir}")
    print(f"🏆 Best model: {predictor.model_best}")
    print(f"📈 Test {eval_metric}: {test_score[eval_metric]:.4f}")
    print(f"⏱️ Total models trained: {len(leaderboard)}")
    stopwatch.summary()
    print("\nDone! 🎉")
# Entry point: run the cyclopts CLI app when executed as a script.
if __name__ == "__main__":
    cli()

View file

@ -53,14 +53,40 @@ def _collapse_to_dataframe(ds: xr.Dataset | xr.DataArray) -> pd.DataFrame:
use_dummy = collapsed.shape[0] == 0 use_dummy = collapsed.shape[0] == 0
if use_dummy: if use_dummy:
collapsed.loc[tuple(range(len(collapsed.index.names)))] = np.nan collapsed.loc[tuple(range(len(collapsed.index.names)))] = np.nan
pivcols = set(collapsed.index.names) - {"cell_ids"} cols = cast(list[str], list(collapsed.index.names))
pivcols = sorted(set(cols) - {"cell_ids"})
collapsed = collapsed.pivot_table(index="cell_ids", columns=pivcols) collapsed = collapsed.pivot_table(index="cell_ids", columns=pivcols)
collapsed.columns = ["_".join(map(str, v)) for v in collapsed.columns] collapsed.columns = ["_".join(map(str, v)) for v in collapsed.columns]
if use_dummy: if use_dummy:
collapsed = collapsed.dropna(how="all") collapsed = collapsed.dropna(how="all")
expected_cols = _get_expected_collapsed_columns(ds)
missing_cols = set(expected_cols) - set(collapsed.columns)
if missing_cols:
raise ValueError(
f"Collapsed dataframe is missing expected columns: {missing_cols=} {collapsed.columns=} {expected_cols=}"
)
return collapsed return collapsed
def _get_expected_collapsed_columns(ds: xr.Dataset | xr.DataArray) -> list[str]:
dims = sorted(set(ds.dims) - {"cell_ids"})
dims_product = list(product(*[ds.coords[dim].to_numpy() for dim in dims]))
expected_cols = []
if isinstance(ds, xr.Dataset):
variables = list(ds.data_vars)
for var in variables:
for dims_values in dims_product:
agg = "_".join(dims_values)
expected_cols.append(f"{var}_{agg}")
else:
assert ds.name is not None, "DataArray must have a name to determine expected columns"
for dims_values in dims_product:
agg = "_".join(dims_values)
expected_cols.append(f"{ds.name}_{agg}")
return expected_cols
def _cell_ids_hash(cell_ids: pd.Series) -> str: def _cell_ids_hash(cell_ids: pd.Series) -> str:
sorted_ids = np.sort(cell_ids.to_numpy()) sorted_ids = np.sort(cell_ids.to_numpy())
return hashlib.blake2b(sorted_ids.tobytes(), digest_size=8).hexdigest() return hashlib.blake2b(sorted_ids.tobytes(), digest_size=8).hexdigest()
@ -130,8 +156,8 @@ class SplittedArrays[ArrayType: (torch.Tensor, np.ndarray, cp.ndarray)]:
test: ArrayType test: ArrayType
@cached_property @cached_property
def combined(self) -> ArrayType: def complete(self) -> ArrayType:
"""Combined train and test arrays.""" """Complete train and test arrays."""
if isinstance(self.train, torch.Tensor) and isinstance(self.test, torch.Tensor): if isinstance(self.train, torch.Tensor) and isinstance(self.test, torch.Tensor):
return torch.cat([self.train, self.test], dim=0) # ty:ignore[invalid-return-type] return torch.cat([self.train, self.test], dim=0) # ty:ignore[invalid-return-type]
elif isinstance(self.train, cp.ndarray) and isinstance(self.test, cp.ndarray): elif isinstance(self.train, cp.ndarray) and isinstance(self.test, cp.ndarray):
@ -272,7 +298,6 @@ class DatasetEnsemble:
# ?: We can't use L2SourceDataset as types here because cyclopts can't handle Literals as dict keys # ?: We can't use L2SourceDataset as types here because cyclopts can't handle Literals as dict keys
dimension_filters: dict[str, dict[str, list]] = field(default_factory=dict) dimension_filters: dict[str, dict[str, list]] = field(default_factory=dict)
variable_filters: dict[str, list[str]] = field(default_factory=dict) variable_filters: dict[str, list[str]] = field(default_factory=dict)
add_lonlat: bool = True
def __post_init__(self): def __post_init__(self):
# Validate filters # Validate filters
@ -295,6 +320,10 @@ class DatasetEnsemble:
f"Invalid dimension filter for {dim=}: {values}" f"Invalid dimension filter for {dim=}: {values}"
" Dimension filter values must be a list with one or more entries." " Dimension filter values must be a list with one or more entries."
) )
if "Grid" in self.variable_filters.keys():
filtered = set(self.variable_filters["Grid"])
valid = {"x", "y", "cell_area", "land_area", "water_area", "land_ratio"}
assert len(filtered - valid) == 0
def __hash__(self): def __hash__(self):
return int(self.id(), 16) return int(self.id(), 16)
@ -320,12 +349,10 @@ class DatasetEnsemble:
@cache @cache
def read_grid(self) -> gpd.GeoDataFrame: def read_grid(self) -> gpd.GeoDataFrame:
"""Load the grid dataframe and enrich it with lat-lon information.""" """Load the grid dataframe and enrich it with lat-lon information."""
columns_to_load = ["cell_id", "geometry", "cell_area", "land_area", "water_area", "land_ratio"] if "Grid" in self.variable_filters:
# The name add_lonlat has legacy reasons and should be add_location columns_to_load = self.variable_filters["Grid"] + ["cell_id", "geometry"]
# If add_location is true, keep the x and y else:
# For future reworks: "lat" and "lon" are also available columns columns_to_load = ["cell_id", "geometry", "cell_area", "land_area", "water_area", "land_ratio", "x", "y"]
if self.add_lonlat:
columns_to_load.extend(["x", "y"])
# Reading the data takes for the largest grids ~1.7s # Reading the data takes for the largest grids ~1.7s
gridfile = entropice.utils.paths.get_grid_file(self.grid, self.level) gridfile = entropice.utils.paths.get_grid_file(self.grid, self.level)
@ -468,7 +495,7 @@ class DatasetEnsemble:
ds = unstack_era5_time(ds, era5_agg) ds = unstack_era5_time(ds, era5_agg)
# Apply the temporal mode # Apply the temporal mode
if isinstance(self.temporal_mode, int): if isinstance(self.temporal_mode, int) and member != "ArcticDEM":
ds = ds.sel(year=self.temporal_mode, drop=True) ds = ds.sel(year=self.temporal_mode, drop=True)
# Actually read data into memory # Actually read data into memory

View file

@ -79,13 +79,16 @@ class RunSettings:
""" """
return "torch" if self.model == "espa" else "cuda" return "torch" if self.model == "espa" else "cuda"
def build_pipeline(self, model_hpo_config: ModelHPOConfig) -> Pipeline: # noqa: C901 @property
def hpo_config(self) -> ModelHPOConfig:
"""Get the hyperparameter optimization configuration for the selected model and task."""
return get_model_hpo_config(self.model, self.task)
def build_pipeline(self, model_hpo_config: ModelHPOConfig) -> Pipeline:
"""Build a scikit-learn Pipeline based on the settings.""" """Build a scikit-learn Pipeline based on the settings."""
# Add a feature scaler / normalization step if specified, but assert that it's only used for non-Tree models # Add a feature scaler / normalization step if specified, but assert that it's only used for non-Tree models
if self.model in ["rf", "xgboost"]: if self.model in ["rf", "xgboost"]:
assert self.scaler == "none", f"Scaler {self.scaler} is not viable with model {self.model}" assert self.scaler == "none", f"Scaler {self.scaler} is not viable with model {self.model}"
elif self.scaler == "none":
assert self.scaler != "none", f"No scaler specified for model {self.model}, which is not viable."
match self.scaler: match self.scaler:
case "standard": case "standard":
@ -159,9 +162,9 @@ def _compute_metrics(y: SplittedArrays, y_pred: SplittedArrays, metrics: list[st
m = [] m = []
for metric in metrics: for metric in metrics:
metric_fn = metric_functions[metric] metric_fn = metric_functions[metric]
for split in ["train", "test", "combined"]: for split in ["train", "test", "complete"]:
value = metric_fn(getattr(y, split), getattr(y_pred, split)) value = metric_fn(getattr(y, split), getattr(y_pred, split))
m.append({"metric": metric, "split": split, "value": value}) m.append({"metric": metric, "split": split, "score": value})
return pd.DataFrame(m) return pd.DataFrame(m)
@ -174,9 +177,9 @@ def _compute_confusion_matrices(
{ {
"test": (("true_label", "predicted_label"), confusion_matrix(y.test, y_pred.test, labels=codes)), "test": (("true_label", "predicted_label"), confusion_matrix(y.test, y_pred.test, labels=codes)),
"train": (("true_label", "predicted_label"), confusion_matrix(y.train, y_pred.train, labels=codes)), "train": (("true_label", "predicted_label"), confusion_matrix(y.train, y_pred.train, labels=codes)),
"combined": ( "complete": (
("true_label", "predicted_label"), ("true_label", "predicted_label"),
confusion_matrix(y.combined, y_pred.combined, labels=codes), confusion_matrix(y.complete, y_pred.complete, labels=codes),
), ),
}, },
coords={"true_label": labels, "predicted_label": labels}, coords={"true_label": labels, "predicted_label": labels},
@ -235,9 +238,9 @@ def _compute_feature_importance(model: Model, best_estimator: Pipeline, training
return feature_importances return feature_importances
def _compute_shap_explanation(model: Model, best_estimator: Pipeline, training_data: TrainingSet) -> Explanation: def _compute_shap_explanation(model: Model, best_estimator: Pipeline, training_data: TrainingSet) -> Explanation: # noqa: C901
match model: match model:
case "espa" | "knn" | "rf": # CUML models do not yet work with TreeExplainer... case "espa" | "knn":
train_transformed = training_data.X.as_numpy().train train_transformed = training_data.X.as_numpy().train
if "scaler" in best_estimator.named_steps: if "scaler" in best_estimator.named_steps:
train_transformed = best_estimator.named_steps["scaler"].transform(train_transformed) train_transformed = best_estimator.named_steps["scaler"].transform(train_transformed)
@ -263,15 +266,23 @@ def _compute_shap_explanation(model: Model, best_estimator: Pipeline, training_d
feature_names=training_data.feature_names, feature_names=training_data.feature_names,
output_names=training_data.target_labels, output_names=training_data.target_labels,
) )
case "rf":
masker = shap.maskers.Independent(data=training_data.X.as_numpy().train)
explainer = TreeExplainer(
best_estimator.named_steps["model"].as_sklearn(),
data=masker,
feature_names=training_data.feature_names,
)
case "xgboost": case "xgboost":
explainer = TreeExplainer(best_estimator.named_steps["model"], feature_names=training_data.feature_names) explainer = TreeExplainer(best_estimator.named_steps["model"], feature_names=training_data.feature_names)
case _: case _:
raise ValueError(f"Unknown model: {model}") raise ValueError(f"Unknown model: {model}")
samples = training_data.X.as_numpy().test samples = training_data.X.as_numpy().test
if len(samples) > 200: nsamples = 2 * samples.shape[1] + 2048
if len(samples) > nsamples:
rng = np.random.default_rng(seed=42) rng = np.random.default_rng(seed=42)
sample_indices = rng.choice(len(samples), size=200, replace=False) sample_indices = rng.choice(len(samples), size=nsamples, replace=False)
samples = samples[sample_indices] samples = samples[sample_indices]
if "scaler" in best_estimator.named_steps: if "scaler" in best_estimator.named_steps:
samples = best_estimator.named_steps["scaler"].transform(samples) samples = best_estimator.named_steps["scaler"].transform(samples)
@ -314,7 +325,7 @@ def hpsearch_cv(
task=settings.task, target=settings.target, device=settings.device task=settings.task, target=settings.target, device=settings.device
) )
model_hpo_config = get_model_hpo_config(settings.model, settings.task) model_hpo_config = settings.hpo_config
print(f"Using model: {settings.model} with parameters: {model_hpo_config.hp_config}") print(f"Using model: {settings.model} with parameters: {model_hpo_config.hp_config}")
metrics, refit = get_metrics(settings.task) metrics, refit = get_metrics(settings.task)
@ -324,8 +335,8 @@ def hpsearch_cv(
print(f"Pipeline steps: {pipeline.named_steps}") print(f"Pipeline steps: {pipeline.named_steps}")
hp_search = settings.build_search(pipeline, model_hpo_config, metrics, refit) hp_search = settings.build_search(pipeline, model_hpo_config, metrics, refit)
print(f"Starting hyperparameter search with {settings.n_iter} iterations...") print(f"Starting hyperparameter search for {settings.model} with {settings.n_iter} iterations...")
with stopwatch(f"RandomizedSearchCV fitting for {settings.n_iter} candidates"): with stopwatch(f"RandomizedSearchCV fitting of {settings.model} for {settings.n_iter} candidates"):
fit_params = {f"model__{k}": v for k, v in model_hpo_config.fit_params.items()} fit_params = {f"model__{k}": v for k, v in model_hpo_config.fit_params.items()}
hp_search.fit( hp_search.fit(
training_data.X.train, training_data.X.train,
@ -379,7 +390,6 @@ def hpsearch_cv(
), ),
task=settings.task, task=settings.task,
target=settings.target, target=settings.target,
training_set=training_data,
model=best_estimator, model=best_estimator,
model_type=settings.model, model_type=settings.model,
metrics=metrics, metrics=metrics,

View file

@ -85,7 +85,7 @@ def predict_proba(
grid_gdf = e.read_grid() grid_gdf = e.read_grid()
for batch in e.create_inference_df(batch_size=batch_size): for batch in e.create_inference_df(batch_size=batch_size):
# Filter rows containing NaN values # Filter rows containing NaN values
batch = batch.dropna(axis=0, how="any") batch = batch.dropna(axis="index", how="any")
# Skip empty batches (all rows had NaN values) # Skip empty batches (all rows had NaN values)
if len(batch) == 0: if len(batch) == 0:
@ -96,7 +96,13 @@ def predict_proba(
if isinstance(model, TabularPredictor): if isinstance(model, TabularPredictor):
print(f"Predicting batch of size {len(batch)} ({type(batch)}) with AutoGluon TabularPredictor...") print(f"Predicting batch of size {len(batch)} ({type(batch)}) with AutoGluon TabularPredictor...")
batch_preds = model.predict(batch) try:
batch_preds = model.predict(batch)
except Exception as ex:
print("Something went wrong")
print(batch)
print(batch.columns)
raise ex
print(f"Batch predictions type: {type(batch_preds)}, shape: {batch_preds.shape}") print(f"Batch predictions type: {type(batch_preds)}, shape: {batch_preds.shape}")
assert isinstance(batch_preds, pd.DataFrame | pd.Series), ( assert isinstance(batch_preds, pd.DataFrame | pd.Series), (

View file

@ -60,7 +60,13 @@ def get_search_space(hp_config: HPConfig) -> dict[str, list | rv_continuous_froz
f"Unknown distribution type for {key}: {dist['distribution']}" f"Unknown distribution type for {key}: {dist['distribution']}"
) )
distfn = getattr(scipy.stats, dist["distribution"]) distfn = getattr(scipy.stats, dist["distribution"])
search_space[key] = distfn(dist["low"], dist["high"]) # Add edge-case for uniform distribution, as low-high is different there
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.uniform.html#scipy.stats.uniform
# Using the parameters loc and scale, one obtains the uniform distribution on [loc, loc + scale].
if dist["distribution"] == "uniform":
search_space[key] = distfn(loc=dist["low"], scale=dist["high"] - dist["low"])
else:
search_space[key] = distfn(dist["low"], dist["high"])
return search_space return search_space
@ -188,7 +194,7 @@ def get_model_hpo_config(model: str, task: Task, **model_kwargs) -> ModelHPOConf
clf = RandomForestClassifier(split_criterion="entropy", **model_kwargs) clf = RandomForestClassifier(split_criterion="entropy", **model_kwargs)
return ModelHPOConfig(clf, rf_hpconfig) return ModelHPOConfig(clf, rf_hpconfig)
case ("rf", "regressor"): case ("rf", "regressor"):
reg = RandomForestRegressor(split_criterion="variance", **model_kwargs) reg = RandomForestRegressor(split_criterion="poisson", **model_kwargs)
return ModelHPOConfig(reg, rf_hpconfig) return ModelHPOConfig(reg, rf_hpconfig)
case ("knn", "classifier"): case ("knn", "classifier"):
clf = KNeighborsClassifier(**model_kwargs) clf = KNeighborsClassifier(**model_kwargs)

View file

@ -1,241 +0,0 @@
"""DEPRECATED!!! Training of classification models training."""
import pickle
from dataclasses import asdict, dataclass
from pathlib import Path
import cyclopts
import numpy as np
import pandas as pd
import toml
import xarray as xr
from rich import pretty, traceback
from sklearn import set_config
from sklearn.metrics import (
confusion_matrix,
)
from sklearn.model_selection import KFold, RandomizedSearchCV
from stopuhr import stopwatch
from entropice.ml.dataset import DatasetEnsemble, SplittedArrays
from entropice.ml.inference import predict_proba
from entropice.ml.models import (
extract_espa_feature_importance,
extract_espa_state,
extract_rf_feature_importance,
extract_xgboost_feature_importance,
get_model_hpo_config,
)
from entropice.utils.metrics import get_metrics, metric_functions
from entropice.utils.paths import get_training_results_dir
from entropice.utils.types import Model, TargetDataset, Task
# Install rich tracebacks and pretty-printing for nicer CLI diagnostics.
traceback.install()
pretty.install()
# CLI entry point; defaults may be overridden via training-config.toml.
cli = cyclopts.App("entropice-training", config=cyclopts.config.Toml("training-config.toml"))  # ty:ignore[invalid-argument-type]
@cyclopts.Parameter("*")
@dataclass(frozen=True, kw_only=True)
class RunSettings:
    """Cross-validation settings for model training."""

    # Number of RandomizedSearchCV candidates to sample.
    n_iter: int = 2000
    # Learning task to train for.
    task: Task = "binary"
    # Target dataset used as labels.
    target: TargetDataset = "darts_v1"
    # Model family to tune ("espa", "xgboost", "rf", "knn").
    model: Model = "espa"
@dataclass(frozen=True, kw_only=True)
class TrainingSettings(DatasetEnsemble, RunSettings):
    """Helper Wrapper to store combined training and dataset ensemble settings."""

    # Hyperparameter grid / distributions used for the search.
    param_grid: dict
    # Number of cross-validation splits.
    cv_splits: int
    # Names of the metrics computed during scoring.
    metrics: list[str]
    # Class label names, if available for the task.
    classes: list[str] | None
@cli.default
def random_cv(
    dataset_ensemble: DatasetEnsemble,
    settings: RunSettings = RunSettings(),
    experiment: str | None = None,
) -> Path:
    """Perform random cross-validation on the training dataset.

    Runs a RandomizedSearchCV over the model's hyperparameter space, then
    persists settings, the best estimator, CV results, metrics, optional
    confusion matrices, model state / feature importances, and grid-wide
    predicted probabilities into a results directory.

    Args:
        dataset_ensemble (DatasetEnsemble): The dataset ensemble configuration.
        settings (RunSettings): The cross-validation settings.
        experiment (str | None): Optional experiment name for results directory.

    Returns:
        Path: The directory where all results were stored.
    """
    # Since we use cuml and xgboost libraries, we can only enable array API for ESPA
    use_array_api = settings.model != "xgboost"
    device = "torch" if settings.model == "espa" else "cuda"
    set_config(array_api_dispatch=use_array_api)
    print("Creating training data...")
    training_data = dataset_ensemble.create_training_set(task=settings.task, target=settings.target, device=device)
    model_hpo_config = get_model_hpo_config(settings.model, settings.task)
    print(f"Using model: {settings.model} with parameters: {model_hpo_config.hp_config}")
    # Fixed random_state for reproducible folds and candidate sampling.
    cv = KFold(n_splits=5, shuffle=True, random_state=42)
    metrics, refit = get_metrics(settings.task)
    search = RandomizedSearchCV(
        model_hpo_config.model,
        model_hpo_config.search_space,
        n_iter=settings.n_iter,
        n_jobs=1,
        cv=cv,
        random_state=42,
        verbose=10,
        scoring=metrics,
        refit=refit,
    )
    print(f"Starting RandomizedSearchCV with {search.n_iter} candidates...")
    with stopwatch(f"RandomizedSearchCV fitting for {search.n_iter} candidates"):
        search.fit(
            training_data.X.train,
            # XGBoost returns its labels as numpy arrays instead of cupy arrays
            # Thus, for the scoring to work, we need to convert them back to numpy
            training_data.y.as_numpy().train if settings.model == "xgboost" else training_data.y.train,
            **model_hpo_config.fit_params,
        )
    print("Best parameters combination found:")
    best_estimator = search.best_estimator_
    best_parameters = best_estimator.get_params()
    for param_name in sorted(model_hpo_config.hp_config.keys()):
        print(f"{param_name}: {best_parameters[param_name]}")
    # Score the refit metric on the held-out test split.
    test_score = search.score(
        training_data.X.test,
        training_data.y.as_numpy().test if settings.model == "xgboost" else training_data.y.test,
    )
    print(
        f"{refit.replace('_', ' ').capitalize()} of the best parameters using the inner CV"
        f" of the random search: {search.best_score_:.3f}"
    )
    print(f"{refit.replace('_', ' ').capitalize()} on test set: {test_score:.3f}")
    results_dir = get_training_results_dir(
        experiment=experiment,
        name="random_search",
        grid=dataset_ensemble.grid,
        level=dataset_ensemble.level,
        task=settings.task,
        target=settings.target,
        model_type=settings.model,
    )
    # Store the search settings
    combined_settings = TrainingSettings(
        **asdict(settings),
        **asdict(dataset_ensemble),
        param_grid=model_hpo_config.hp_config,
        cv_splits=cv.get_n_splits(),
        metrics=metrics,
        classes=training_data.target_labels,
    )
    settings_file = results_dir / "search_settings.toml"
    print(f"Storing search settings to {settings_file}")
    with open(settings_file, "w") as f:
        toml.dump({"settings": asdict(combined_settings)}, f)
    # Store the best estimator model
    best_model_file = results_dir / "best_estimator_model.pkl"
    print(f"Storing best estimator model to {best_model_file}")
    with open(best_model_file, "wb") as f:
        pickle.dump(best_estimator, f, protocol=pickle.HIGHEST_PROTOCOL)
    # Store the search results
    results = pd.DataFrame(search.cv_results_)
    # Parse the params into individual columns
    params = pd.json_normalize(results["params"])  # ty:ignore[invalid-argument-type]
    # Concatenate the params columns with the original DataFrame
    results = pd.concat([results.drop(columns=["params"]), params], axis=1)
    results_file = results_dir / "search_results.parquet"
    print(f"Storing CV results to {results_file}")
    results.to_parquet(results_file)
    # Compute predictions on the all sets and move them to numpy for metric computations
    y_pred = SplittedArrays(
        train=best_estimator.predict(training_data.X.train),
        test=best_estimator.predict(training_data.X.test),
    ).as_numpy()
    # Compute and store metrics
    y = training_data.y.as_numpy()
    test_metrics = {metric: metric_functions[metric](y.test, y_pred.test) for metric in metrics}
    train_metrics = {metric: metric_functions[metric](y.train, y_pred.train) for metric in metrics}
    combined_metrics = {metric: metric_functions[metric](y.combined, y_pred.combined) for metric in metrics}
    all_metrics = {
        "test_metrics": test_metrics,
        "train_metrics": train_metrics,
        "combined_metrics": combined_metrics,
    }
    test_metrics_file = results_dir / "metrics.toml"
    print(f"Storing test metrics to {test_metrics_file}")
    with open(test_metrics_file, "w") as f:
        toml.dump(all_metrics, f)
    # Make confusion matrices for classification tasks
    if settings.task in ["binary", "count_regimes", "density_regimes"]:
        codes = np.array(training_data.target_codes)
        cm = xr.Dataset(
            {
                "test": (("true_label", "predicted_label"), confusion_matrix(y.test, y_pred.test, labels=codes)),
                "train": (("true_label", "predicted_label"), confusion_matrix(y.train, y_pred.train, labels=codes)),
                "combined": (
                    ("true_label", "predicted_label"),
                    confusion_matrix(y.combined, y_pred.combined, labels=codes),
                ),
            },
            coords={"true_label": training_data.target_labels, "predicted_label": training_data.target_labels},
        )
        # Store the confusion matrices
        cm_file = results_dir / "confusion_matrix.nc"
        print(f"Storing confusion matrices to {cm_file}")
        cm.to_netcdf(cm_file, engine="h5netcdf")
    # Get the inner state of the best estimator
    # NOTE: only ESPA exposes an inner state; espa/xgboost/rf all export feature importances.
    if settings.model == "espa":
        state = extract_espa_state(best_estimator, training_data)
        state_file = results_dir / "best_estimator_state.nc"
        print(f"Storing best estimator state to {state_file}")
        state.to_netcdf(state_file, engine="h5netcdf")
        fi = extract_espa_feature_importance(best_estimator, training_data)
        fi_file = results_dir / "best_estimator_feature_importance.parquet"
        print(f"Storing best estimator feature importance to {fi_file}")
        fi.to_parquet(fi_file)
    elif settings.model == "xgboost":
        fi = extract_xgboost_feature_importance(best_estimator, training_data)
        fi_file = results_dir / "best_estimator_feature_importance.parquet"
        print(f"Storing best estimator feature importance to {fi_file}")
        fi.to_parquet(fi_file)
    elif settings.model == "rf":
        fi = extract_rf_feature_importance(best_estimator, training_data)
        fi_file = results_dir / "best_estimator_feature_importance.parquet"
        print(f"Storing best estimator feature importance to {fi_file}")
        fi.to_parquet(fi_file)
    # Predict probabilities for all cells
    print("Predicting probabilities for all cells...")
    preds = predict_proba(dataset_ensemble, model=best_estimator, task=settings.task, device=device)
    print(f"Predicted probabilities DataFrame with {len(preds)} entries.")
    preds_file = results_dir / "predicted_probabilities.parquet"
    print(f"Storing predicted probabilities to {preds_file}")
    preds.to_parquet(preds_file)
    stopwatch.summary()
    print("Done.")
    return results_dir
# Script entry point: dispatch to the cyclopts CLI.
if __name__ == "__main__":
    cli()

View file

@ -1,3 +1,5 @@
"""Training utilities for Entropice."""
import pickle import pickle
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
from functools import cached_property from functools import cached_property
@ -47,7 +49,7 @@ def move_data_to_device(data: ndarray, device: Literal["torch", "cuda", "cpu"])
@dataclass @dataclass
class HPOCV: class HPOCV: # noqa: D101
method: HPSearch method: HPSearch
splitter: Splitter splitter: Splitter
scaler: Scaler scaler: Scaler
@ -55,13 +57,13 @@ class HPOCV:
n_iter: int n_iter: int
hpconfig: HPConfig hpconfig: HPConfig
@property @cached_property
def search_space(self): def search_space(self): # noqa: D102
return get_search_space(self.hpconfig) return get_search_space(self.hpconfig)
@dataclass @dataclass
class AutoML: class AutoML: # noqa: D101
time_budget: int time_budget: int
preset: str preset: str
hpo: bool hpo: bool
@ -80,7 +82,7 @@ class Training:
f(dataset, method) -> (model, metrics) f(dataset, method) -> (model, metrics)
Metrics refer to a simple dataframe in long format, with the columns: "metric", "split", "value". Metrics refer to a simple dataframe in long format, with the columns: "metric", "split", "score".
Split is either "train", "test" or "complete". Split is either "train", "test" or "complete".
""" """
@ -89,12 +91,11 @@ class Training:
method: HPOCV | AutoML method: HPOCV | AutoML
task: Task task: Task
target: TargetDataset target: TargetDataset
training_set: TrainingSet # TODO: Store Training Set to improve loading time (?)
model: Any model: Any
model_type: Model model_type: Model
metrics: pd.DataFrame metrics: pd.DataFrame
feature_importance: pd.DataFrame feature_importance: pd.DataFrame
shap_explanation: Explanation shap_explanation: Explanation | None
predictions: gpd.GeoDataFrame predictions: gpd.GeoDataFrame
confusion_matrix: xr.Dataset | None # only for classification tasks confusion_matrix: xr.Dataset | None # only for classification tasks
cv_results: pd.DataFrame | None # only for HPOCV cv_results: pd.DataFrame | None # only for HPOCV
@ -115,6 +116,31 @@ class Training:
"""Get the list of metric names from the metrics DataFrame.""" """Get the list of metric names from the metrics DataFrame."""
return self.metrics["metric"].unique().tolist() return self.metrics["metric"].unique().tolist()
@cached_property
def training_set(self) -> TrainingSet:
"""Get the training set for this training run."""
return self.dataset.create_training_set(self.task, self.target, device="cpu")
@property
def method_type(self) -> Literal["HPOCV", "AutoML"]:
"""Get the type of method used in this training run."""
if isinstance(self.method, HPOCV):
return "HPOCV"
elif isinstance(self.method, AutoML):
return "AutoML"
else:
raise ValueError(f"Unknown method type: {type(self.method)}")
@property
def n_trials(self) -> int | None:
"""Get the number of trials in the hyperparameter search, if applicable."""
if self.method_type == "HPOCV" and self.cv_results is not None:
return len(self.cv_results)
elif self.method_type == "AutoML" and self.leaderboard is not None:
return len(self.leaderboard)
else:
return None
@property @property
def get_state(self) -> xr.Dataset | pd.DataFrame | None: def get_state(self) -> xr.Dataset | pd.DataFrame | None:
"""Get the inner state of the trained model, if available.""" """Get the inner state of the trained model, if available."""
@ -123,6 +149,10 @@ class Training:
else: else:
return None return None
def get_metrics_from_split(self, split: Literal["train", "test", "complete"]) -> dict[str, float]:
"""Get a dictionary of metric names and values for the specified split."""
return self.metrics[self.metrics["split"] == split].set_index("metric")["score"].to_dict() # ty:ignore[invalid-return-type]
def save(self): def save(self):
"""Save the training results to the specified path.""" """Save the training results to the specified path."""
self.path.mkdir(parents=True, exist_ok=True) self.path.mkdir(parents=True, exist_ok=True)
@ -130,7 +160,6 @@ class Training:
model_file = self.path / "model.pkl" model_file = self.path / "model.pkl"
metrics_file = self.path / "metrics.parquet" metrics_file = self.path / "metrics.parquet"
feature_importance_file = self.path / "feature_importance.parquet" feature_importance_file = self.path / "feature_importance.parquet"
explanations_file = self.path / "shap_explanation.pkl"
predictions_file = self.path / "predictions.parquet" predictions_file = self.path / "predictions.parquet"
# Save config # Save config
with open(config_file, "w") as f: with open(config_file, "w") as f:
@ -150,9 +179,13 @@ class Training:
model_file.write_bytes(pickle.dumps(self.model)) model_file.write_bytes(pickle.dumps(self.model))
self.metrics.to_parquet(metrics_file) self.metrics.to_parquet(metrics_file)
self.feature_importance.to_parquet(feature_importance_file) self.feature_importance.to_parquet(feature_importance_file)
explanations_file.write_bytes(pickle.dumps(self.shap_explanation))
self.predictions.to_parquet(predictions_file) self.predictions.to_parquet(predictions_file)
# Save SHAP explanation if it exists
if self.shap_explanation is not None:
explanations_file = self.path / "shap_explanation.pkl"
explanations_file.write_bytes(pickle.dumps(self.shap_explanation))
# Save the confusion matrix if it exists # Save the confusion matrix if it exists
if self.confusion_matrix is not None: if self.confusion_matrix is not None:
cm_file = self.path / "confusion_matrix.nc" cm_file = self.path / "confusion_matrix.nc"
@ -168,13 +201,14 @@ class Training:
self.leaderboard.to_parquet(leaderboard_file) self.leaderboard.to_parquet(leaderboard_file)
@classmethod @classmethod
def load(cls, path: Path, device: Literal["cpu", "cuda"] = "cpu") -> "Training": def load(cls, path: Path) -> "Training":
"""Load a training run from the specified path.""" """Load a training run from the specified path."""
config_file = path / "training_config.toml" config_file = path / "training_config.toml"
model_file = path / "model.pkl" model_file = path / "model.pkl"
metrics_file = path / "metrics.parquet" metrics_file = path / "metrics.parquet"
feature_importance_file = path / "feature_importance.parquet" feature_importance_file = path / "feature_importance.parquet"
predictions_file = path / "predictions.parquet" predictions_file = path / "predictions.parquet"
explanations_file = path / "shap_explanation.pkl"
cm_file = path / "confusion_matrix.nc" cm_file = path / "confusion_matrix.nc"
cv_results_file = path / "search_results.parquet" cv_results_file = path / "search_results.parquet"
leaderboard_file = path / "leaderboard.parquet" leaderboard_file = path / "leaderboard.parquet"
@ -188,7 +222,6 @@ class Training:
model_type = config["model_type"] model_type = config["model_type"]
dataset = DatasetEnsemble(**config["dataset"]) dataset = DatasetEnsemble(**config["dataset"])
training_set = dataset.create_training_set(task, target, device)
method_type = config["method_type"] method_type = config["method_type"]
if method_type == "HPOCV": if method_type == "HPOCV":
@ -202,9 +235,13 @@ class Training:
model = pickle.loads(model_file.read_bytes()) model = pickle.loads(model_file.read_bytes())
metrics = pd.read_parquet(metrics_file) metrics = pd.read_parquet(metrics_file)
feature_importance = pd.read_parquet(feature_importance_file) feature_importance = pd.read_parquet(feature_importance_file)
shap_explanation = pickle.loads((path / "shap_explanation.pkl").read_bytes())
predictions = gpd.read_parquet(predictions_file) predictions = gpd.read_parquet(predictions_file)
# Load SHAP explanation if it exists
shap_explanation = None
if explanations_file.exists():
shap_explanation = pickle.loads(explanations_file.read_bytes())
# Load confusion matrix if it exists # Load confusion matrix if it exists
confusion_matrix = None confusion_matrix = None
if cm_file.exists(): if cm_file.exists():
@ -225,7 +262,6 @@ class Training:
method=method, method=method,
task=task, task=task,
target=target, target=target,
training_set=training_set,
model=model, model=model,
model_type=model_type, model_type=model_type,
metrics=metrics, metrics=metrics,

View file

@ -18,7 +18,6 @@ def sample_ensemble() -> Generator[DatasetEnsemble]:
grid="hex", grid="hex",
level=3, # Use level 3 for much faster tests level=3, # Use level 3 for much faster tests
members=["AlphaEarth"], # Use only one member for faster tests members=["AlphaEarth"], # Use only one member for faster tests
add_lonlat=True,
) )
@ -29,7 +28,6 @@ def sample_ensemble_v2() -> Generator[DatasetEnsemble]:
grid="hex", grid="hex",
level=3, # Use level 3 for much faster tests level=3, # Use level 3 for much faster tests
members=["AlphaEarth"], # Use only one member for faster tests members=["AlphaEarth"], # Use only one member for faster tests
add_lonlat=True,
) )
@ -41,7 +39,6 @@ class TestDatasetEnsemble:
assert sample_ensemble.grid == "hex" assert sample_ensemble.grid == "hex"
assert sample_ensemble.level == 3 assert sample_ensemble.level == 3
assert "AlphaEarth" in sample_ensemble.members assert "AlphaEarth" in sample_ensemble.members
assert sample_ensemble.add_lonlat is True
def test_get_targets_returns_geodataframe(self, sample_ensemble: DatasetEnsemble) -> None: def test_get_targets_returns_geodataframe(self, sample_ensemble: DatasetEnsemble) -> None:
"""Test that get_targets() returns a GeoDataFrame.""" """Test that get_targets() returns a GeoDataFrame."""
@ -106,10 +103,9 @@ class TestDatasetEnsemble:
# Should NOT have geometry column # Should NOT have geometry column
assert "geometry" not in features.columns assert "geometry" not in features.columns
# Should have location columns if add_lonlat is True # Should have location columns
if sample_ensemble.add_lonlat: assert "x" in features.columns
assert "x" in features.columns assert "y" in features.columns
assert "y" in features.columns
# Should have grid property columns # Should have grid property columns
assert "cell_area" in features.columns assert "cell_area" in features.columns

View file

@ -1,222 +0,0 @@
"""Tests for training.py module, specifically random_cv function.
This test suite validates the random_cv training function across all model-task
combinations using a minimal hex level 3 grid with synopsis temporal mode.
Test Coverage:
- All 12 model-task combinations (4 models x 3 tasks): espa, xgboost, rf, knn
- Device handling for each model type (torch/CUDA/cuML compatibility)
- Multi-label target dataset support
- Temporal mode configuration (synopsis)
- Output file creation and validation
Running Tests:
# Run all training tests (18 tests total, ~3 iterations each)
pixi run pytest tests/test_training.py -v
# Run only device handling tests
pixi run pytest tests/test_training.py::TestRandomCV::test_device_handling -v
# Run a specific model-task combination
pixi run pytest tests/test_training.py::TestRandomCV::test_random_cv_all_combinations[binary-espa] -v
Note: Tests use minimal iterations (3) and level 3 grid for speed.
Full production runs use higher iteration counts (100-2000).
"""
import shutil
import pytest
from entropice.ml.dataset import DatasetEnsemble
from entropice.ml.randomsearch import RunSettings, random_cv
from entropice.utils.types import Model, Task
@pytest.fixture(scope="module")
def test_ensemble():
    """Provide a minimal module-scoped DatasetEnsemble for the tests.

    A hex grid at level 3 with the synopsis temporal mode keeps runtime low.
    """
    ensemble = DatasetEnsemble(
        grid="hex",
        level=3,
        temporal_mode="synopsis",
        # A single member keeps dataset construction fast.
        members=["AlphaEarth"],
        add_lonlat=True,
    )
    return ensemble
@pytest.fixture
def cleanup_results():
    """Yield a registration callback and remove registered directories on teardown.

    Tests call the yielded function with each results directory they create;
    once the test completes, every registered directory that still exists is
    deleted recursively.
    """
    registered = []

    def _register(results_dir):
        """Remember *results_dir* for teardown and return it unchanged."""
        registered.append(results_dir)
        return results_dir

    yield _register

    # Teardown: delete only what this particular test registered.
    for path in registered:
        if path.exists():
            shutil.rmtree(path)
# Model-task combinations to test.
# Note: Not all combinations make sense, but we test all to ensure robustness.
MODELS: list[Model] = ["espa", "xgboost", "rf", "knn"]
TASKS: list[Task] = ["binary", "count", "density"]
class TestRandomCV:
    """Test suite for the random_cv training entry point."""

    @pytest.mark.parametrize("model", MODELS)
    @pytest.mark.parametrize("task", TASKS)
    def test_random_cv_all_combinations(self, test_ensemble, model: Model, task: Task, cleanup_results):
        """Test random_cv with all model-task combinations.

        Runs 3 iterations for each combination to verify that:
        - the function completes without errors,
        - device handling works correctly for each model type,
        - all expected output files are created.
        """
        # Use darts_v1 as the primary target for all tests
        settings = RunSettings(
            n_iter=3,
            task=task,
            target="darts_v1",
            model=model,
        )
        # Run the cross-validation and get the results directory
        results_dir = random_cv(
            dataset_ensemble=test_ensemble,
            settings=settings,
            experiment="test_training",
        )
        cleanup_results(results_dir)
        # Verify results directory was created
        assert results_dir.exists(), f"Results directory not created for {model=}, {task=}"
        # Files random_cv always writes.
        expected_files = [
            "search_settings.toml",
            "best_estimator_model.pkl",
            "search_results.parquet",
            "metrics.toml",
            "predicted_probabilities.parquet",
        ]
        # Task-specific files: random_cv only writes a confusion matrix for
        # classification tasks ("binary", "count_regimes", "density_regimes").
        if task == "binary" or "_regimes" in task:
            expected_files.append("confusion_matrix.nc")
        # Model-specific files: only ESPA exposes an inner model state, while
        # espa/xgboost/rf all export feature importances.
        if model == "espa":
            expected_files.append("best_estimator_state.nc")
        if model in ["espa", "xgboost", "rf"]:
            expected_files.append("best_estimator_feature_importance.parquet")
        for filename in expected_files:
            filepath = results_dir / filename
            assert filepath.exists(), f"Expected file {filename} not found for {model=}, {task=}"

    @pytest.mark.parametrize("model", MODELS)
    def test_device_handling(self, test_ensemble, model: Model, cleanup_results):
        """Test that device handling works correctly for each model type.

        Different models require different device configurations:
        - espa: uses torch with array API dispatch
        - xgboost: uses CUDA without array API dispatch
        - rf/knn: GPU-accelerated via cuML
        """
        settings = RunSettings(
            n_iter=3,
            task="binary",  # Simple binary task for device testing
            target="darts_v1",
            model=model,
        )
        # This should complete without device-related errors
        try:
            results_dir = random_cv(
                dataset_ensemble=test_ensemble,
                settings=settings,
                experiment="test_training",
            )
            cleanup_results(results_dir)
        except RuntimeError as e:
            # Check if the error is device-related; anything else is re-raised as-is.
            error_msg = str(e).lower()
            device_keywords = ["cuda", "gpu", "device", "cpu", "torch", "cupy"]
            if any(keyword in error_msg for keyword in device_keywords):
                pytest.fail(f"Device handling error for {model=}: {e}")
            else:
                # Re-raise non-device errors
                raise

    def test_random_cv_with_mllabels(self, test_ensemble, cleanup_results):
        """Test random_cv with a multi-label target dataset."""
        settings = RunSettings(
            n_iter=3,
            task="binary",
            target="darts_mllabels",
            model="espa",
        )
        # Run the cross-validation and get the results directory
        results_dir = random_cv(
            dataset_ensemble=test_ensemble,
            settings=settings,
            experiment="test_training",
        )
        cleanup_results(results_dir)
        # Verify results were created
        assert results_dir.exists(), "Results directory not created"
        assert (results_dir / "search_settings.toml").exists()

    def test_temporal_mode_synopsis(self, cleanup_results):
        """Test that temporal_mode='synopsis' is correctly used and persisted."""
        import toml

        ensemble = DatasetEnsemble(
            grid="hex",
            level=3,
            temporal_mode="synopsis",
            members=["AlphaEarth"],
            add_lonlat=True,
        )
        settings = RunSettings(
            n_iter=3,
            task="binary",
            target="darts_v1",
            model="espa",
        )
        # This should use synopsis mode (all years aggregated)
        results_dir = random_cv(
            dataset_ensemble=ensemble,
            settings=settings,
            experiment="test_training",
        )
        cleanup_results(results_dir)
        # Verify the settings were stored correctly
        assert results_dir.exists(), "Results directory not created"
        with open(results_dir / "search_settings.toml") as f:
            stored_settings = toml.load(f)
        assert stored_settings["settings"]["temporal_mode"] == "synopsis"