Fix training and finalize dataset page

This commit is contained in:
Tobias Hölzer 2026-01-18 20:16:47 +01:00
parent c358bb63bc
commit 636c034b55
30 changed files with 533 additions and 851 deletions

View file

@ -0,0 +1,37 @@
#!/bin/bash
# Sweep AutoGluon trainings on the synopsis datasets over every
# grid / level / target / task combination listed below.
#
# `set -e` is intentionally not enabled: a failing run is reported on stderr
# and the sweep continues with the next combination.

# The `autogluon` command must be on PATH (the error text below attributes it
# to the pixi environment).
if ! command -v autogluon >/dev/null 2>&1; then
  echo "This script must be run inside the pixi environment." >&2
  exit 1
fi

# Arguments passed unchanged to every run.
# NOTE(review): AlphaEarth is filtered via `.agg` while the other datasets use
# `.aggregations` — presumably its dimension is named differently; confirm
# against the CLI.
common_args=(
  --time-limit 600
  --temporal-mode synopsis
  --experiment "static-variables-autogluon"
  --dimension-filters.ArcticDEM.aggregations=median
  --dimension-filters.AlphaEarth.agg=median
)

# Extra ERA5 filters, kept as an array so each flag stays a separate word
# without relying on unquoted word-splitting.
era5_median_filters=(
  --dimension-filters.ERA5-shoulder.aggregations=median
  --dimension-filters.ERA5-seasonal.aggregations=median
  --dimension-filters.ERA5-yearly.aggregations=median
)

for grid in hex healpix; do
  # Each grid type is swept over its own set of levels.
  if [[ "$grid" == "hex" ]]; then
    levels=(3 4 5 6)
  else
    levels=(6 7 8 9 10)
  fi

  for level in "${levels[@]}"; do
    # Only apply ERA5 filter for hex-3, hex-4, healpix-6 and healpix-7
    case "$grid-$level" in
      hex-3 | hex-4 | healpix-6 | healpix-7)
        era5_args=("${era5_median_filters[@]}")
        ;;
      *)
        era5_args=()
        ;;
    esac

    for target in darts_v1 darts_mllabels; do
      for task in binary density count; do
        echo
        echo "----------------------------------------"
        echo "Running autogluon training for grid=$grid, level=$level, target=$target, task=$task"
        autogluon --grid "$grid" --level "$level" --target "$target" --task "$task" \
          "${common_args[@]}" "${era5_args[@]}" \
          || echo "WARNING: autogluon failed (exit status $?) for grid=$grid, level=$level, target=$target, task=$task" >&2
        echo "----------------------------------------"
        echo
      done
    done
  done
done

View file

@ -0,0 +1,49 @@
#!/bin/bash
# Sweep `train` runs on the synopsis datasets for a fixed grid ("healpix") and
# target ("darts_mllabels") over the levels, tasks and models listed below.
#
# `set -e` is intentionally not enabled: a failing run is reported on stderr
# and the sweep continues with the next combination.

# The `train` command must be on PATH (the error text below attributes it to
# the pixi environment).
if ! command -v train >/dev/null 2>&1; then
  echo "This script must be run inside the pixi environment." >&2
  exit 1
fi

grid="healpix"
target="darts_mllabels"
# levels=(6 7 8 9 10)  # wider sweep, currently disabled
levels=(8 9 10)

# Arguments passed unchanged to every run.
# NOTE(review): AlphaEarth is filtered via `.agg` while the other datasets use
# `.aggregations` — presumably its dimension is named differently; confirm
# against the CLI.
common_args=(
  --temporal-mode synopsis
  --experiment "static-variables"
  --dimension-filters.ArcticDEM.aggregations=median
  --dimension-filters.AlphaEarth.agg=median
)

# Extra ERA5 filters, kept as an array so each flag stays a separate word
# without relying on unquoted word-splitting.
era5_median_filters=(
  --dimension-filters.ERA5-shoulder.aggregations=median
  --dimension-filters.ERA5-seasonal.aggregations=median
  --dimension-filters.ERA5-yearly.aggregations=median
)

for level in "${levels[@]}"; do
  # Only apply ERA5 filter for hex-3, hex-4, healpix-6 and healpix-7
  case "$grid-$level" in
    hex-3 | hex-4 | healpix-6 | healpix-7)
      era5_args=("${era5_median_filters[@]}")
      ;;
    *)
      era5_args=()
      ;;
  esac

  for task in binary density count; do
    for model in espa xgboost rf knn; do
      case "$task:$model" in
        # espa only supports binary
        density:espa | count:espa) continue ;;
        # rf is super slow for regression tasks
        density:rf | count:rf) continue ;;
      esac

      # Set number of iterations (use less for slow models)
      case "$model" in
        knn | rf) niter=5 ;;
        *) niter=100 ;;
      esac

      echo
      echo "----------------------------------------"
      echo "Running training for grid=$grid, level=$level, target=$target, task=$task, model=$model"
      train --grid "$grid" --level "$level" --target "$target" --task "$task" \
        --model "$model" --n-iter "$niter" \
        "${common_args[@]}" "${era5_args[@]}" \
        || echo "WARNING: train failed (exit status $?) for grid=$grid, level=$level, target=$target, task=$task, model=$model" >&2
      echo "----------------------------------------"
      echo
    done
  done
done

View file

@ -0,0 +1,48 @@
#!/bin/bash
# Sweep `train` runs on the synopsis datasets for a fixed grid ("healpix") and
# target ("darts_v1") over the levels, tasks and models listed below.
#
# `set -e` is intentionally not enabled: a failing run is reported on stderr
# and the sweep continues with the next combination.

# The `train` command must be on PATH (the error text below attributes it to
# the pixi environment).
if ! command -v train >/dev/null 2>&1; then
  echo "This script must be run inside the pixi environment." >&2
  exit 1
fi

grid="healpix"
target="darts_v1"
# levels=(6 7 8 9 10)  # wider sweep, currently disabled
levels=(8 9 10)

# Arguments passed unchanged to every run.
# NOTE(review): AlphaEarth is filtered via `.agg` while the other datasets use
# `.aggregations` — presumably its dimension is named differently; confirm
# against the CLI.
common_args=(
  --temporal-mode synopsis
  --experiment "static-variables"
  --dimension-filters.ArcticDEM.aggregations=median
  --dimension-filters.AlphaEarth.agg=median
)

# Extra ERA5 filters, kept as an array so each flag stays a separate word
# without relying on unquoted word-splitting.
era5_median_filters=(
  --dimension-filters.ERA5-shoulder.aggregations=median
  --dimension-filters.ERA5-seasonal.aggregations=median
  --dimension-filters.ERA5-yearly.aggregations=median
)

for level in "${levels[@]}"; do
  # Only apply ERA5 filter for hex-3, hex-4, healpix-6 and healpix-7
  case "$grid-$level" in
    hex-3 | hex-4 | healpix-6 | healpix-7)
      era5_args=("${era5_median_filters[@]}")
      ;;
    *)
      era5_args=()
      ;;
  esac

  for task in binary density count; do
    for model in espa xgboost rf knn; do
      case "$task:$model" in
        # espa only supports binary
        density:espa | count:espa) continue ;;
        # rf is super slow for regression tasks
        density:rf | count:rf) continue ;;
      esac

      # Set number of iterations (use less for slow models)
      case "$model" in
        knn | rf) niter=5 ;;
        *) niter=100 ;;
      esac

      echo
      echo "----------------------------------------"
      echo "Running training for grid=$grid, level=$level, target=$target, task=$task, model=$model"
      train --grid "$grid" --level "$level" --target "$target" --task "$task" \
        --model "$model" --n-iter "$niter" \
        "${common_args[@]}" "${era5_args[@]}" \
        || echo "WARNING: train failed (exit status $?) for grid=$grid, level=$level, target=$target, task=$task, model=$model" >&2
      echo "----------------------------------------"
      echo
    done
  done
done

View file

@ -0,0 +1,48 @@
#!/bin/bash
# Sweep `train` runs on the synopsis datasets for a fixed grid ("hex") and
# target ("darts_mllabels") over the levels, tasks and models listed below.
#
# `set -e` is intentionally not enabled: a failing run is reported on stderr
# and the sweep continues with the next combination.

# The `train` command must be on PATH (the error text below attributes it to
# the pixi environment).
if ! command -v train >/dev/null 2>&1; then
  echo "This script must be run inside the pixi environment." >&2
  exit 1
fi

grid="hex"
target="darts_mllabels"
# levels=(3 4 5 6)  # wider sweep, currently disabled
levels=(5 6)

# Arguments passed unchanged to every run.
# NOTE(review): AlphaEarth is filtered via `.agg` while the other datasets use
# `.aggregations` — presumably its dimension is named differently; confirm
# against the CLI.
common_args=(
  --temporal-mode synopsis
  --experiment "static-variables"
  --dimension-filters.ArcticDEM.aggregations=median
  --dimension-filters.AlphaEarth.agg=median
)

# Extra ERA5 filters, kept as an array so each flag stays a separate word
# without relying on unquoted word-splitting.
era5_median_filters=(
  --dimension-filters.ERA5-shoulder.aggregations=median
  --dimension-filters.ERA5-seasonal.aggregations=median
  --dimension-filters.ERA5-yearly.aggregations=median
)

for level in "${levels[@]}"; do
  # Only apply ERA5 filter for hex-3, hex-4, healpix-6 and healpix-7
  case "$grid-$level" in
    hex-3 | hex-4 | healpix-6 | healpix-7)
      era5_args=("${era5_median_filters[@]}")
      ;;
    *)
      era5_args=()
      ;;
  esac

  for task in binary density count; do
    for model in espa xgboost rf knn; do
      case "$task:$model" in
        # espa only supports binary
        density:espa | count:espa) continue ;;
        # rf is super slow for regression tasks
        density:rf | count:rf) continue ;;
      esac

      # Set number of iterations (use less for slow models)
      case "$model" in
        knn | rf) niter=5 ;;
        *) niter=100 ;;
      esac

      echo
      echo "----------------------------------------"
      echo "Running training for grid=$grid, level=$level, target=$target, task=$task, model=$model"
      train --grid "$grid" --level "$level" --target "$target" --task "$task" \
        --model "$model" --n-iter "$niter" \
        "${common_args[@]}" "${era5_args[@]}" \
        || echo "WARNING: train failed (exit status $?) for grid=$grid, level=$level, target=$target, task=$task, model=$model" >&2
      echo "----------------------------------------"
      echo
    done
  done
done

View file

@ -0,0 +1,48 @@
#!/bin/bash
# Sweep `train` runs on the synopsis datasets for a fixed grid ("hex") and
# target ("darts_v1") over the levels, tasks and models listed below.
#
# `set -e` is intentionally not enabled: a failing run is reported on stderr
# and the sweep continues with the next combination.

# The `train` command must be on PATH (the error text below attributes it to
# the pixi environment).
if ! command -v train >/dev/null 2>&1; then
  echo "This script must be run inside the pixi environment." >&2
  exit 1
fi

grid="hex"
target="darts_v1"
# levels=(3 4 5 6)  # wider sweep, currently disabled
levels=(5 6)

# Arguments passed unchanged to every run.
# NOTE(review): AlphaEarth is filtered via `.agg` while the other datasets use
# `.aggregations` — presumably its dimension is named differently; confirm
# against the CLI.
common_args=(
  --temporal-mode synopsis
  --experiment "static-variables"
  --dimension-filters.ArcticDEM.aggregations=median
  --dimension-filters.AlphaEarth.agg=median
)

# Extra ERA5 filters, kept as an array so each flag stays a separate word
# without relying on unquoted word-splitting.
era5_median_filters=(
  --dimension-filters.ERA5-shoulder.aggregations=median
  --dimension-filters.ERA5-seasonal.aggregations=median
  --dimension-filters.ERA5-yearly.aggregations=median
)

for level in "${levels[@]}"; do
  # Only apply ERA5 filter for hex-3, hex-4, healpix-6 and healpix-7
  case "$grid-$level" in
    hex-3 | hex-4 | healpix-6 | healpix-7)
      era5_args=("${era5_median_filters[@]}")
      ;;
    *)
      era5_args=()
      ;;
  esac

  for task in binary density count; do
    for model in espa xgboost rf knn; do
      case "$task:$model" in
        # espa only supports binary
        density:espa | count:espa) continue ;;
        # rf is super slow for regression tasks
        density:rf | count:rf) continue ;;
      esac

      # Set number of iterations (use less for slow models)
      case "$model" in
        knn | rf) niter=5 ;;
        *) niter=100 ;;
      esac

      echo
      echo "----------------------------------------"
      echo "Running training for grid=$grid, level=$level, target=$target, task=$task, model=$model"
      train --grid "$grid" --level "$level" --target "$target" --task "$task" \
        --model "$model" --n-iter "$niter" \
        "${common_args[@]}" "${era5_args[@]}" \
        || echo "WARNING: train failed (exit status $?) for grid=$grid, level=$level, target=$target, task=$task, model=$model" >&2
      echo "----------------------------------------"
      echo
    done
  done
done