entropice/scripts/06static/healpix_darts_v1.sh

49 lines
2.2 KiB
Bash
Raw Normal View History

2026-01-18 20:16:47 +01:00
#!/bin/bash
# Guard: the `train` entry point must be resolvable on PATH, which is taken
# here as the sign that the script runs inside the pixi environment.
# `command -v` is a shell builtin, unlike the external `which`, whose presence
# and exit-status behaviour vary between systems.
if ! command -v train >/dev/null 2>&1; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "This script must be run inside the pixi environment." >&2
  exit 1
fi
# Run trainings on synopsis datasets for every level x task x model
# combination of one fixed grid and target.
grid="healpix"
target="darts_v1"
#levels=(6 7 8 9 10)
levels=(8 9 10)

# Succeeds (status 0) when the ERA5 median filters apply to a grid/level pair,
# i.e. only for hex-3, hex-4, healpix-6 and healpix-7.
# Arguments: $1 - grid name, $2 - grid level
needs_era5_filter() {
  case "$1-$2" in
    hex-3 | hex-4 | healpix-6 | healpix-7) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds (status 0) when a task/model combination must be skipped:
#   - espa only supports the binary task
#   - rf is super slow for regression tasks (density, count)
# Arguments: $1 - task, $2 - model
skip_combination() {
  local task=$1 model=$2
  [[ "$task" == "density" || "$task" == "count" ]] || return 1
  [[ "$model" == "espa" || "$model" == "rf" ]]
}

# Prints the number of search iterations for a model (fewer for slow models).
# Arguments: $1 - model
n_iter_for() {
  case "$1" in
    knn | rf) echo 5 ;;
    *) echo 100 ;;
  esac
}

for level in "${levels[@]}"; do
  # Optional flags live in an array so each flag is passed as exactly one
  # argument without relying on unquoted word splitting.
  # NOTE(review): with grid=healpix and levels=(8 9 10) this branch never
  # triggers; it only matters once levels 6/7 are re-enabled.
  era5_dimension_filters=()
  if needs_era5_filter "$grid" "$level"; then
    era5_dimension_filters=(
      "--dimension-filters.ERA5-shoulder.aggregations=median"
      "--dimension-filters.ERA5-seasonal.aggregations=median"
      "--dimension-filters.ERA5-yearly.aggregations=median"
    )
  fi

  for task in binary density count; do
    for model in espa xgboost rf knn; do
      if skip_combination "$task" "$model"; then
        continue
      fi

      niter=$(n_iter_for "$model")

      echo
      echo "----------------------------------------"
      echo "Running training for grid=$grid, level=$level, target=$target, task=$task, model=$model"
      # A failing run does not abort the sweep; the remaining combinations
      # are still attempted.
      # NOTE(review): the AlphaEarth filter uses `.agg` while the other
      # filters use `.aggregations` - confirm this matches the CLI field name.
      train --grid "$grid" --level "$level" --target "$target" --task "$task" --model "$model" --n-iter "$niter" --temporal-mode synopsis --experiment "static-variables" --dimension-filters.ArcticDEM.aggregations=median --dimension-filters.AlphaEarth.agg=median "${era5_dimension_filters[@]}"
      echo "----------------------------------------"
      echo
    done
  done
done