Add the areas tab to the new dataset page

This commit is contained in:
Tobias Hölzer 2026-01-17 00:17:21 +01:00
parent cb7b0f9e6b
commit 3581f9b80f
3 changed files with 233 additions and 1 deletions

View file

@ -0,0 +1,123 @@
"""Plots for visualizing grid statistics."""
import geopandas as gpd
import matplotlib.colors as mcolors
import numpy as np
import pydeck as pdk
from entropice.dashboard.utils.colors import get_cmap, hex_to_rgb
from entropice.dashboard.utils.geometry import fix_hex_geometry
def create_grid_areas_map(
    grid_gdf: gpd.GeoDataFrame,
    metric: str,
    make_3d_map: bool,
) -> pdk.Deck:
    """Create a spatial distribution map for grid areas.

    The grid is reprojected to EPSG:4326, every cell geometry is passed through
    ``fix_hex_geometry``, and each cell is coloured by the min-max normalised value
    of ``metric`` using the colormap returned by ``get_cmap(metric)``. In 3D mode the
    same normalised value is used as the extrusion height.

    Args:
        grid_gdf (gpd.GeoDataFrame): GeoDataFrame containing grid cell geometries and statistics.
            Besides ``metric`` it must provide the columns ``cell_area``, ``land_area``,
            ``water_area`` and ``land_ratio``, which are exported as feature properties
            for the tooltip. The frame itself is not modified.
        metric (str): The metric to visualize (e.g., "cell_area", "land_area", "water_area", "land_ratio").
        make_3d_map (bool): Whether to render the map in 3D (extruded) or 2D.

    Returns:
        pdk.Deck: A PyDeck map visualization of the specified grid statistic.

    """
    # Reproject once to WGS84 for pydeck. to_crs returns a new GeoDataFrame, so the
    # caller's frame stays untouched without an additional copy.
    gdf = grid_gdf.to_crs("EPSG:4326")

    # Fix antimeridian issues for hex cells
    geometries = gdf["geometry"].apply(fix_hex_geometry)

    # Normalize the metric values to [0, 1] for color mapping.
    # Constant (or empty) input maps to all zeros; the empty case is checked first
    # because min()/max() of a zero-length array raise.
    values = gdf[metric].values
    normalized_values = np.zeros_like(values)
    if len(values) > 0:
        vmin, vmax = values.min(), values.max()
        if vmax > vmin:
            normalized_values = (values - vmin) / (vmax - vmin)

    # Map normalized values to colors using the colormap registered for the metric
    cmap = get_cmap(metric)
    fill_colors = [hex_to_rgb(mcolors.to_hex(cmap(val))) for val in normalized_values]

    # Convert to GeoJSON features by iterating the columns in lockstep
    # (avoids building a Series per row as iterrows() does).
    area_columns = ("cell_area", "land_area", "water_area", "land_ratio")
    area_rows = zip(*(gdf[column] for column in area_columns))
    geojson_data = []
    for geometry, fill_color, value, height, areas in zip(
        geometries, fill_colors, values, normalized_values, area_rows
    ):
        properties = {
            "fill_color": fill_color,
            "metric_value": float(value),
            # Elevation is only meaningful for the extruded (3D) rendering
            "elevation": float(height) if make_3d_map else 0,
        }
        properties.update((column, float(area)) for column, area in zip(area_columns, areas))
        geojson_data.append(
            {
                "type": "Feature",
                "geometry": geometry.__geo_interface__,
                "properties": properties,
            }
        )

    # Create pydeck layer
    layer = pdk.Layer(
        "GeoJsonLayer",
        geojson_data,
        opacity=0.7,
        stroked=True,
        filled=True,
        extruded=make_3d_map,
        wireframe=False,
        get_fill_color="properties.fill_color",
        get_line_color=[80, 80, 80],
        line_width_min_pixels=0.5,
        get_elevation="properties.elevation" if make_3d_map else 0,
        elevation_scale=500000,  # Scale normalized values (0-1) to 500km height
        pickable=True,
    )

    # Set initial view state (centered on the Arctic)
    # Adjust pitch and zoom based on whether we're using 3D
    view_state = pdk.ViewState(
        latitude=70,
        longitude=0,
        zoom=1.5 if make_3d_map else 2,
        pitch=45 if make_3d_map else 0,
    )

    # Build tooltip HTML; the placeholders refer to the feature properties set above
    tooltip_html = (
        "<b>Cell Area:</b> {cell_area} km²<br/>"
        "<b>Land Area:</b> {land_area} km²<br/>"
        "<b>Water Area:</b> {water_area} km²<br/>"
        "<b>Land Ratio:</b> {land_ratio}"
    )

    # Create deck
    return pdk.Deck(
        layers=[layer],
        initial_view_state=view_state,
        tooltip={
            "html": tooltip_html,
            "style": {"backgroundColor": "steelblue", "color": "white"},
        },
        map_style="https://basemaps.cartocdn.com/gl/dark-matter-gl-style/style.json",
    )

View file

@ -0,0 +1,104 @@
"""Area of grid cells dashboard section."""
from typing import cast
import geopandas as gpd
import matplotlib.colors as mcolors
import streamlit as st
from entropice.dashboard.plots.grids import create_grid_areas_map
from entropice.dashboard.utils.colors import get_cmap
@st.fragment
def _render_area_map(grid_gdf: gpd.GeoDataFrame):
    """Render the metric selector, the grid area map and its legend.

    Args:
        grid_gdf: Grid GeoDataFrame. The selected metric column is passed to the
            map and is also read here to label the legend with its min and max.

    """

    def _as_title(column_name: str) -> str:
        # "land_ratio" -> "Land Ratio"
        return column_name.replace("_", " ").title()

    st.subheader("Spatial Distribution of Grid Cell Areas")

    # Wide column for the metric selector, narrow one for the 3D toggle
    selector_col, toggle_col = st.columns([4, 1])
    with selector_col:
        metric = st.selectbox(
            "Metric",
            options=["cell_area", "land_area", "water_area", "land_ratio"],
            format_func=_as_title,
            key="metric",
        )
    with toggle_col:
        make_3d_map = cast(bool, st.checkbox("3D Map", value=True, key="area_3d_map"))

    st.pydeck_chart(create_grid_areas_map(grid_gdf, metric, make_3d_map))

    # Legend
    with st.expander("Legend", expanded=True):
        st.markdown(f"**{_as_title(metric)}**")

        metric_column = grid_gdf[metric]
        lowest, highest = metric_column.min(), metric_column.max()

        # The ratio is shown as a percentage, every other metric as an area in km²
        label_format = "{:.1%}" if metric == "land_ratio" else "{:.2f} km²"
        lowest_label = label_format.format(lowest)
        highest_label = label_format.format(highest)

        # Sample 4 stops from the same colormap the map uses to build the gradient
        cmap = get_cmap(metric)
        gradient_css = ", ".join(mcolors.to_hex(cmap(stop)) for stop in (0.0, 0.33, 0.67, 1.0))

        # Simple gradient bar flanked by the min and max labels
        legend_html = "".join(
            [
                '<div style="display: flex; align-items: center; margin-top: 10px; margin-bottom: 10px;">',
                f'<span style="margin-right: 10px;">{lowest_label}</span>',
                '<div style="flex: 1; height: 20px; background: linear-gradient(to right, ',
                f'{gradient_css}); border: 1px solid #ccc;"></div>',
                f'<span style="margin-left: 10px;">{highest_label}</span>',
                "</div>",
            ]
        )
        st.markdown(legend_html, unsafe_allow_html=True)
        st.caption("Color intensity represents the metric value from low (purple) to high (yellow).")

        if make_3d_map:
            st.markdown("---")
            st.markdown("**3D Elevation:**")
            st.caption(
                f"Height represents normalized {metric.replace('_', ' ')} values. "
                "Rotate the map by holding Ctrl/Cmd and dragging."
            )
def render_area_information_tab(grid_gdf: gpd.GeoDataFrame):
    """Render the areas tab: intro text, four summary metrics and the area map.

    Args:
        grid_gdf: Pre-loaded grid GeoDataFrame. The ``cell_area``, ``land_ratio``
            and ``land_area`` columns are read for the summary metrics.

    """
    st.markdown("### Grid Cell Areas and Land/Water Distribution")

    intro_text = (
        "This visualization shows the spatial distribution of cell areas, "
        "land areas, water areas, and land ratio across the grid. "
        "The grid has been filtered to include only cells in the permafrost region "
        "(>50° latitude, <85° latitude) with >10% land coverage."
    )
    st.markdown(intro_text)

    # Summary statistics, one per column, rendered left to right
    cells_col, area_col, ratio_col, land_col = st.columns(4)
    cells_col.metric("Total Cells", f"{len(grid_gdf):,}")
    area_col.metric("Avg Cell Area", f"{grid_gdf['cell_area'].mean():.2f} km²")
    ratio_col.metric("Avg Land Ratio", f"{grid_gdf['land_ratio'].mean():.1%}")
    land_col.metric("Total Land Area", f"{grid_gdf['land_area'].sum():,.0f} km²")

    st.divider()

    _render_area_map(grid_gdf)

View file

@ -5,6 +5,7 @@ from typing import cast
import streamlit as st import streamlit as st
from stopuhr import stopwatch from stopuhr import stopwatch
from entropice.dashboard.sections.areas import render_area_information_tab
from entropice.dashboard.sections.dataset_statistics import render_ensemble_details from entropice.dashboard.sections.dataset_statistics import render_ensemble_details
from entropice.dashboard.sections.targets import render_target_information_tab from entropice.dashboard.sections.targets import render_target_information_tab
from entropice.dashboard.utils.stats import DatasetStatistics from entropice.dashboard.utils.stats import DatasetStatistics
@ -120,6 +121,8 @@ def render_dataset_page():
train_data_dict[target] = {} train_data_dict[target] = {}
for task in all_tasks: for task in all_tasks:
train_data_dict[target][task] = ensemble.create_training_set(target=target, task=task) train_data_dict[target][task] = ensemble.create_training_set(target=target, task=task)
# Preload the grid GeoDataFrame
grid_gdf = ensemble.read_grid()
era5_members = [m for m in ensemble.members if m.startswith("ERA5")] era5_members = [m for m in ensemble.members if m.startswith("ERA5")]
# Create tabs for different data views # Create tabs for different data views
@ -135,9 +138,11 @@ def render_dataset_page():
with tabs[0]: with tabs[0]:
st.header("🎯 Target Labels Visualization") st.header("🎯 Target Labels Visualization")
if False: #! debug
render_target_information_tab(train_data_dict) render_target_information_tab(train_data_dict)
with tabs[1]: with tabs[1]:
st.header("📐 Areas Visualization") st.header("📐 Areas Visualization")
render_area_information_tab(grid_gdf)
tab_index = 2 tab_index = 2
if "AlphaEarth" in ensemble.members: if "AlphaEarth" in ensemble.members:
with tabs[tab_index]: with tabs[tab_index]: