6. Results
# Load required packages
library(rgee)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(stars)
## Loading required package: abind
library(dplyr)
library(rnaturalearth)
library(ggplot2)
## Warning: Can't find generic `as.gtable` in package gtable to register S3 method.
## ℹ This message is only shown to developers using devtools.
## ℹ Do you need to update gtable to the latest version?
library(tidyr)
library(stringr)
# Load datasets
river_ndvi <- st_read("water_mask_river_ndvi_sr_2024")
## Reading layer `water_mask_river_ndvi_sr_2024_fixed' from data source
## `/Users/rebekahchen/Desktop/annual_freshwater_VI/water_mask_river_ndvi_sr_2024'
## using driver `ESRI Shapefile'
## Simple feature collection with 12752 features and 13 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -55.98403 xmax: 180 ymax: 83.01597
## Geodetic CRS: WGS 84
river_ndavi <- st_read("water_mask_river_ndavi_sr_2024")
## Reading layer `water_mask_river_ndavi_sr_2024_fixed' from data source
## `/Users/rebekahchen/Desktop/annual_freshwater_VI/water_mask_river_ndavi_sr_2024'
## using driver `ESRI Shapefile'
## Simple feature collection with 12752 features and 13 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -55.98403 xmax: 180 ymax: 83.01597
## Geodetic CRS: WGS 84
lake_ndvi <- st_read("water_mask_lake_ndvi_sr_2024")
## Reading layer `water_mask_lake_ndvi_sr_2024_fixed' from data source
## `/Users/rebekahchen/Desktop/annual_freshwater_VI/water_mask_lake_ndvi_sr_2024'
## using driver `ESRI Shapefile'
## Simple feature collection with 12752 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -55.98403 xmax: 180 ymax: 83.01597
## Geodetic CRS: WGS 84
lake_ndavi <- st_read("water_mask_lake_ndavi_sr_2024")
## Reading layer `water_mask_lake_ndavi_sr_2024_fixed' from data source
## `/Users/rebekahchen/Desktop/annual_freshwater_VI/water_mask_lake_ndavi_sr_2024'
## using driver `ESRI Shapefile'
## Simple feature collection with 12752 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -55.98403 xmax: 180 ymax: 83.01597
## Geodetic CRS: WGS 84
# Load world boundaries for continental classification
world <- ne_countries(scale = "medium", returnclass = "sf")
world_regions <- ne_countries(scale = "medium", returnclass = "sf") %>%
select(continent, subregion)
6.1 Global Maps
# Lake NDVI map
ggplot() +
# First layer: world map
geom_sf(data = world, fill = "lightgray", color = "white", size = 0.1) +
# Second layer: lake NDVI
geom_sf(data = lake_ndvi %>% filter(!is.na(ndvi_wtr)),
aes(color = ndvi_wtr), size = 0.8) +
scale_color_gradient(low = "white", high = "#228B22",
name = " Annual mean NDAVI", limits = c(-0.2, 0.8)) +
theme_minimal() +
labs(title = "Global Lake NDVI Distribution within Freshwater Species Richness Grid (with water mask)")

# Lake NDAVI map
ggplot() +
# First layer: world map
geom_sf(data = world, fill = "lightgray", color = "white", size = 0.1) +
# Second layer: lake NDAVI
geom_sf(data = lake_ndavi %>% filter(!is.na(ndavi_wtr)),
aes(color = ndavi_wtr), size = 1) +
scale_color_gradient(
low = "white",
high = "#228B22",
name = "Annual mean NDAVI"
) +
theme_minimal() +
labs(title = "Global Lake NDAVI Distribution within Freshwater Species Richness Grid (with water mask)")

# River NDVI map
ggplot() +
# First layer: world map
geom_sf(data = world, fill = "lightgray", color = "white", size = 0.1) +
# Second layer: river NDVI
geom_sf(data = river_ndvi %>% filter(!is.na(ndvi_wtr)),
aes(color = ndvi_wtr), size = 1) +
scale_color_gradient(low = "white", high = "#228B22",
name = "Annual mean NDVI", limits = c(-0.2, 0.8)) +
theme_minimal() +
labs(title = "Global River NDVI Distribution within Freshwater Species Richness Grid (with water mask)")

# River NDAVI map
ggplot() +
# First layer: world map
geom_sf(data = world, fill = "lightgray", color = "white", size = 0.1) +
# Second layer: river NDAVI
geom_sf(data = river_ndavi %>% filter(!is.na(ndavi_wtr)),
aes(color = ndavi_wtr), size = 1) +
scale_color_gradient(
low = "white",
high = "#228B22",
name = "Annual mean NDAVI"
) +
theme_minimal() +
labs(title = "Global River NDAVI Distribution within Freshwater Species Richness Grid (with water mask)")

6.2 Lake Vegetation Indices by Continent
# Calculate continental summaries for lakes
lake_ndvi_continents <- lake_ndvi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
group_by(continent) %>%
summarise(
mean_ndvi_water = mean(ndvi_wtr, na.rm = TRUE),
median_ndvi_water = median(ndvi_wtr, na.rm = TRUE),
mean_ndvi = mean(lk_ndvi_mn, na.rm = TRUE),
median_ndvi = median(lk_ndvi_mn, na.rm = TRUE),
sd_ndvi = sd(lk_ndvi_mn, na.rm = TRUE),
total_lakes = sum(n_lakes, na.rm = TRUE),
n_cells = n(),
.groups = 'drop'
) %>%
filter(continent != "Seven seas (open ocean)")
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
# Transform to long format for plotting
lake_ndvi_long <- lake_ndvi_continents %>%
pivot_longer(
cols = c(mean_ndvi_water, mean_ndvi, median_ndvi_water, median_ndvi),
names_to = "metric_type",
values_to = "value"
) %>%
mutate(
stat_type = case_when(
str_detect(metric_type, "mean") ~ "Mean",
str_detect(metric_type, "median") ~ "Median"
),
water_mask = case_when(
str_detect(metric_type, "_water") ~ "With Water Mask",
TRUE ~ "Without Water Mask"
),
facet_label = paste(stat_type, "-", water_mask)
)
# Order continents by mean NDVI with water mask
continent_order <- lake_ndvi_continents %>%
group_by(continent) %>%
arrange(desc(mean_ndvi_water)) %>%
pull(continent)
# Create bar plot
lake_ndvi_long %>%
mutate(continent = factor(continent, levels = continent_order)) %>%
ggplot(aes(x = continent, y = value)) +
geom_bar(stat = "identity", fill = "#5c89cc") +
facet_wrap(~ facet_label, scales = "free_y") +
labs(
title = "Annual Lake NDVI by Continent",
subtitle = "Continents ordered by mean Lake NDVI with water mask",
x = "Continent",
y = "Lake NDVI Value"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Calculate continental summaries for lake NDAVI
lake_ndavi_continents <- lake_ndavi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
group_by(continent) %>%
summarise(
mean_ndavi_water = mean(ndavi_wtr, na.rm = TRUE),
median_ndavi_water = median(ndavi_wtr, na.rm = TRUE),
mean_ndavi = mean(lk_ndavi_m, na.rm = TRUE),
median_ndavi = median(lk_ndavi_m, na.rm = TRUE),
sd_ndavi = sd(lk_ndavi_m, na.rm = TRUE),
total_lakes = sum(n_lakes, na.rm = TRUE),
n_cells = n(),
.groups = 'drop'
) %>%
filter(continent != "Seven seas (open ocean)")
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
# Transform to long format
lake_ndavi_long <- lake_ndavi_continents %>%
pivot_longer(
cols = c(mean_ndavi_water, mean_ndavi, median_ndavi_water, median_ndavi),
names_to = "metric_type",
values_to = "value"
) %>%
mutate(
stat_type = case_when(
str_detect(metric_type, "mean") ~ "Mean",
str_detect(metric_type, "median") ~ "Median"
),
water_mask = case_when(
str_detect(metric_type, "_water") ~ "With Water Mask",
TRUE ~ "Without Water Mask"
),
facet_label = paste(stat_type, "-", water_mask)
)
# Order continents
continent_order_ndavi <- lake_ndavi_continents %>%
arrange(desc(mean_ndavi_water)) %>%
pull(continent)
# Create bar plot
lake_ndavi_long %>%
mutate(continent = factor(continent, levels = continent_order_ndavi)) %>%
ggplot(aes(x = continent, y = value)) +
geom_bar(stat = "identity", fill = "#5c89cc") +
facet_wrap(~ facet_label, scales = "free_y") +
labs(
title = "Annual Lake NDAVI by Continent",
subtitle = "Continents ordered by mean NDAVI with water mask",
x = "Continent",
y = "Lake NDAVI Value"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

6.3 River Vegetation Indices by Continent
# Calculate continental summaries for rivers
river_ndvi_continents <- river_ndvi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
group_by(continent) %>%
summarise(
mean_ndvi_water = mean(ndvi_wtr, na.rm = TRUE),
median_ndvi_water = median(ndvi_wtr, na.rm = TRUE),
mean_ndvi = mean(rv_ndvi_mn, na.rm = TRUE),
median_ndvi = median(rv_ndvi_mn, na.rm = TRUE),
sd_ndvi = sd(rv_ndvi_mn, na.rm = TRUE),
total_rivers = sum(n_rivers, na.rm = TRUE),
n_cells = n(),
.groups = 'drop'
) %>%
filter(continent != "Seven seas (open ocean)")
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
# Transform to long format
river_ndvi_long <- river_ndvi_continents %>%
pivot_longer(
cols = c(mean_ndvi_water, mean_ndvi, median_ndvi_water, median_ndvi),
names_to = "metric_type",
values_to = "value"
) %>%
mutate(
stat_type = case_when(
str_detect(metric_type, "mean") ~ "Mean",
str_detect(metric_type, "median") ~ "Median"
),
water_mask = case_when(
str_detect(metric_type, "_water") ~ "With Water Mask",
TRUE ~ "Without Water Mask"
),
facet_label = paste(stat_type, "-", water_mask)
)
# Order continents
continent_order_river <- river_ndvi_continents %>%
arrange(desc(mean_ndvi_water)) %>%
pull(continent)
# Create bar plot
river_ndvi_long %>%
mutate(continent = factor(continent, levels = continent_order_river)) %>%
ggplot(aes(x = continent, y = value)) +
geom_bar(stat = "identity", fill = "#4d9966") +
facet_wrap(~ facet_label, scales = "free_y") +
labs(
title = "Annual River NDVI by Continent",
subtitle = "Continents ordered by mean River NDVI with water mask",
x = "Continent",
y = "River NDVI Value"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Calculate continental summaries for river NDAVI
river_ndavi_continents <- river_ndavi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
group_by(continent) %>%
summarise(
mean_ndavi_water = mean(ndavi_wtr, na.rm = TRUE),
median_ndavi_water = median(ndavi_wtr, na.rm = TRUE),
mean_ndavi = mean(rv_ndavi_m, na.rm = TRUE),
median_ndavi = median(rv_ndavi_m, na.rm = TRUE),
sd_ndavi = sd(rv_ndavi_m, na.rm = TRUE),
total_rivers = sum(n_rivers, na.rm = TRUE),
n_cells = n(),
.groups = 'drop'
) %>%
filter(continent != "Seven seas (open ocean)")
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
# Transform to long format
river_ndavi_long <- river_ndavi_continents %>%
pivot_longer(
cols = c(mean_ndavi_water, mean_ndavi, median_ndavi_water, median_ndavi),
names_to = "metric_type",
values_to = "value"
) %>%
mutate(
stat_type = case_when(
str_detect(metric_type, "mean") ~ "Mean",
str_detect(metric_type, "median") ~ "Median"
),
water_mask = case_when(
str_detect(metric_type, "_water") ~ "With Water Mask",
TRUE ~ "Without Water Mask"
),
facet_label = paste(stat_type, "-", water_mask)
)
# Order continents
continent_order_river_ndavi <- river_ndavi_continents %>%
arrange(desc(mean_ndavi_water)) %>%
pull(continent)
# Create bar plot
river_ndavi_long %>%
mutate(continent = factor(continent, levels = continent_order_river_ndavi)) %>%
ggplot(aes(x = continent, y = value)) +
geom_bar(stat = "identity", fill ="#4d9966")+
facet_wrap(~ facet_label, scales = "free_y") +
labs(
title = "Annual River NDAVI by Continent",
subtitle = "Continents ordered by mean River NDAVI with water mask",
x = "Continent",
y = "River NDAVI Value"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

6.4 Continental Distribution Patterns
# Lake NDVI boxplot
lake_ndvi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
filter(continent != "Seven seas (open ocean)") %>%
ggplot(aes(x = continent, y = ndvi_wtr)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(alpha = 0.1, color = "darkblue", width = 0.2) +
labs(title = "A. Lake NDVI Distribution",
y = "NDVI (with water mask)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## Warning: Removed 3351 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3351 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Lake NDAVI boxplot
lake_ndavi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
filter(continent != "Seven seas (open ocean)") %>%
ggplot(aes(x = continent, y = ndavi_wtr)) +
geom_boxplot(fill = "lightblue") + # Light blue fill for lakes
# Add individual points (slightly spread out to avoid overlap)
geom_jitter(alpha = 0.1, color = "darkblue", width = 0.2) +
labs(
title = "Lake NDAVI Distribution by Continent",
x = "Continent",
y = "Lake NDAVI (with water mask)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## Warning: Removed 3351 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3351 rows containing missing values or values outside the scale range
## (`geom_point()`).

# River NDVI boxplot
river_ndvi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
filter(continent != "Seven seas (open ocean)") %>%
ggplot(aes(x = continent, y = ndvi_wtr)) +
geom_boxplot(fill = "lightgreen") +
geom_jitter(alpha = 0.1, color = "darkgreen", width = 0.2) +
labs(title = "River NDVI Distribution by Continent",
y = "NDVI (with water mask)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## Warning: Removed 5027 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5027 rows containing missing values or values outside the scale range
## (`geom_point()`).

# River NDAVI boxplot
river_ndavi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
filter(continent != "Seven seas (open ocean)") %>%
ggplot(aes(x = continent, y = ndavi_wtr)) +
geom_boxplot(fill = "lightgreen") +
geom_jitter(alpha = 0.1, color = "darkgreen", width = 0.2) +
labs(title = "River NDAVI Distribution by Continent",
y = "NDVI (with water mask)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## Warning: Removed 5027 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 5027 rows containing missing values or values outside the scale range
## (`geom_point()`).

lake_ndavi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
filter(continent != "Seven seas (open ocean)") %>%
ggplot(aes(x = lk_ndavi_m, y = ndavi_wtr)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE, color = "red") + # Add trend line
facet_wrap(~ continent, scales = "free") +
labs(
title = "NDAVI Comparison: Water Mask vs No Water Mask by Continent",
x = "Lake NDAVI (no water mask)",
y = "NDAVI (with water mask)"
) +
theme_minimal()
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3351 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 3351 rows containing missing values or values outside the scale range
## (`geom_point()`).

river_ndavi %>%
st_join(world_regions) %>%
st_drop_geometry() %>%
filter(continent != "Seven seas (open ocean)") %>%
ggplot(aes(x = rv_ndavi_m, y = ndavi_wtr)) +
geom_point(alpha = 0.5) +
# Add trend line
geom_smooth(method = "lm", se = FALSE, color = "red") +
# Separate panel for each continent
facet_wrap(~ continent, scales = "free") +
labs(
title = "River NDVI Comparison: Water Mask vs No Water Mask by Continent",
x = "River NDAVI (no water mask)",
y = "River NDAVI (with water mask)"
) +
theme_minimal()
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5027 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 5027 rows containing missing values or values outside the scale range
## (`geom_point()`).

6.5 Interpretation
At the continental level, South America consistently ranks first
across indices (NDVI and NDAVI), systems (rivers and lakes), and
parameters (mean and median), suggesting both more productive riparian
zones and more aquatic vegetation than other continents. This finding
aligns with the climate in South America where year-round growing
conditions and abundant moisture converge, creating optimal environments
for sustained aquatic vegetation productivity.
However, the ranking shifts in parameters between NDVI and NDAVI and
from masked to unmasked data reveal somewhat interesting patterns that
reveal different aspects of continental aquatic vegetation dynamics.
While Europe and Aisa maintain relatively high NDVI in their river
systems, its ranking drops notably in NDAVI, suggesting that the
apparent vegetation productivity in these continents may be inflated by
terrestrial vegetation contributions rather than true aquatic macrophyte
abundance.
Oceania shows the opposite pattern, maintaining relatively strong
aquatic vegetation signals in both vegetation indices that might reflect
extensive wetland systems where genuine aquatic vegetation dominates
rather than mixed pixels. Africa, in particular, moves up markedly
without the mask, which likely reflects the continent’s diverse
freshwater ecosystems ranging from highly productive tropical systems to
more arid-adapted riverine vegetation.
North America consistently shows lower values across both vegetation
indices in lake systems. This suggests that temperate and boreal
freshwater systems, despite their increased abundance during growing
seasons, actually maintain lower annual vegetation productivity due to
seasonal dormancy and shorter growing periods compared to tropical
systems.
The scatter plots show that masked and unmasked index values are
strongly linearly related, especially for lake vegetation index values
which show a more correlated relationship than rivers. This clarifies
why lake rankings change less than river rankings across different
analytical approaches. However, the unmasked values, such as NDVI and
for rivers, the values are higher because shoreline and adjacent land
vegetation can result in mixed pixels that inflate apparent aquatic
productivity. Therefore, the inflation is much weaker for NDAVI than for
NDVI, consistent with NDAVI being more selective for aquatic macrophytes
and NDVI being dominated by terrestrial greenness near water. With water
masking, medians often exceed means, implying left-skew from many low or
negative open-water pixels pulling the mean down. Therefore, medians are
the more appropriate continental summary, providing more reliable
estimates.
However, any ranking changes need to be interpreted with caution as
they likely reflect pixel mixing and skew, not clear ecological ordering
among continents. The underlying Major Habitat Type (MHT) composition
within each continent adds another layer of complexity to these
patterns. For instance, continental rankings may shift not because of
inherent productivity differences, but because continents differ in
their relative habitat types that respond differently to pixel mixing.
Europe’s dominance of “Temperate floodplain rivers and wetlands” versus
South America’s “Tropical and subtropical floodplain rivers and
wetlands” means that apparent ranking changes between these continents
might reflect habitat-specific responses to pixel mixing issue rather
than true productivity differences. Therefore, continental comparisons
only provide a quick idea of what data looks like and help us see if
data makes sense. They likely reflect the interaction between pixel
mixing effects and the underlying distribution of major habitat types
rather than clear biome-scale differences in ecological productivity.
Future habitat-stratified analyses would be needed to disentangle true
productivity signals from methodological uncertainties across these
diverse freshwater ecosystem habitat types.