# Install all requirements for scripts
# install.packages(c("dplyr", "devtools", "sf", "wk", "worrms", "here", "httr2", "tidyr", "ggplot2", "readr", "rfishbase", "rnaturalearth", "robis", "rgbif", "stringr", "ggtext"))
# devtools::install_github("ropensci/mregions2", build_vignettes = TRUE)
# devtools::install_github("bio-oracle/biooracler")
library(biooracler)
library(dplyr)
library(sf)
library(wk)
library(worrms)
library(here)
library(httr2)
library(tidyr)
library(ggplot2)
library(readr)
library(rfishbase)
library(rnaturalearth)
library(robis)
library(rgbif)
library(stringr)
library(ggtext)
Results
Modelling Anemonefish and Sea Anemone Hosts
This document outlines the process of downloading and preparing occurrence data for anemonefish and their host sea anemones. This data is crucial for building species distribution models (SDMs) and understanding the ecological relationships between these species.
Script Overview
The data acquisition process is orchestrated through a series of R scripts. Here’s a summary of each script:
- 00_run_all_scripts.R: This script acts as the main entry point, executing all other scripts in the correct sequence.
- 01_install_requirements.R: Ensures all necessary R packages are installed before proceeding with data acquisition.
- 02_get_species_metadata.R: Downloads and processes species metadata (scientific names, AphiaIDs, etc.) for both anemones and anemonefish from online databases like WoRMS and GBIF.
- 03_download_occurrence_data.R: Downloads occurrence data (location records) for anemones and anemonefish from OBIS and GBIF, using the species metadata obtained in the previous step.
1. Install Requirements
The 01_install_requirements.R script installs the necessary R packages for the entire workflow. This ensures that all dependencies are met before running the data acquisition and processing scripts.
2. Download Species Metadata
The 02_get_species_metadata.R script retrieves species metadata for both anemones and anemonefish. This script is crucial for obtaining the correct scientific names and identification codes (AphiaIDs) needed for querying occurrence databases.
2.1 Processed Species Lists
Species lists for both anemones and anemonefish, stored in separate Excel files (Anemone List.xlsx and Anemonefish List.xlsx), are read into R. The script then performs the following steps:
- Read Species Lists: Reads species lists from Excel files, ensuring correct column names.
- Match Scientific Names: Uses obistools::match_taxa to match original scientific names with the accepted names in WoRMS. This ensures that the most up-to-date taxonomic information is used.
- Combine Data: Combines the original species list with the WoRMS-matched names and AphiaIDs.
- Add GBIF Information: Queries GBIF for additional species information, including GBIF species keys and match types.
- Save Species Lists: Saves the processed species lists to CSV files (final_anemone_species_list.csv and final_anemonefish_species_list.csv). These files contain the scientific names, AphiaIDs, and GBIF species keys, which are used in the next step to download occurrence data.
# Download species metadata from WoRMS and GBIF for anemones and anemonefish.
# Based on the following guide: https://www.gbif.us/post/2024/searching-with-aphiaids/
# Anemone species list:
# Original:
# Entacmaea quadricolor
# Radianthus crispa (formerly Heteractis crispa)
# Radianthus magnifica (formerly Heteractis magnifica)
# Stichodactyla gigantea
# Cryptodendrum adhaesivum
# Macrodactyla doreensis
# Stichodactyla mertensii
# Extra:
# Stichodactyla haddoni
# Radianthus malu (formerly Heteractis malu)
# Radianthus aurora (formerly Heteractis aurora)
# Anemonefish species list:
# Generalist
# Original:
# Amphiprion clarkii
# Amphiprion ocellaris
# Amphiprion perideraion
# Extra:
# Amphiprion bicinctus
# Amphiprion chrysopterus
# Amphiprion akindynos
# Amphiprion melanopus
# Amphiprion leucokranos
# Amphiprion percula
# Amphiprion omanensis
# Amphiprion tricinctus
# Amphiprion allardi
# Amphiprion chrysogaster
# Amphiprion latifasciatus (double check, it might be some sort of specialist)
# Specialist
# Original:
# Amphiprion frenatus
# Amphiprion sandaracinos
# Amphiprion polymnus
# Extra:
# Amphiprion akallopisos
# Amphiprion ephippium
# Amphiprion sebae
# Amphiprion chagosensis
# Amphiprion nigripes
# Amphiprion latezonatus
# Amphiprion biaculeatus
# Amphiprion mccullochi
# Amphiprion rubrocinctus
# Amphiprion barberi
# Amphiprion fuscocaudatus
#' Process a species list and enrich it with WoRMS and GBIF identifiers
#'
#' Reads a species list from an Excel file, matches the original scientific
#' names against WoRMS via `obistools::match_taxa()`, looks each matched name
#' up in the GBIF backbone via `rgbif::name_backbone()`, and writes the
#' combined table to a CSV file. The intermediate and final tables are printed
#' along the way.
#'
#' @param file_path Path to the Excel file holding the species list. The
#'   columns are renamed to `vernacularName`, `scientificNameOriginal` and
#'   `rank`, so the sheet is assumed to have exactly three columns in that
#'   order -- TODO confirm against the input files.
#' @param output_file_path Path of the CSV file to write.
#' @return The processed data frame, including the `AphiaID`, `taxonID` and
#'   `gbif_*` columns.
process_species_list <- function(file_path, output_file_path) {
  # Read in species list from xlsx file
  species_list <- readxl::read_excel(file_path)
  colnames(species_list) <- c(
    "vernacularName",
    "scientificNameOriginal",
    "rank"
  )

  # Match original Scientific Names with correct WoRMS ACCEPTED Scientific Names
  # NOTE(review): match_taxa() may prompt interactively for ambiguous names --
  # confirm before running this non-interactively.
  new_names <- obistools::match_taxa(species_list$scientificNameOriginal)

  # Bind new names
  final_list <- bind_cols(new_names, species_list)

  # Expose the accepted-name ID as AphiaID and move the key columns first
  final_list <- final_list %>%
    mutate(AphiaID = acceptedNameUsageID) %>%
    relocate(AphiaID, scientificName)
  print(final_list)

  # Print the number of species
  print(paste("Number of species:", nrow(final_list)))

  # Add GBIF information (one backbone lookup per matched scientific name)
  gbif_names <- lapply(final_list$scientificName, rgbif::name_backbone)
  gbif_names <- bind_rows(gbif_names)
  gbif_names <- gbif_names %>%
    select(
      gbif_speciesKey = usageKey,
      gbif_scientificName = scientificName,
      gbif_matchType = matchType
    )
  final_list <- final_list %>% bind_cols(gbif_names)

  # Display the first few rows of the combined data frame
  print(head(final_list))

  # Add taxonID column (same as AphiaID)
  final_list$taxonID <- final_list$AphiaID

  # Save the species list to a CSV file
  write.csv(final_list, output_file_path, row.names = FALSE)

  # Return the processed dataframe
  final_list
}
# Process anemone species list
# Writes final_anemone_species_list.csv and keeps the processed table in memory.
final_anemone_list <- process_species_list(
  "Anemone List.xlsx",
  "final_anemone_species_list.csv"
)

# Process anemonefish species list
# Writes final_anemonefish_species_list.csv and keeps the processed table in
# memory.
final_anemonefish_list <- process_species_list(
  "Anemonefish List.xlsx",
  "final_anemonefish_species_list.csv"
)
2.2 Anemone Species List
A species list of anemones is compiled from online resources; this list is used to download occurrence data in the next script.
2.3 Anemonefish Species List
A species list of anemonefish is compiled from online resources; this list is used to download occurrence data in the next script.
3. Download Occurrence Data
The 03_download_occurrence_data.R script downloads occurrence data for the anemone and anemonefish species. This script uses the AphiaIDs from the processed species lists to query OBIS and GBIF for occurrence records.
3.1 Download Sea Anemone Occurrence Data
This downloads all the sea anemone occurrence data for the species in the species list in section 2.1 and saves it to the data/occurrence/anemone directory. The species list it reads in is final_anemone_species_list.csv. The script merges the GBIF and OBIS datasets for each species. The datasets are deduplicated before saving.
# Download occurrence data from OBIS and GBIF for anemones and anemonefish.
# Based on the following guide: https://www.gbif.us/post/2024/searching-with-aphiaids/
# Load the helper script; download_occurrence_data() is presumably defined
# there (it is not defined in this file) -- TODO confirm.
source("helpers/occurrence_helpers.R")
# Run the function for anemones and anemonefish
# Only the anemone download is active in this chunk; the anemonefish call is
# left commented out. The first argument is the species list CSV and the
# second is the directory the occurrence data is saved to.
download_occurrence_data("final_anemone_species_list.csv", "data/occurrence/anemone")
# download_occurrence_data("final_anemonefish_species_list.csv", "data/occurrence/anemonefish")
3.2 Download Anemonefish Occurrence Data
This downloads all the anemonefish occurrence data for the species in the species list in section 2.1 and saves it to the data/occurrence/anemonefish directory. The species list it reads in is final_anemonefish_species_list.csv. The script merges the GBIF and OBIS datasets for each species. The datasets are deduplicated before saving.
# Download occurrence data from OBIS and GBIF for anemones and anemonefish.
# Based on the following guide: https://www.gbif.us/post/2024/searching-with-aphiaids/
# Load the helper script; download_occurrence_data() is presumably defined
# there (it is not defined in this file) -- TODO confirm.
source("helpers/occurrence_helpers.R")
# Run the function for anemones and anemonefish
# Only the anemonefish download is active in this chunk; the anemone call is
# left commented out. The first argument is the species list CSV and the
# second is the directory the occurrence data is saved to.
# download_occurrence_data("final_anemone_species_list.csv", "data/occurrence/anemone")
download_occurrence_data("final_anemonefish_species_list.csv", "data/occurrence/anemonefish")
3.3 Download Environmental Data
This downloads environmental data from Bio-Oracle to be used as predictors in the species distribution models. The environmental layers are downloaded for current conditions and future climate scenarios.
# Download all environmental data from Bio-Oracle using obissdm library

# List datasets to download (surface layers)
datasets <- c(
  "thetao_baseline_2000_2019_depthsurf",
  "so_baseline_2000_2019_depthsurf",
  "PAR_mean_baseline_2000_2020_depthsurf",
  "phyc_baseline_2000_2020_depthsurf",
  "ph_baseline_2000_2018_depthsurf",
  "sws_baseline_2000_2019_depthsurf",
  "o2_baseline_2000_2018_depthsurf",
  "KDPAR_mean_baseline_2000_2020_depthsurf",
  "no3_baseline_2000_2018_depthsurf",
  "chl_baseline_2000_2018_depthsurf",
  "tas_baseline_2000_2020_depthsurf",
  "si_baseline_2000_2018_depthsurf",
  "mlotst_baseline_2000_2019_depthsurf"
)

# Add the "depthmean" counterpart of every surface dataset
datasets <- c(
  datasets,
  gsub("depthsurf", "depthmean", datasets)
)

# List scenarios to download
# NOTE(review): the original comment called these the "most optimistic and
# least optimistic scenarios" -- confirm that ssp245/ssp370 are the intended
# pair.
future_scenarios <- c("ssp245", "ssp370")

# Define time steps
time_steps <- list(
  current = c("2000-01-01T00:00:00Z", "2010-01-01T00:00:00Z"), # 2000-2010/2010-2020
  dec50 = c("2030-01-01", "2040-01-01"), # 2030-2040/2040-2050
  dec100 = c("2080-01-01", "2090-01-01") # 2080-2090/2090-2100
)

# Define variables to be downloaded
variables <- c("min", "mean", "max")
# Download every listed dataset for the configured scenarios, time steps and
# summary variables, together with the terrain layers.
obissdm::get_env_data(
  datasets = datasets,
  future_scenarios = future_scenarios,
  time_steps = time_steps,
  variables = variables,
  terrain_vars = c(
    "bathymetry_mean",
    "slope",
    "terrain_ruggedness_index"
  ),
  average_time = TRUE
)

# For just temperature, download also the range, ltmin and ltmax
obissdm::get_env_data(
  datasets = "thetao_baseline_2000_2019_depthsurf",
  future_scenarios = future_scenarios,
  time_steps = time_steps,
  variables = c("range", "ltmin", "ltmax"),
  average_time = TRUE
)
# For Chlorophyll-a we remove the depthmean and depthmax layers, as they are
# not available for the future scenarios
to_remove <- list.files("data/env/current", full.names = TRUE)
to_remove <- to_remove[grepl("chl", to_remove)]
to_remove <- to_remove[grepl("depthmean|depthmax", to_remove)]
fs::file_delete(to_remove)
# Rename KDPAR for kd
# Every file under data/env whose path contains "kdpar" is renamed with
# "kdpar" replaced by "kd".
to_rename <- list.files("data/env", recursive = TRUE, full.names = TRUE)
to_rename <- to_rename[grepl("kdpar", to_rename)]
new_names <- gsub("kdpar", "kd", to_rename)
file.rename(to_rename, new_names)
# Rename terrain_ruggedness
# The layer is re-written under the new file name (rather than just renaming
# the file) so that the layer name stored in the raster becomes "rugosity".
to_rename <- list.files(
  "data/env/terrain/",
  recursive = TRUE,
  full.names = TRUE
)
to_rename <- to_rename[grepl("rugg", to_rename)]
# Skip auxiliary files (paths containing "aux")
to_rename <- to_rename[!grepl("aux", to_rename)]

# Nothing to do when no file matches (e.g. the rename has already been done)
if (length(to_rename) > 0) {
  new_names <- gsub("terrain_ruggedness_index", "rugosity", to_rename)
  edit_r <- terra::rast(to_rename)
  names(edit_r) <- "rugosity"
  terra::writeRaster(edit_r, new_names, overwrite = TRUE)
  fs::file_delete(to_rename)
}