Files
r-toolbox/warbleR_xeno-canto/warbleR_xeno-canto.r
2024-01-27 17:01:31 +01:00

148 lines
4.9 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
library(warbleR)
# Create a new folder inside a new tempdir and set it to your working dir
# (tempdir() is session-specific, so everything downloaded below is
# discarded when the R session ends)
wd <- file.path(tempdir(), "xeno-canto_example")
dir.create(wd)
setwd(wd)
##
# Do the queries
##
# Query xeno-canto for all Phaethornis recordings (e.g., by genus).
# download = FALSE returns metadata only; no sound files are fetched yet.
Phae <- query_xc(qword = "Phaethornis", download = FALSE)
# Check out the structure of the resulting data frame
str(Phae)
# Query xeno-canto for all Phaethornis longirostris recordings
# (genus + species narrows the query to a single taxon)
Phae.lon <- query_xc(qword = "Phaethornis longirostris", download = FALSE)
# Check out the structure of the resulting data frame
str(Phae.lon)
##
# Filter the xeno-canto metadata by quality, signal type and locality
##
# Total number of Phaethornis longirostris recordings available
nrow(Phae.lon)
# Which signal types are present in the xeno-canto metadata?
unique(Phae.lon$Vocalization_type)
# Tally of recordings per signal type
table(Phae.lon$Vocalization_type)
##
# Keep only the signals we want to retain
##
# Step 1: top-quality ("A") recordings only
Phae.lon <- Phae.lon[Phae.lon$Quality == "A", ]
nrow(Phae.lon)
# Step 2: songs only (case-insensitive match on the signal type)
Phae.lon.song <- Phae.lon[grepl("song", Phae.lon$Vocalization_type, ignore.case = TRUE), ]
nrow(Phae.lon.song)
# Step 3: a single locality (exact, case-sensitive match)
Phae.lon.LS <- Phae.lon.song[grepl("La Selva Biological Station, Sarapiqui, Heredia", Phae.lon.song$Locality, ignore.case = FALSE), ]
nrow(Phae.lon.LS)
# Inspect what is left; 3 recordings are expected to remain
str(Phae.lon.LS)
# Plot the recording locations on a map to double-check the locality filter
map_xc(Phae.lon.LS, img = FALSE)
##
# Once we're sure the recordings fit, it's time to download the files, also save
# the metadata as .csv file
##
# Download sound files: passing the filtered metadata as X fetches exactly
# those recordings into the working directory
query_xc(X = Phae.lon.LS)
# Save the metadata object as a .csv file
# (row.names = FALSE so the file re-imports cleanly)
write.csv(Phae.lon.LS, "Phae_lon.LS.csv", row.names = FALSE)
##
# xeno-canto maintains recordings in mp3 format due to file size restrictions.
# However, we require wav format for all downstream analyses
##
# here we are downsampling the original sampling rate of 44.1 kHz to 22.05 kHz
# to speed up downstream analyses in the vignette series
mp32wav(samp.rate = 22.05)
# Use check_wavs to see if the wav files can be read
check_wavs()
##
# Make long spectrograms of whole recordings
##
# Create a vector of all the .wav recordings in the working directory.
# The pattern is anchored to the ".wav" extension and matched
# case-insensitively, consistent with the listing used later for
# auto_detec(); the previous pattern "wav$" also matched file names that
# merely end in "wav" (no dot) and silently missed ".WAV" files.
wavs <- list.files(pattern = "\\.wav$", ignore.case = TRUE)
# Print this object to see all sound files
wavs
# How long are these files? this will determine number of pages returned by full_spectrograms
duration_wavs(wavs)
# ovlp = 10 (10% window overlap) to speed up the function
# tiff image files are better quality and are faster to produce
full_spectrograms(flist = wavs, ovlp = 10, it = "tiff")
# We can zoom in on the frequency axis by changing flim (frequency limits, kHz),
# the number of seconds per row (sxrow), and the number of rows per page (rows)
full_spectrograms(flist = wavs, flim = c(2, 10), sxrow = 6, rows = 15, ovlp = 10, it = "tiff")
##
# Once satisfied with the argument settings we can make long spectrograms for all the sound files.
##
# Make long spectrograms for the xeno-canto sound files (jpeg output this time)
full_spectrograms(flim = c(2, 10), ovlp = 10, sxrow = 6, rows = 15, it = "jpeg", flist = wavs)
# Concatenate full_spectrograms image files into a single PDF per recording
# full_spectrograms images must be jpegs to do this
full_spectrogram2pdf(keep.img = FALSE, overwrite = TRUE)
##
# Automatically detect signals with auto_detec
##
# Select a subset of sound files
# Reinitialize the wav object (case-insensitive match on the .wav extension)
wavs <- list.files(pattern = ".wav$", ignore.case = TRUE)
# Set a seed so we all have the same results
set.seed(1)
# seq_along() is safe for an empty vector, where 1:length(wavs)
# would wrongly yield c(1, 0)
sub <- wavs[sample(seq_along(wavs), 3)]
##
# Run auto_detec() on subset of recordings
##
# Once we're satisfied with the detection, we can run auto_detec on all the
# recordings, removing the argument flist (so auto_detec runs over all wav files
# in the working directory). We will also save the temporal output in an object.
Phae.ad <- auto_detec(
  path = wd,
  threshold = 20, # amplitude threshold in %
  ssmooth = 900, # amplitude envelope with sum smooth
  bp = c(2, 10), # bandpass filter (between 2 and 10 kHz)
  wl = 300, # window length for the ffilter bandpass
  # use every logical core available instead of a hard-coded machine-specific
  # count (detectCores() already includes hyper-threaded logical cores)
  parallel = parallel::detectCores()
)
# Let's look at the number of selections per sound file
table(Phae.ad$sound.files)
# Create an image with all detections overlaid on the long spectrograms.
# Reuse the detections already computed above (Phae.ad) instead of
# re-running auto_detec() inline, which repeated the entire (expensive)
# detection pass over every file for no benefit.
full_spectrograms(
  flim = c(2, 10), # frequency limits in kHz
  ovlp = 10, # 10% window overlap to speed things up
  sxrow = 6, # seconds of recording per row
  rows = 15, # rows per page
  it = "jpeg", # jpegs are required for the pdf concatenation below
  flist = wavs,
  X = Phae.ad # selection table with the detections to plot
)
# combine the images into a single pdf per recording like before
full_spectrogram2pdf(keep.img = FALSE, overwrite = TRUE)