147 lines
4.7 KiB
R
147 lines
4.7 KiB
R
library(warbleR)
|
|
|
|
# Create a dedicated folder inside the session's temporary directory and make
# it the working directory, so every file produced below lands in one place.
wd <- file.path(tempdir(), "xeno-canto_example")

# tempdir() is stable within an R session, so the folder may already exist
# when the script is re-run; only create it when it is missing to avoid the
# "already exists" warning from dir.create().
if (!dir.exists(wd)) {
  dir.create(wd, recursive = TRUE)
}

setwd(wd)
|
|
|
|
##
# Do the queries
##

# Genus-level query: ask xeno-canto for every Phaethornis recording, without
# downloading any sound file (download = FALSE)
Phae <- query_xc(
  qword = "Phaethornis",
  download = FALSE
)

# Inspect the structure of the resulting data frame
str(Phae)

# Species-level query: the same request narrowed to Phaethornis longirostris
Phae.lon <- query_xc(
  qword = "Phaethornis longirostris",
  download = FALSE
)

# Inspect the structure of the resulting data frame
str(Phae.lon)
|
|
|
|
##
# Filter xeno-canto recordings by quality, signal type and locality
##

# Number of recordings available for Phaethornis longirostris
nrow(Phae.lon)

# Distinct signal types present in the xeno-canto metadata
unique(Phae.lon[["Vocalization_type"]])

# Number of recordings for each signal type
table(Phae.lon[["Vocalization_type"]])
|
|
|
|
##
# Filter the metadata to select the signals we want to retain
##

# Step 1 - quality: keep only the recordings rated "A"
is_quality_a <- Phae.lon$Quality == "A"
Phae.lon <- Phae.lon[is_quality_a, ]
nrow(Phae.lon)

# Step 2 - signal type: keep rows whose vocalization type mentions "song"
# (case-insensitive)
song_rows <- grep(
  pattern = "song",
  x = Phae.lon$Vocalization_type,
  ignore.case = TRUE
)
Phae.lon.song <- Phae.lon[song_rows, ]
nrow(Phae.lon.song)

# Step 3 - locality: keep rows recorded at La Selva (case-sensitive match)
la_selva_rows <- grep(
  pattern = "La Selva Biological Station, Sarapiqui, Heredia",
  x = Phae.lon.song$Locality,
  ignore.case = FALSE
)
Phae.lon.LS <- Phae.lon.song[la_selva_rows, ]
nrow(Phae.lon.LS)

# Check the resulting data frame, 3 recordings remain
str(Phae.lon.LS)

# Check the location of the retained recordings
map_xc(X = Phae.lon.LS, img = FALSE)
|
|
|
|
|
|
##
# Once we're sure the recordings fit, it's time to download the files and
# also save the metadata as a .csv file
##

# Download the sound files listed in the filtered metadata
query_xc(X = Phae.lon.LS)

# Write the filtered metadata next to the sound files as a .csv file
write.csv(
  x = Phae.lon.LS,
  file = "Phae_lon.LS.csv",
  row.names = FALSE
)
|
|
|
|
##
# xeno-canto maintains recordings in mp3 format due to file size restrictions.
# However, we require wav format for all downstream analyses
##

# Convert the mp3 files to wav, downsampling from the original 44.1 kHz to
# 22.05 kHz to speed up downstream analyses in the vignette series
mp32wav(
  samp.rate = 22.05
)

# Use check_wavs() to see if the wav files can be read
check_wavs()
|
|
|
|
##
# Make long spectrograms of whole recordings
##

# Create a vector of all the wav recordings in the working directory.
# The dot is escaped so only a real ".wav" extension matches (the bare "wav$"
# would also match names such as "notawav"), and the match is case-insensitive
# so ".WAV" files are included as well.
wavs <- list.files(pattern = "\\.wav$", ignore.case = TRUE)

# Print this object to see all sound files
wavs

# How long are these files? This determines the number of pages returned by
# full_spectrograms
duration_wavs(wavs)

# ovlp = 10 to speed up the function
# tiff image files are better quality and are faster to produce
full_spectrograms(flist = wavs, ovlp = 10, it = "tiff")

# We can zoom in on the frequency axis by changing flim, and adjust the
# number of seconds per row (sxrow) and the number of rows (rows)
full_spectrograms(
  flist = wavs,
  flim = c(2, 10),
  sxrow = 6,
  rows = 15,
  ovlp = 10,
  it = "tiff"
)
|
|
|
|
##
# Once satisfied with the argument settings we can make long spectrograms for
# all the sound files.
##

# Produce the long spectrograms for the xeno-canto sound files as jpeg images
full_spectrograms(
  flist = wavs,
  flim = c(2, 10),
  sxrow = 6,
  rows = 15,
  ovlp = 10,
  it = "jpeg"
)

# Concatenate the full_spectrograms image files into a single PDF per
# recording; the images must be jpegs for this to work
full_spectrogram2pdf(
  overwrite = TRUE,
  keep.img = FALSE
)
|
|
|
|
##
# Automatically detect signals with auto_detec
##

# Select a subset of sound files

# Reinitialize the wav object. The dot is escaped so that only a real ".wav"
# extension matches (an unescaped "." matches any character).
wavs <- list.files(pattern = "\\.wav$", ignore.case = TRUE)

# Set a seed so we all have the same results
set.seed(1)

# Draw up to 3 files at random. Capping the sample size at length(wavs) avoids
# the "cannot take a sample larger than the population" error when fewer than
# 3 wav files are present, and sampling the names directly avoids the
# 1:length(wavs) idiom, which misbehaves on an empty vector.
sub <- sample(wavs, size = min(3, length(wavs)))
|
|
|
|
##
# Run auto_detec() on subset of recordings
##

# Play around with the auto detection, setting the values accordingly.
# Phaethornis longirostris songs have frequencies between 2 and 10 kHz and
# durations between 0.05 and 0.5 s.
Phae.ad <- auto_detec(
  path = wd,
  flist = sub, # restrict detection to the randomly selected subset of files
  threshold = 20, # amplitude threshold in %
  ssmooth = 900, # amplitude envelope with sum smooth
  bp = c(2, 10), # bandpass filter (between 2 and 10 kHz)
  wl = 300, # window for ffilter bandpass
  parallel = 6 * 2 # how many cores shall be used in parallel (*2 due to hyper threading)
)

# Let's look at the number of selections per sound file
table(Phae.ad$sound.files)
|
|
|
|
# When we're done, create an image with all detections using full spectrograms.
# The detections already stored in Phae.ad are reused as the annotation table
# instead of running auto_detec() a second time with a duplicated argument
# list, which would repeat the whole detection and could drift out of sync
# with the settings used to build Phae.ad.
full_spectrograms(
  flim = c(2, 10),
  ovlp = 10,
  sxrow = 6,
  rows = 15,
  it = "jpeg",
  flist = wavs,
  X = Phae.ad
)

# Combine the images into a single PDF per recording, like before
full_spectrogram2pdf(keep.img = FALSE, overwrite = TRUE)
|