148 lines
4.9 KiB
R
148 lines
4.9 KiB
R
library(warbleR)

# Work inside a fresh folder created under the session temp directory,
# so downloaded files do not pollute the user's file system
wd <- file.path(tempdir(), "xeno-canto_example")
dir.create(wd)
setwd(wd)
## ---------------------------------------------------------------
## Do the queries
## ---------------------------------------------------------------

# Query xeno-canto for all Phaethornis recordings (genus-level search);
# download = FALSE returns metadata only
Phae <- query_xc(qword = "Phaethornis", download = FALSE)

# Inspect the structure of the resulting data frame
str(Phae)

# Query xeno-canto for Phaethornis longirostris recordings only
Phae.lon <- query_xc(qword = "Phaethornis longirostris", download = FALSE)

# Inspect the structure of the resulting data frame
str(Phae.lon)
## ---------------------------------------------------------------
## Filter xeno-canto recordings by quality, signal type and locality
## ---------------------------------------------------------------

# How many recordings are available for Phaethornis longirostris?
nrow(Phae.lon)

# Which signal types are present in the xeno-canto metadata?
unique(Phae.lon$Vocalization_type)

# How many recordings per signal type?
table(Phae.lon$Vocalization_type)
## ---------------------------------------------------------------
## Filter the metadata to select the signals we want to retain
## ---------------------------------------------------------------

# First by quality: keep only the top ("A") quality class
Phae.lon <- Phae.lon[Phae.lon$Quality == "A", ]
nrow(Phae.lon)

# Then by signal type: keep recordings whose type mentions "song"
Phae.lon.song <- Phae.lon[grep("song", Phae.lon$Vocalization_type, ignore.case = TRUE), ]
nrow(Phae.lon.song)

# Finally by locality (case-sensitive match on the locality string)
Phae.lon.LS <- Phae.lon.song[grep("La Selva Biological Station, Sarapiqui, Heredia", Phae.lon.song$Locality, ignore.case = FALSE), ]
nrow(Phae.lon.LS)

# Check the resulting data frame; 3 recordings remain
str(Phae.lon.LS)

# Plot the recording locations on a map
map_xc(Phae.lon.LS, img = FALSE)
## ---------------------------------------------------------------
## Once we're sure the recordings fit, download the sound files and
## save the metadata as a .csv file
## ---------------------------------------------------------------

# Download the sound files for the selected recordings
query_xc(X = Phae.lon.LS)

# Save the metadata object as a .csv file
write.csv(Phae.lon.LS, "Phae_lon.LS.csv", row.names = FALSE)
## ---------------------------------------------------------------
## xeno-canto stores recordings as mp3 due to file size limits, but
## downstream analyses require wav format
## ---------------------------------------------------------------

# Convert mp3 to wav, downsampling the original 44.1 kHz sampling
# rate to speed up downstream analyses in the vignette series
mp32wav(samp.rate = 22.05)

# Verify that the resulting wav files can be read
check_wavs()
## ---------------------------------------------------------------
## Make long spectrograms of whole recordings
## ---------------------------------------------------------------

# List all wav recordings in the working directory.
# Anchor on the ".wav" extension (escaped dot, case-insensitive) so the
# pattern cannot match unrelated names that merely end in "wav", and so
# it is consistent with the file listing used later in this script.
wavs <- list.files(pattern = "\\.wav$", ignore.case = TRUE)

# Print this object to see all sound files
wavs

# File durations determine the number of pages returned by
# full_spectrograms
duration_wavs(wavs)

# ovlp = 10 speeds up the function; tiff image files are better
# quality and faster to produce
full_spectrograms(flist = wavs, ovlp = 10, it = "tiff")

# Zoom in on the frequency axis with flim, and control the number of
# seconds per row (sxrow) and the number of rows per page
full_spectrograms(flist = wavs, flim = c(2, 10), sxrow = 6, rows = 15, ovlp = 10, it = "tiff")
## ---------------------------------------------------------------
## Once satisfied with the argument settings, make long spectrograms
## for all the sound files
## ---------------------------------------------------------------

# Make long spectrograms for the xeno-canto sound files
full_spectrograms(flim = c(2, 10), ovlp = 10, sxrow = 6, rows = 15, it = "jpeg", flist = wavs)

# Concatenate the full_spectrograms images into a single PDF per
# recording (the images must be jpegs for this step)
full_spectrogram2pdf(keep.img = FALSE, overwrite = TRUE)
## ---------------------------------------------------------------
## Automatically detect signals with auto_detec
## ---------------------------------------------------------------

# Reinitialize the wav file vector
wavs <- list.files(pattern = ".wav$", ignore.case = TRUE)

# Set a seed so we all get the same random subset
set.seed(1)

# Pick 3 files at random; seq_along() is safe even if wavs is empty,
# unlike 1:length(wavs), which would yield c(1, 0) there
sub <- wavs[sample(seq_along(wavs), 3)]
## ---------------------------------------------------------------
## Run auto_detec() on the recordings
## ---------------------------------------------------------------

# Once we're satisfied with the detection, run auto_detec on all the
# recordings (no flist argument, so it runs over every wav file in the
# working directory) and save the temporal output in an object.
# (A duplicated copy of this comment paragraph was removed.)
Phae.ad <- auto_detec(
  path = wd,
  threshold = 20, # amplitude threshold in %
  ssmooth = 900, # amplitude envelope sum smooth
  bp = c(2, 10), # bandpass filter (between 2 and 10 kHz)
  wl = 300, # window length for the ffilter bandpass
  # Use the logical cores actually present on this machine instead of
  # the hard-coded 6 * 2, which assumed 6 physical cores + hyperthreading
  parallel = parallel::detectCores()
)
# Look at the number of selections per sound file
table(Phae.ad$sound.files)

# Create an image with all detections. Reuse the detections already
# stored in Phae.ad instead of re-running auto_detec with identical
# parameters, which needlessly doubled the run time.
full_spectrograms(flim = c(2, 10),
                  ovlp = 10,
                  sxrow = 6,
                  rows = 15,
                  it = "jpeg",
                  flist = wavs,
                  X = Phae.ad)

# Combine the images into a single pdf per recording, as before
full_spectrogram2pdf(keep.img = FALSE, overwrite = TRUE)