160 lines
4.6 KiB
R
160 lines
4.6 KiB
R
##
|
|
# Libary imports
|
|
##
|
|
|
|
library(readODS)
|
|
library(tidyverse)
|
|
library(dplyr)
|
|
library(leaflet)
|
|
|
|
##
|
|
# parse the input data, declare global values and auxiliary data
|
|
##
|
|
|
|
# read a data frame from the ods document
|
|
df <- read_ods(path = "data/ironwood_data_cleaned.ods",
|
|
sheet = 1)
|
|
# site base location (to zero in the map)
|
|
population_lat <- "-33.943917"
|
|
population_lon <- "23.507389"
|
|
# vector of condition names corresponding to the health index numbers
|
|
condition_names <- c("healthy", "light damage",
|
|
"medium damage", "severe damage",
|
|
"at point of death")
|
|
# colors for each condition
|
|
condition_colors <- c("green", "yellow", "orange", "red", "black")
|
|
|
|
|
|
##
|
|
# 1.) asses the tree health of the entire population
|
|
# create an overview of the populations health
|
|
##
|
|
|
|
# Calculate the percentage of trees in each health condition
|
|
percentage <- proportions(table(df$tree_health_index)) * 100
|
|
# Now, let's create the bar plot
|
|
barplot(percentage,
|
|
names.arg = condition_names,
|
|
main = "Overview of Tree Health Index",
|
|
xlab = "Health Index",
|
|
ylab = "Percentage of Trees",
|
|
ylim = c(0, max(percentage) + 10),
|
|
col = condition_colors,
|
|
border = "black")
|
|
# Adding a legend
|
|
legend("topright",
|
|
legend = condition_names,
|
|
fill = condition_colors)
|
|
# Adding a box around the plot
|
|
#box()
|
|
# Add labels with the percentage of trees in each bar
|
|
text(x = barplot(percentage, plot = FALSE),
|
|
y = percentage,
|
|
labels = paste0(round(percentage, 1), "%"),
|
|
pos = 3)
|
|
|
|
##
|
|
# 2. Create a stacked barchart that represents all site and their health data
|
|
##
|
|
|
|
# Convert tree_health_index to factor for correct ordering in the barplot
|
|
df$tree_health_index <- factor(df$tree_health_index, levels = 0:4, ordered = TRUE)
|
|
|
|
# Create a new data frame with counts of each health index for each site
|
|
health_counts <- df %>%
|
|
group_by(site_num, tree_health_index) %>%
|
|
summarise(count = n())
|
|
|
|
# Pivot the data frame to wide format for ggplot
|
|
health_counts_wide <- pivot_wider(health_counts, names_from = tree_health_index, values_from = count, values_fill = list(count = 0))
|
|
|
|
ggplot(health_counts_wide, aes(x = site_num)) +
|
|
geom_bar(aes(fill = `0`), position = "stack", width = 0.5) +
|
|
geom_bar(aes(fill = `1`), position = "stack", width = 0.5) +
|
|
geom_bar(aes(fill = `2`), position = "stack", width = 0.5) +
|
|
geom_bar(aes(fill = `3`), position = "stack", width = 0.5) +
|
|
geom_bar(aes(fill = `4`), position = "stack", width = 0.5) +
|
|
labs(x = "Site Number", y = "Count", fill = "Health Index") +
|
|
scale_fill_manual(values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e")) + # Custom colors for each health index
|
|
theme_minimal()
|
|
|
|
|
|
colnames(health_data) <- paste("Site", seq(1,20), sep=" ")
|
|
rownames(health_data) <- condition_names
|
|
|
|
# create color palette:
|
|
library(RColorBrewer)
|
|
coul <- brewer.pal(3, "Pastel2")
|
|
# Transform this data in %
|
|
data_percentage <- apply(data, 2, function(x){x*100/sum(x,na.rm=T)})
|
|
# Make a stacked barplot--> it will be in %!
|
|
barplot(df$tree_health_index, col=coul , border="white", xlab="group")
|
|
|
|
|
|
##
|
|
# 3. perform a shapiro-wilk normality test
|
|
# - the goal is to see if the health is normally distributed
|
|
##
|
|
|
|
# Perform Shapiro-Wilk test
|
|
shapiro_test <- shapiro.test(df$tree_health_index)
|
|
# Print the test results
|
|
print(shapiro_test)
|
|
# Check the p-value
|
|
p_value <- shapiro_test$p.value
|
|
# Interpret the results
|
|
if (p_value < 0.05) {
|
|
print("The data is not normally distributed (reject the null hypothesis)")
|
|
} else {
|
|
print("The data is normally distributed (fail to reject the null hypothesis)")
|
|
}
|
|
|
|
##
|
|
# 4. try to fit health and location data in one plot
|
|
##
|
|
|
|
# create a subset of the site locations
|
|
sites <- df[complete.cases(df$site_num),]
|
|
# ensure all coordinates are numeric
|
|
sites$site_lat <- as.numeric(sites$site_lat)
|
|
sites$site_lon <- as.numeric(sites$site_lon)
|
|
|
|
# create a map from our base location
|
|
map <- leaflet() %>%
|
|
setView(lng = population_lon,
|
|
lat = population_lat,
|
|
zoom = 16) %>%
|
|
addProviderTiles(
|
|
provider = "Esri.WorldImagery",
|
|
options = providerTileOptions(
|
|
opacity = 0.5
|
|
)
|
|
) %>%
|
|
addCircleMarkers(
|
|
data = df,
|
|
lng = ~tree_lon,
|
|
lat = ~tree_lat,
|
|
radius = 1,
|
|
color = ~condition_colors[tree_health_index+1],
|
|
opacity = 1,
|
|
fillOpacity = 1
|
|
) %>%
|
|
addCircleMarkers(
|
|
data = sites,
|
|
lng = ~site_lon,
|
|
lat = ~site_lat,
|
|
radius = 40,
|
|
fill = FALSE,
|
|
color = "black",
|
|
opacity = 0.5
|
|
)
|
|
|
|
# show map
|
|
map
|
|
|
|
##
|
|
# ToDo Tasks:
|
|
##
|
|
|
|
# 4. calculate the average DBH and try to correlate it with the health index
|
|
# 5. plot health indices of each site on a map and try to find patterns |