## # Libary imports ## library(readODS) library(tidyverse) library(dplyr) library(leaflet) ## # parse the input data, declare global values and auxiliary data ## # read a data frame from the ods document df <- read_ods(path = "data/ironwood_data_cleaned.ods", sheet = 1) # site base location (to zero in the map) population_lat <- "-33.943917" population_lon <- "23.507389" # vector of condition names corresponding to the health index numbers condition_names <- c("healthy", "light damage", "medium damage", "severe damage", "at point of death") # colors for each condition condition_colors <- c("green", "yellow", "orange", "red", "black") ## # 1.) asses the tree health of the entire population # create an overview of the populations health ## # Calculate the percentage of trees in each health condition percentage <- proportions(table(df$tree_health_index)) * 100 # Now, let's create the bar plot barplot(percentage, names.arg = condition_names, main = "Overview of Tree Health Index", xlab = "Health Index", ylab = "Percentage of Trees", ylim = c(0, max(percentage) + 10), col = condition_colors, border = "black") # Adding a legend legend("topright", legend = condition_names, fill = condition_colors) # Adding a box around the plot #box() # Add labels with the percentage of trees in each bar text(x = barplot(percentage, plot = FALSE), y = percentage, labels = paste0(round(percentage, 1), "%"), pos = 3) ## # 2. Create a stacked barchart that represents all site and their health data ## # Convert tree_health_index to factor for correct ordering in the barplot df$tree_health_index <- factor(df$tree_health_index, levels = 0:4, ordered = TRUE) # Create a new data frame with counts of each health index for each site health_counts <- df %>% group_by(site_num, tree_health_index) %>% summarise(count = n()) # Pivot the data frame to wide format for ggplot health_counts_wide <- pivot_wider(health_counts, names_from = tree_health_index, values_from = count, values_fill = list(count = 0)) ggplot(health_counts_wide, aes(x = site_num)) + geom_bar(aes(fill = `0`), position = "stack", width = 0.5) + geom_bar(aes(fill = `1`), position = "stack", width = 0.5) + geom_bar(aes(fill = `2`), position = "stack", width = 0.5) + geom_bar(aes(fill = `3`), position = "stack", width = 0.5) + geom_bar(aes(fill = `4`), position = "stack", width = 0.5) + labs(x = "Site Number", y = "Count", fill = "Health Index") + scale_fill_manual(values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e")) + # Custom colors for each health index theme_minimal() colnames(health_data) <- paste("Site", seq(1,20), sep=" ") rownames(health_data) <- condition_names # create color palette: library(RColorBrewer) coul <- brewer.pal(3, "Pastel2") # Transform this data in % data_percentage <- apply(data, 2, function(x){x*100/sum(x,na.rm=T)}) # Make a stacked barplot--> it will be in %! barplot(df$tree_health_index, col=coul , border="white", xlab="group") ## # 3. perform a shapiro-wilk normality test # - the goal is to see if the health is normally distributed ## # Perform Shapiro-Wilk test shapiro_test <- shapiro.test(df$tree_health_index) # Print the test results print(shapiro_test) # Check the p-value p_value <- shapiro_test$p.value # Interpret the results if (p_value < 0.05) { print("The data is not normally distributed (reject the null hypothesis)") } else { print("The data is normally distributed (fail to reject the null hypothesis)") } ## # 4. try to fit health and location data in one plot ## # create a subset of the site locations sites <- df[complete.cases(df$site_num),] # ensure all coordinates are numeric sites$site_lat <- as.numeric(sites$site_lat) sites$site_lon <- as.numeric(sites$site_lon) # create a map from our base location map <- leaflet() %>% setView(lng = population_lon, lat = population_lat, zoom = 16) %>% addProviderTiles( provider = "Esri.WorldImagery", options = providerTileOptions( opacity = 0.5 ) ) %>% addCircleMarkers( data = df, lng = ~tree_lon, lat = ~tree_lat, radius = 1, color = ~condition_colors[tree_health_index+1], opacity = 1, fillOpacity = 1 ) %>% addCircleMarkers( data = sites, lng = ~site_lon, lat = ~site_lat, radius = 40, fill = FALSE, color = "black", opacity = 0.5 ) # show map map ## # ToDo Tasks: ## # 4. calculate the average DBH and try to correlate it with the health index # 5. plot health indices of each site on a map and try to find patterns