##
# Library imports
##
library(readODS)
library(tidyverse)
library(dplyr)
library(leaflet)
library(RColorBrewer)

##
# Parse the input data, declare global values and auxiliary data
##

# read a data frame from the first sheet of the ods document
df <- read_ods(path = "data/ironwood_data_cleaned.ods", sheet = 1)

# site base location (used to centre the map); kept numeric because the map
# view is set from these values
population_lat <- -33.943917
population_lon <- 23.507389

# vector of condition names corresponding to the health index numbers
condition_names <- c(
  "healthy",
  "light damage",
  "medium damage",
  "severe damage",
  "at point of death"
)

# colors for each condition
condition_colors <- c("green", "yellow", "orange", "red", "black")

# The health index is treated as 0-based (0 = first condition), matching the
# `tree_health_index + 1` colour lookup used for the map in section 4.
# Using a factor with explicit levels keeps all five conditions in every
# table, even when no tree has a given condition, so names and colours stay
# aligned with the bars.
health_levels <- seq_along(condition_names) - 1L
health <- factor(df$tree_health_index, levels = health_levels)
stopifnot("no valid tree health indices found" = any(!is.na(health)))

##
# 1.) assess the tree health of the entire population
#     create an overview of the population's health
##

# Calculate the percentage of trees in each health condition
percentage <- proportions(table(health)) * 100

# Create the bar plot; barplot() returns the bar midpoints, which are reused
# below to place the percentage labels
bar_mids <- barplot(
  percentage,
  names.arg = condition_names,
  main = "Overview of Tree Health Index",
  xlab = "Health Index",
  ylab = "Percentage of Trees",
  ylim = c(0, max(percentage) + 10),
  col = condition_colors,
  border = "black"
)

# Adding a legend
legend("topright", legend = condition_names, fill = condition_colors)

# Adding a box around the plot
# box()

# Add labels with the percentage of trees above each bar
text(
  x = bar_mids,
  y = percentage,
  labels = paste0(round(percentage, 1), "%"),
  pos = 3
)

##
# 2. Create a stacked barchart that represents all sites and their health data
##

# NOTE(review): this assumes `site_num` holds the site of each tree row --
# confirm against the sheet layout. Trees without a site number cannot be
# assigned to a bar; they are reported and left out.
has_site <- !is.na(health) & !is.na(df$site_num)
n_unassigned <- sum(!is.na(health) & is.na(df$site_num))
if (n_unassigned > 0) {
  warning(
    n_unassigned,
    " trees have no site number and are left out of the per-site chart",
    call. = FALSE
  )
}

# create color palette: one colour per health condition
coul <- brewer.pal(length(condition_names), "Pastel2")

if (any(has_site)) {
  # one row per health condition, one column per site
  health_data <- table(health[has_site], df$site_num[has_site])
  rownames(health_data) <- condition_names
  colnames(health_data) <- paste("Site", colnames(health_data), sep = " ")

  # Transform the counts into % per site (each column sums to 100)
  data_percentage <- proportions(health_data, margin = 2) * 100

  # Make a stacked barplot --> it will be in %!
  barplot(
    data_percentage,
    col = coul,
    border = "white",
    xlab = "group",
    ylab = "Percentage of Trees"
  )
} else {
  message("No tree has a site number; skipping the per-site chart")
}

##
# 3. perform a shapiro-wilk normality test
#    - the goal is to see if the health is normally distributed
##

# Perform Shapiro-Wilk test
shapiro_test <- shapiro.test(df$tree_health_index)

# Print the test results
print(shapiro_test)

# Check the p-value
p_value <- shapiro_test$p.value

# Interpret the results at the 5% significance level
if (p_value < 0.05) {
  print("The data is not normally distributed (reject the null hypothesis)")
} else {
  print("The data is normally distributed (fail to reject the null hypothesis)")
}

##
# 4. try to fit health and location data in one plot
##

# create a subset of the site locations (rows that carry a site number)
sites <- df[complete.cases(df$site_num), ]

# ensure all site coordinates are numeric
sites$site_lat <- as.numeric(sites$site_lat)
sites$site_lon <- as.numeric(sites$site_lon)

# create a map centred on our base location: one small dot per tree, coloured
# by health condition, plus a large unfilled circle per site
map <- leaflet() %>%
  setView(lng = population_lon, lat = population_lat, zoom = 16) %>%
  addTiles() %>%
  addCircleMarkers(
    data = df,
    lng = ~tree_lon,
    lat = ~tree_lat,
    radius = 2,
    color = ~condition_colors[tree_health_index + 1],
    opacity = 1,
    fillOpacity = 1
  ) %>%
  addCircleMarkers(
    data = sites,
    lng = ~site_lon,
    lat = ~site_lat,
    radius = 25,
    fill = FALSE,
    color = "green",
    opacity = 0.1
  )

# show map
map

##
# ToDo Tasks:
##
# 4. calculate the average DBH and try to correlate it with the health index
# 5. plot health indices of each site on a map and try to find patterns