##
# Library imports
##

library(readODS)
library(tidyverse)
library(dplyr)
library(leaflet)

##
# parse the input data, declare global values and auxiliary data
##

# read the first sheet of the ods document into a data frame
df <- read_ods(path = "data/ironwood_data_cleaned.ods",
               sheet = 1)
# site base location (used to centre the map); stored as numbers rather than
# character strings because they are passed to leaflet's setView() as lat/lng
population_lat <- -33.943917
population_lon <- 23.507389
# vector of condition names corresponding to the health index numbers
# (the map section looks entries up with tree_health_index + 1, i.e. the
# first name belongs to health index 0)
condition_names <- c("healthy", "light damage",
                     "medium damage", "severe damage",
                     "at point of death")
# colors for each condition, in the same order as condition_names
condition_colors <- c("green", "yellow", "orange", "red", "black")


##
# 1.) assess the tree health of the entire population
#     create an overview of the population's health
##

# Health index values, one per entry of condition_names. The index is treated
# as 0-based because the map section looks colours up with
# condition_colors[tree_health_index + 1].
health_levels <- seq_along(condition_names) - 1
# Calculate the percentage of trees in each health condition.
# Tabulating a factor with fixed levels keeps conditions that have no trees
# in the table (as 0 %), so the bars always line up with condition_names and
# condition_colors. Index values outside health_levels are not counted.
percentage <- proportions(
  table(factor(df$tree_health_index, levels = health_levels))
) * 100
# Draw the bar plot; barplot() returns the bar midpoints, which are reused
# below to position the percentage labels
bar_midpoints <- barplot(percentage,
                         names.arg = condition_names,
                         main = "Overview of Tree Health Index",
                         xlab = "Health Index",
                         ylab = "Percentage of Trees",
                         # head room above the tallest bar for its label
                         ylim = c(0, max(percentage) + 10),
                         col = condition_colors,
                         border = "black")
# Adding a legend
legend("topright",
       legend = condition_names,
       fill = condition_colors)
# Adding a box around the plot
#box()
# Add labels with the percentage of trees above each bar
text(x = bar_midpoints,
     y = percentage,
     labels = paste0(round(percentage, 1), "%"),
     pos = 3)

##
# 2. Create a stacked bar chart that represents all sites and their health data
##

# NOTE(review): no random numbers are drawn in this section; the seed is kept
# only so the global RNG state stays the same for anything that runs later.
set.seed(1124)

# Site of every tree.
# NOTE(review): assumes the rows are ordered by site and that site_num is
# filled in at least on the first row of each site, so a missing site_num
# means "same site as the row above" -- confirm against the spreadsheet.
# If site_num is already present on every row this step changes nothing.
tree_site <- tidyr::fill(df["site_num"], "site_num")$site_num

# Count the trees per health condition (rows) and site (columns).
# The health index is treated as 0-based, matching the colour lookup
# condition_colors[tree_health_index + 1] used for the map; fixed factor
# levels keep conditions that do not occur at any site as all-zero rows.
health_levels <- seq_along(condition_names) - 1
health_data <- table(
  factor(df$tree_health_index, levels = health_levels),
  tree_site
)
rownames(health_data) <- condition_names
colnames(health_data) <- paste("Site", colnames(health_data))

# NOTE(review): the chart now reuses condition_colors, so RColorBrewer is no
# longer needed by this section; the library() call is kept so that nothing
# else relying on the package being attached breaks.
library(RColorBrewer)

# Transform the counts into percentages within each site (column)
data_percentage <- proportions(health_data, margin = 2) * 100

# Make a stacked barplot --> it will be in %!
barplot(data_percentage,
        col = condition_colors,
        border = "white",
        main = "Tree Health Index per Site",
        xlab = "Site",
        ylab = "Percentage of Trees",
        las = 2,               # turn the site labels so they do not overlap
        legend.text = TRUE,    # legend entries are the condition row names
        args.legend = list(x = "topright", bg = "white", cex = 0.7))


##
# 3. perform a Shapiro-Wilk normality test
# - the goal is to see whether the health index is normally distributed
##

# Run the test on the health index and show the complete result
shapiro_test <- shapiro.test(df$tree_health_index)
print(shapiro_test)
# Pull the p-value out of the test result
p_value <- shapiro_test[["p.value"]]
# Compare it with the 5 % significance level and report the verdict
print(
  if (p_value >= 0.05) {
    "The data is normally distributed (fail to reject the null hypothesis)"
  } else {
    "The data is not normally distributed (reject the null hypothesis)"
  }
)

##
# 4. try to fit health and location data in one plot
##

# the site locations are the rows that carry a site number
sites <- df[!is.na(df$site_num), ]
# the site coordinates have to be numeric before they go on the map
site_coord_cols <- c("site_lat", "site_lon")
sites[site_coord_cols] <- lapply(sites[site_coord_cols], as.numeric)

# start with an empty map centred on the base location
map <- leaflet()
map <- setView(map,
               lng = population_lon,
               lat = population_lat,
               zoom = 16)
# default background tiles
map <- addTiles(map)
# one small, fully opaque dot per tree, coloured by its health condition
# (the index is shifted by one to pick the matching entry of condition_colors)
map <- addCircleMarkers(map,
                        data = df,
                        lng = ~tree_lon,
                        lat = ~tree_lat,
                        radius = 2,
                        color = ~condition_colors[tree_health_index+1],
                        opacity = 1,
                        fillOpacity = 1)
# one large, faint, unfilled green ring per site
map <- addCircleMarkers(map,
                        data = sites,
                        lng = ~site_lon,
                        lat = ~site_lat,
                        radius = 25,
                        fill = FALSE,
                        color = "green",
                        opacity = 0.1)

# show map
map

##
# ToDo Tasks:
##

# 5. calculate the average DBH and try to correlate it with the health index
# 6. plot health indices of each site on a map and try to find patterns