##
# Library imports
##

library(readODS)
library(tidyverse)
library(dplyr)


# Load the first sheet of the cleaned ironwood spreadsheet into `df`
df <- readODS::read_ods(path = "ironwood_data_cleaned.ods", sheet = 1)


##
# Assess the tree health
##

# Labels for the health index codes 0..4, listed in index order
condition_names <- c("healthy", "light damage", "medium damage", "severe damage", "at point of death")
# One fill colour per condition, in the same order as condition_names
condition_colors <- c("green", "yellow", "orange", "red", "black")
# Histogram with one bar per integer health index value (0..4)
hist(df$Tree_Health_Index,
     breaks = 0:5 - 0.5,  # Bin edges at -0.5, 0.5, ..., 4.5 so each integer gets its own bar
     main = "Distribution of Tree Health Index",  # Title is drawn here; no separate title() call needed
     xlab = "Health Index",
     ylab = "Number of Trees",
     col = condition_colors,  # Bars are coloured in bin order, i.e. by health index
     border = "black",  # Outline colour of the bars
     xlim = c(-0.5, 4.5),  # Show exactly the five bins
     ylim = c(0, max(table(df$Tree_Health_Index)) * 1.1),  # 10% headroom above the tallest bar
     axes = FALSE)  # Default axes suppressed; custom ones are added below
# x axis: replace the numeric codes with the condition names
axis(1, at = 0:4, labels = condition_names)
# y axis: default tick marks
axis(2)
# Legend mapping colours to conditions. The outline colour of the fill boxes
# is set with `border`; legend() has no `border.col` argument and would stop
# with an "unused argument" error if one were passed.
legend("topright", legend = condition_names, fill = condition_colors, border = "black")
# Dotted reference grid aligned with the axis tick marks
grid(nx = NULL, ny = NULL, col = "lightgray", lty = "dotted")
# Frame around the plot region
box()

##
# Perform a Shapiro-Wilk normality test
##

# Shapiro-Wilk test; the null hypothesis is that the health index values come
# from a normal distribution. shapiro.test() ignores missing values but stops
# with an error unless there are between 3 and 5000 non-missing observations.
shapiro_test <- shapiro.test(df$Tree_Health_Index)
# Show the full test output (W statistic and p-value)
print(shapiro_test)
# Extract the p-value for the decision below
p_value <- shapiro_test$p.value
# Interpret the result at the 5% significance level.
# A p-value at or above the threshold only means the test found no evidence
# against normality; it does not prove the data are normally distributed.
if (p_value < 0.05) {
  print("The data is not normally distributed (reject the null hypothesis)")
} else {
  print("No significant departure from normality detected (fail to reject the null hypothesis)")
}

##
# Tasks:
##

# 1. find average dbh
# 2. find