"""
Use the random forest classifier to classify the iris data set.

- This is an example of a supervised ML algorithm
    - it has labels on the training data
    - you tell the model: this is class X during training
"""

import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the iris data set bundled with scikit-learn and print the shape of
# its feature array.
iris = datasets.load_iris()
features, labels = iris.data, iris.target
print(features.shape)

# Hold back 20% of the samples for testing; the fixed seed keeps the split
# reproducible from run to run.
split_options = {"test_size": 0.2, "random_state": 0}
x_train, x_test, y_train, y_test = train_test_split(
    features, labels, **split_options
)

# Build a random forest of 100 trees (seeded for reproducibility) and train
# it on the training split; fit() returns the fitted estimator itself.
classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    x_train, y_train
)

# Evaluate the model on the held-out test split and print its accuracy.
score = classifier.score(x_test, y_test)
print(score)

# Render the first decision tree of the forest and save it as a PNG image.
first_tree = classifier.estimators_[0]
fig, ax = plt.subplots(figsize=(20, 10))
tree_plot_options = {
    "feature_names": iris.feature_names,
    "class_names": list(iris.target_names),
    "filled": True,
    "rounded": True,
    "ax": ax,
}
tree.plot_tree(first_tree, **tree_plot_options)
fig.savefig("randomforest_iris_tree_0.png", dpi=150, bbox_inches="tight")

# Plot a horizontal bar chart with the importance of every feature.
# An explicit Figure/Axes pair is used instead of pyplot's implicit
# "current figure", so the bars and the saved file always refer to this chart.
importance_fig, importance_ax = plt.subplots()
importance_ax.barh(iris.feature_names, classifier.feature_importances_)
importance_ax.set_xlabel('Importance')
importance_ax.set_title('Random Forest: Feature Importance (Iris)')
importance_fig.savefig(
    'randomforest_iris_feature_importance.png', dpi=150, bbox_inches='tight'
)