""" Use the random forest classifier to classify the iris data set. - This is an example of a supervised ML algorithm - it has labels on the training data - you tell the model: this is class X during training """ import matplotlib.pyplot as plt from sklearn import datasets from sklearn import tree from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier iris = datasets.load_iris() print(iris.data.shape) # split into training and test data x_train, x_test, y_train, y_test = train_test_split( iris.data, iris.target, test_size=0.2, random_state=0 ) # use a random forest classifier classifier = RandomForestClassifier(n_estimators=100, random_state=42) # train on the split data classifier.fit(x_train, y_train) # test the model and print it's accurecy score = classifier.score(x_test, y_test) print(score) # get the first tree and turn it into an image fig, ax = plt.subplots(figsize=(20, 10)) tree.plot_tree( classifier.estimators_[0], feature_names=iris.feature_names, class_names=list(iris.target_names), filled=True, rounded=True, ax=ax, ) fig.savefig("randomforest_iris_tree_0.png", dpi=150, bbox_inches="tight") # plot a bar chart with the importance of all features plt.figure() plt.barh(iris.feature_names, classifier.feature_importances_) plt.xlabel('Importance') plt.title('Random Forest: Feature Importance (Iris)') plt.savefig('randomforest_iris_feature_importance.png', dpi=150, bbox_inches='tight')