diff --git a/ML/aufgaben/randomforest/randomforest_digits.py b/ML/aufgaben/randomforest/randomforest_digits.py new file mode 100644 index 0000000..e2c83b6 --- /dev/null +++ b/ML/aufgaben/randomforest/randomforest_digits.py @@ -0,0 +1,50 @@ +""" +Use the random forest classifier to classify the digits data set. + +- This is an example of a supervised ML algorithm + - it has labels on the training data + - you tell the model: this is class X during training +""" + +import matplotlib.pyplot as plt + +from sklearn import datasets +from sklearn import tree +from sklearn.model_selection import train_test_split +from sklearn.ensemble import RandomForestClassifier + +digits = datasets.load_digits() +print(digits.data.shape) + +# split into training and test data +x_train, x_test, y_train, y_test = train_test_split( + digits.data, digits.target, test_size=0.2, random_state=0 +) + +# use a random forest classifier +classifier = RandomForestClassifier(n_estimators=100, random_state=42) +# train on the split data +classifier.fit(x_train, y_train) +# test the model and print it's accurecy +score = classifier.score(x_test, y_test) +print(score) + +# get the first tree and turn it into an image +fig, ax = plt.subplots(figsize=(20, 10)) +tree.plot_tree( + classifier.estimators_[0], + feature_names=digits.feature_names, + class_names=[str(n) for n in digits.target_names], + filled=True, + rounded=True, + ax=ax, +) +fig.savefig("randomforest_digits_tree_0.png", dpi=150, bbox_inches="tight") + +# for digits, plot feature importance as 8x8 heatmap +importances = classifier.feature_importances_ +plt.figure() +plt.imshow(importances.reshape(8, 8), cmap='hot') +plt.title('Random Forest: Feature Importance') +plt.colorbar() +plt.savefig('randomforest_digits_feature_importance.png', dpi=150, bbox_inches='tight') diff --git a/ML/aufgaben/randomforest/randomforest_digits_feature_importance.png b/ML/aufgaben/randomforest/randomforest_digits_feature_importance.png new file mode 100644 index 0000000..7991918 Binary files /dev/null and b/ML/aufgaben/randomforest/randomforest_digits_feature_importance.png differ diff --git a/ML/aufgaben/randomforest/randomforest_digits_tree_0.png b/ML/aufgaben/randomforest/randomforest_digits_tree_0.png new file mode 100644 index 0000000..a2a7b55 Binary files /dev/null and b/ML/aufgaben/randomforest/randomforest_digits_tree_0.png differ diff --git a/ML/aufgaben/randomforest/randomforest_iris.py b/ML/aufgaben/randomforest/randomforest_iris.py new file mode 100644 index 0000000..61542db --- /dev/null +++ b/ML/aufgaben/randomforest/randomforest_iris.py @@ -0,0 +1,49 @@ +""" +Use the random forest classifier to classify the iris data set. + +- This is an example of a supervised ML algorithm + - it has labels on the training data + - you tell the model: this is class X during training +""" + +import matplotlib.pyplot as plt + +from sklearn import datasets +from sklearn import tree +from sklearn.model_selection import train_test_split +from sklearn.ensemble import RandomForestClassifier + +iris = datasets.load_iris() +print(iris.data.shape) + +# split into training and test data +x_train, x_test, y_train, y_test = train_test_split( + iris.data, iris.target, test_size=0.2, random_state=0 +) + +# use a random forest classifier +classifier = RandomForestClassifier(n_estimators=100, random_state=42) +# train on the split data +classifier.fit(x_train, y_train) +# test the model and print it's accurecy +score = classifier.score(x_test, y_test) +print(score) + +# get the first tree and turn it into an image +fig, ax = plt.subplots(figsize=(20, 10)) +tree.plot_tree( + classifier.estimators_[0], + feature_names=iris.feature_names, + class_names=list(iris.target_names), + filled=True, + rounded=True, + ax=ax, +) +fig.savefig("randomforest_iris_tree_0.png", dpi=150, bbox_inches="tight") + +# plot a bar chart with the importance of all features +plt.figure() +plt.barh(iris.feature_names, classifier.feature_importances_) +plt.xlabel('Importance') +plt.title('Random Forest: Feature Importance (Iris)') +plt.savefig('randomforest_iris_feature_importance.png', dpi=150, bbox_inches='tight') diff --git a/ML/aufgaben/randomforest/randomforest_iris_feature_importance.png b/ML/aufgaben/randomforest/randomforest_iris_feature_importance.png new file mode 100644 index 0000000..6ba28c9 Binary files /dev/null and b/ML/aufgaben/randomforest/randomforest_iris_feature_importance.png differ diff --git a/ML/aufgaben/randomforest/randomforest_iris_tree_0.png b/ML/aufgaben/randomforest/randomforest_iris_tree_0.png new file mode 100644 index 0000000..c550e58 Binary files /dev/null and b/ML/aufgaben/randomforest/randomforest_iris_tree_0.png differ