diff --git a/ML/aufgaben/naivebayes/naivebayes_digits.py b/ML/aufgaben/naivebayes/naivebayes_digits.py
new file mode 100644
index 0000000..1b70bae
--- /dev/null
+++ b/ML/aufgaben/naivebayes/naivebayes_digits.py
@@ -0,0 +1,49 @@
+"""
+Use the naive bayes classifier to classify the digits data set.
+
+- This is an example of a supervised ML algorithm
+  - it has labels on the training data
+  - you tell the model: this is class X during training
+"""
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+
+digits = datasets.load_digits()
+print(digits.data.shape)
+
+# split into training and test data
+x_train, x_test, y_train, y_test = train_test_split(
+    digits.data, digits.target, test_size=0.2, random_state=0
+)
+
+# use a gaussian NB classifier
+classifier = GaussianNB()
+# train on the split data
+classifier.fit(x_train, y_train)
+# test the model and print its accuracy
+score = classifier.score(x_test, y_test)
+print(score)
+
+# visualizing the learned means as 8x8 images
+fig, axes = plt.subplots(2, 5, figsize=(12, 5))
+for i, ax in enumerate(axes.flat):
+    ax.imshow(classifier.theta_[i].reshape(8, 8), cmap='gray_r')
+    ax.set_title(f'Class {i}')
+    ax.axis('off')
+fig.suptitle('NB: Mean pixel intensity per class')
+fig.savefig('naivebayes_digits_means.png', dpi=150, bbox_inches='tight')
+
+# The variance plot shows where pixels vary most within a class:
+# - high variance (bright) means that pixel isn't reliable for classification
+# - low variance (dark) means it's consistent.
+fig, axes = plt.subplots(2, 5, figsize=(12, 5))
+for i, ax in enumerate(axes.flat):
+    ax.imshow(classifier.var_[i].reshape(8, 8), cmap='hot')
+    ax.set_title(f'Class {i}')
+    ax.axis('off')
+fig.suptitle('NB: Pixel variance per class')
+fig.savefig('naivebayes_digits_variance.png', dpi=150, bbox_inches='tight')
diff --git a/ML/aufgaben/naivebayes/naivebayes_digits_means.png b/ML/aufgaben/naivebayes/naivebayes_digits_means.png
new file mode 100644
index 0000000..1daebbc
Binary files /dev/null and b/ML/aufgaben/naivebayes/naivebayes_digits_means.png differ
diff --git a/ML/aufgaben/naivebayes/naivebayes_digits_variance.png b/ML/aufgaben/naivebayes/naivebayes_digits_variance.png
new file mode 100644
index 0000000..f12b25e
Binary files /dev/null and b/ML/aufgaben/naivebayes/naivebayes_digits_variance.png differ
diff --git a/ML/aufgaben/naivebayes/naivebayes_iris.py b/ML/aufgaben/naivebayes/naivebayes_iris.py
new file mode 100644
index 0000000..762b33f
--- /dev/null
+++ b/ML/aufgaben/naivebayes/naivebayes_iris.py
@@ -0,0 +1,42 @@
+"""
+Use the naive bayes classifier to classify the iris data set.
+
+- This is an example of a supervised ML algorithm
+  - it has labels on the training data
+  - you tell the model: this is class X during training
+"""
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+
+iris = datasets.load_iris()
+print(iris.data.shape)
+
+# split into training and test data
+x_train, x_test, y_train, y_test = train_test_split(
+    iris.data, iris.target, test_size=0.2, random_state=0
+)
+
+# use a gaussian NB classifier
+classifier = GaussianNB()
+# train on the split data
+classifier.fit(x_train, y_train)
+# test the model and print its accuracy
+score = classifier.score(x_test, y_test)
+print(score)
+
+# plot the Gaussian density the model learned for each class, one subplot per feature
+fig, axes = plt.subplots(2, 2, figsize=(12, 10))
+for idx, ax in enumerate(axes.flat):
+    x_range = np.linspace(iris.data[:, idx].min() - 1, iris.data[:, idx].max() + 1, 200)
+    for class_idx, class_name in enumerate(iris.target_names):
+        mean = classifier.theta_[class_idx, idx]
+        var = classifier.var_[class_idx, idx]
+        gaussian = np.exp(-0.5 * (x_range - mean) ** 2 / var) / np.sqrt(2 * np.pi * var)
+        ax.plot(x_range, gaussian, label=class_name)
+    ax.set_title(iris.feature_names[idx])
+    ax.legend()
+fig.savefig('naivebayes_iris_distributions.png', dpi=150, bbox_inches='tight')
diff --git a/ML/aufgaben/naivebayes/naivebayes_iris_distributions.png b/ML/aufgaben/naivebayes/naivebayes_iris_distributions.png
new file mode 100644
index 0000000..4e540aa
Binary files /dev/null and b/ML/aufgaben/naivebayes/naivebayes_iris_distributions.png differ