feature(nb): add naive bayes for iris and digits dataset

2026-04-30 19:43:32 +02:00
parent aa374a7f53
commit 36172948a6
5 changed files with 89 additions and 0 deletions
@@ -0,0 +1,49 @@
+"""
+Use the naive bayes classifier to classify the digits data set.
+
+- This is an example of a supervised ML algorithm
+    - it has labels on the training data                                                    
+    - you tell the model: this is class X during training
+"""
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+
+digits = datasets.load_digits()
+print(digits.data.shape)
+
+# hold out 20% of the samples as test data; the fixed seed makes the split repeatable
+x_train, x_test, y_train, y_test = train_test_split(
+    digits.data, digits.target, random_state=0, test_size=0.2
+)
+
+# fit a gaussian NB classifier on the training split (fit() hands back the estimator)
+classifier = GaussianNB().fit(x_train, y_train)
+# score the fitted model on the held-out test split and print its accuracy
+score = classifier.score(x_test, y_test)
+print(score)
+
+
+def _save_class_grid(per_class_values, cmap, title, filename):
+    """Draw row i of per_class_values as an 8x8 image in a 2x5 grid, then save the figure."""
+    fig, axes = plt.subplots(2, 5, figsize=(12, 5))
+    for class_idx, ax in enumerate(axes.flat):
+        ax.imshow(per_class_values[class_idx].reshape(8, 8), cmap=cmap)
+        ax.set_title(f'Class {class_idx}')
+        ax.axis('off')
+    fig.suptitle(title)
+    fig.savefig(filename, dpi=150, bbox_inches='tight')
+
+
+# visualizing the learned means as 8x8 images
+_save_class_grid(classifier.theta_, 'gray_r',
+                 'NB: Mean pixel intensity per class', 'naivebayes_digits_means.png')
+
+# The variance plot shows where pixels vary most within a class:
+# - high variance (bright) means that pixel isn't reliable for classification
+# - low variance (dark) means it's consistent.
+_save_class_grid(classifier.var_, 'hot',
+                 'NB: Pixel variance per class', 'naivebayes_digits_variance.png')
@@ -0,0 +1,40 @@
+"""
+Use the naive bayes classifier to classify the iris data set.
+
+- This is an example of a supervised ML algorithm
+    - it has labels on the training data, i.e. you tell the model: this is class X during training
+"""
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+
+iris = datasets.load_iris()
+print(iris.data.shape)
+
+# hold out 20% of the samples as test data; the fixed seed makes the split repeatable
+x_train, x_test, y_train, y_test = train_test_split(
+    iris.data, iris.target, random_state=0, test_size=0.2
+)
+
+# fit a gaussian NB classifier on the training split, then print its accuracy on the test split
+classifier = GaussianNB().fit(x_train, y_train)
+score = classifier.score(x_test, y_test)
+print(score)
+
+# one subplot per feature, overlaying the fitted per-class gaussian of that feature
+fig, axes = plt.subplots(2, 2, figsize=(12, 10))
+for feature_idx, ax in enumerate(axes.flat):
+    column = iris.data[:, feature_idx]
+    x_range = np.linspace(column.min() - 1, column.max() + 1, 200)
+    for class_idx, class_name in enumerate(iris.target_names):
+        mu = classifier.theta_[class_idx, feature_idx]
+        sigma_sq = classifier.var_[class_idx, feature_idx]
+        # normal density with mean mu and variance sigma_sq, evaluated on x_range
+        density = np.exp(-0.5 * (x_range - mu) ** 2 / sigma_sq) / np.sqrt(2 * np.pi * sigma_sq)
+        ax.plot(x_range, density, label=class_name)
+    ax.set_title(iris.feature_names[feature_idx])
+    ax.legend()
+fig.savefig('naivebayes_iris_distributions.png', dpi=150, bbox_inches='tight')