feature(nb): add naive bayes for iris and digits dataset
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
"""
|
||||
Use the naive bayes classifier to classify the digits data set.
|
||||
|
||||
- This is an example of a supervised ML algorithm
|
||||
- it has labels on the training data
|
||||
- you tell the model: this is class X during training
|
||||
"""
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
|
||||
# Load the scikit-learn digits data set and show the shape of its feature matrix.
digits = datasets.load_digits()
print(digits.data.shape)

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.2,
    random_state=0,
)

# Build a Gaussian naive Bayes model and fit it on the training portion only
# (fit() returns the estimator itself, so the call can be chained).
classifier = GaussianNB().fit(x_train, y_train)

# Evaluate on the held-out samples and print the model's accuracy.
score = classifier.score(x_test, y_test)
print(score)
|
||||
|
||||
# Show the per-class feature means learned by the classifier, each reshaped
# to an 8x8 image and drawn in its own panel of a 2x5 grid.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(12, 5))
for class_idx, panel in enumerate(axes.flat):
    mean_image = classifier.theta_[class_idx].reshape(8, 8)
    panel.imshow(mean_image, cmap='gray_r')
    panel.set_title(f'Class {class_idx}')
    # Hide ticks and frame; only the image itself matters here.
    panel.axis('off')
fig.suptitle('NB: Mean pixel intensity per class')
fig.savefig('naivebayes_digits_means.png', dpi=150, bbox_inches='tight')
|
||||
|
||||
# Show the per-class feature variances learned by the classifier, again as
# 8x8 images on a 2x5 grid. How to read the plot:
# - bright (high variance): the pixel changes a lot within the class, so it
#   isn't reliable for classification
# - dark (low variance): the pixel is consistent within the class.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(12, 5))
for class_idx, panel in enumerate(axes.flat):
    variance_image = classifier.var_[class_idx].reshape(8, 8)
    panel.imshow(variance_image, cmap='hot')
    panel.set_title(f'Class {class_idx}')
    # Hide ticks and frame; only the image itself matters here.
    panel.axis('off')
fig.suptitle('NB: Pixel variance per class')
fig.savefig('naivebayes_digits_variance.png', dpi=150, bbox_inches='tight')
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 24 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 23 KiB |
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
Use the naive bayes classifier to classify the iris data set.
|
||||
|
||||
- This is an example of a supervised ML algorithm
|
||||
- it has labels on the training data - you tell the model: this is class X during training
|
||||
"""
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
|
||||
# Load the scikit-learn iris data set and show the shape of its feature matrix.
iris = datasets.load_iris()
print(iris.data.shape)

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=0,
)

# Build a Gaussian naive Bayes model and fit it on the training portion only
# (fit() returns the estimator itself, so the call can be chained).
classifier = GaussianNB().fit(x_train, y_train)

# Evaluate on the held-out samples and print the model's accuracy.
score = classifier.score(x_test, y_test)
print(score)
|
||||
|
||||
# Plot, for every feature, the Gaussian density that the classifier learned
# for each class: one panel per feature on a 2x2 grid, one curve per class.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
for idx, ax in enumerate(axes.flat):
    # Evaluate the curves on 200 points spanning the observed feature range,
    # padded by 1 on each side so the tails stay visible.
    lower = iris.data[:, idx].min() - 1
    upper = iris.data[:, idx].max() + 1
    x_range = np.linspace(lower, upper, 200)

    for class_idx, class_name in enumerate(iris.target_names):
        mean = classifier.theta_[class_idx, idx]
        var = classifier.var_[class_idx, idx]
        # Normal density: exp(-(x - mean)^2 / (2 * var)) / sqrt(2 * pi * var).
        exponent = -0.5 * (x_range - mean) ** 2 / var
        normalizer = np.sqrt(2 * np.pi * var)
        gaussian = np.exp(exponent) / normalizer
        ax.plot(x_range, gaussian, label=class_name)

    ax.set_title(iris.feature_names[idx])
    ax.legend()
fig.savefig('naivebayes_iris_distributions.png', dpi=150, bbox_inches='tight')
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 172 KiB |
Reference in New Issue
Block a user