# cas-pml/ML/aufgaben/naivebayes/naivebayes_iris.py

"""
Use the naive Bayes classifier to classify the Iris data set.

- This is an example of a supervised ML algorithm
    - it has labels on the training data
    - you tell the model: this is class X during training
"""
import matplotlib.pyplot as plt
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Load the iris data set and report the shape of its feature matrix.
iris = datasets.load_iris()
print(iris.data.shape)

# Hold out 20% of the samples as test data; the fixed random_state keeps
# the split reproducible from run to run.
split = train_test_split(iris.data, iris.target, test_size=0.2, random_state=0)
x_train, x_test, y_train, y_test = split

# Build a Gaussian naive Bayes classifier and train it on the training
# portion (fit() hands back the fitted estimator itself).
classifier = GaussianNB().fit(x_train, y_train)

# Evaluate the model on the held-out test data and print its accuracy.
score = classifier.score(x_test, y_test)
print(score)

# Draw one panel per feature showing the Gaussian fitted for each class,
# using the per-class means (theta_) and variances (var_) of the classifier.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
for feature_idx, (ax, feature_name) in enumerate(zip(axes.flat, iris.feature_names)):
    # Evaluate the curves on a grid reaching one unit beyond the observed
    # range of this feature over the whole data set.
    column = iris.data[:, feature_idx]
    grid = np.linspace(column.min() - 1, column.max() + 1, 200)

    class_params = zip(
        iris.target_names,
        classifier.theta_[:, feature_idx],
        classifier.var_[:, feature_idx],
    )
    for label, mu, variance in class_params:
        # Normal density with the fitted mean and variance of this class.
        exponent = -0.5 * (grid - mu) ** 2 / variance
        density = np.exp(exponent) / np.sqrt(2 * np.pi * variance)
        ax.plot(grid, density, label=label)

    ax.set_title(feature_name)
    ax.legend()

# Write the finished figure to a PNG in the current working directory.
fig.savefig('naivebayes_iris_distributions.png', dpi=150, bbox_inches='tight')