Files
cas-pml/ML/aufgaben/a2/kmeans_iris.py
T

29 lines
877 B
Python

from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn import metrics
# Load the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# Print the ground-truth class labels, one per sample (150 samples).
print(iris.target)
# k-means with 3 clusters, randomly chosen initial centroids, and 50
# independent restarts (n_init=50).
kmeans = KMeans(n_clusters=3, init='random', n_init=50)
# Fit on the feature matrix only; the class labels are not used for clustering.
kmeans.fit(iris.data)
# Debug aid: dump every (target, predicted) pair in a single list.
#print(list(zip(iris.target, kmeans.labels_)))
# Side-by-side comparison of true class vs. assigned cluster, one sample per
# line. Cluster ids are arbitrary, so matching numbers are not expected; what
# matters is whether samples of one class end up in the same cluster.
print("gold standard vs. prediction")
for target_label, predicted_label in zip(iris.target, kmeans.labels_):
    print(f'{target_label} vs. {predicted_label}')
# Label-based metrics: compare the clustering against the true classes.
print(metrics.homogeneity_score(iris.target, kmeans.labels_))
print(metrics.completeness_score(iris.target, kmeans.labels_))
print(metrics.adjusted_rand_score(iris.target, kmeans.labels_))
# The silhouette score is computed from the features and the cluster
# assignments only; it does not use the ground-truth labels.
print(metrics.silhouette_score(iris.data, kmeans.labels_))
# Takeaway: the algorithm is not a perfect fit for this kind of data.