# File-listing metadata (from the source viewer): 25 lines, 818 B, Python
"""Cluster the scikit-learn digits dataset with k-means and print quality scores.

The script fits one randomly initialised k-means run with 10 clusters, then
prints the (true digit, cluster id) pairs followed by the homogeneity,
completeness, adjusted Rand and silhouette scores, in that order.
"""

# import numpy as np  # only needed for the optional class-count check below

from sklearn import datasets
from sklearn import metrics
from sklearn.cluster import KMeans

digits = datasets.load_digits()

# Every sample consists of 64 pixel values. The scikit-learn documentation
# lists 1797 samples in total, i.e. roughly 180 per digit (not 100); the
# shape printed below shows the actual numbers.
print(digits.data.shape)
# Optional check of the number of distinct digit classes (requires numpy):
# print(len(np.unique(digits.target)))

# A single k-means run: 10 clusters, random centroid initialisation and no
# restarts (n_init=1). No random_state is passed, so the scores printed below
# can differ from run to run.
kmeans = KMeans(n_clusters=10, init='random', n_init=1)
kmeans.fit(digits.data)

true_digits = digits.target
cluster_ids = kmeans.labels_

# Raw pairing of ground-truth digit and assigned cluster id for every sample.
print(list(zip(true_digits, cluster_ids)))

# Scores that compare the clustering against the ground-truth labels.
for label_score in (
    metrics.homogeneity_score,
    metrics.completeness_score,
    metrics.adjusted_rand_score,
):
    print(label_score(true_digits, cluster_ids))

# The silhouette score is computed from the feature vectors and the cluster
# assignment only; the true labels are not used.
print(metrics.silhouette_score(digits.data, cluster_ids))

# Conclusion: here too, k-means is not the right algorithm, because the data
# are not nicely spherically distributed and do not cluster well.