feature(a2): add kmeans exercises
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -0,0 +1,24 @@
|
|||||||
|
# import numpy as np  # only needed for the optional class-count check below

from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn import metrics

# Exercise: cluster the scikit-learn handwritten-digits data with k-means and
# score the result against the known digit labels.
digits = datasets.load_digits()

# Each sample is a flattened 8x8 image, i.e. 64 pixel features.
# Per the scikit-learn dataset documentation the set holds 1797 samples,
# roughly 180 per digit class.
print(digits.data.shape)
# print(len(np.unique(digits.target)))  # optional: number of distinct classes

# 10 clusters (one per digit), random centroid initialisation, and a single
# run (n_init=1), so results can differ between executions.
kmeans = KMeans(n_clusters=10, init='random', n_init=1)
kmeans.fit(digits.data)

# Pair every true digit label with the cluster id assigned to that sample.
# Cluster ids are arbitrary, so equal numbers do not imply a correct match.
print(list(zip(digits.target, kmeans.labels_)))

# Label-based scores: compare the clustering against the true digit labels.
print(metrics.homogeneity_score(digits.target, kmeans.labels_))
print(metrics.completeness_score(digits.target, kmeans.labels_))
print(metrics.adjusted_rand_score(digits.target, kmeans.labels_))
# Label-free score: computed from the features and the assigned cluster ids.
print(metrics.silhouette_score(digits.data, kmeans.labels_))

# Takeaway: here too k-means is not the right algorithm, because the data is
# not nicely spherically distributed and does not cluster well.
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,28 @@
|
|||||||
|
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn import metrics

# Exercise: cluster the iris data with k-means and score the result against
# the known species labels.
iris = datasets.load_iris()

# Print the true class label of every sample (150 samples).
print(iris.target)

# 3 clusters, random centroid initialisation, 50 restarts (n_init=50).
kmeans = KMeans(n_clusters=3, init='random', n_init=50)
# Fit on the feature matrix only; the labels are not used for training.
kmeans.fit(iris.data)

# Optional: dump all (true label, cluster id) pairs as one list.
# print(list(zip(iris.target, kmeans.labels_)))

# Side-by-side comparison, one line per sample. Cluster ids are arbitrary,
# so equal numbers do not imply a correct match.
print("gold standard vs. prediction")
for target_label, predicted_label in zip(iris.target, kmeans.labels_):
    print(f'{target_label} vs. {predicted_label}')

# Label-based scores: compare the clustering against the true species labels.
print(metrics.homogeneity_score(iris.target, kmeans.labels_))
print(metrics.completeness_score(iris.target, kmeans.labels_))
print(metrics.adjusted_rand_score(iris.target, kmeans.labels_))
# Label-free score: computed from the features and the assigned cluster ids.
print(metrics.silhouette_score(iris.data, kmeans.labels_))

# Takeaway: the algorithm is not a perfect fit for this kind of data!
|
||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user