feature(a2): add kmeans exercises

This commit is contained in:
2026-04-30 16:48:00 +02:00
parent a87b454bc5
commit 628c0a3beb
5 changed files with 4881 additions and 1285 deletions
File diff suppressed because one or more lines are too long
+24
View File
@@ -0,0 +1,24 @@
#import numpy as np
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn import metrics
# Load scikit-learn's bundled handwritten-digits dataset.
digits = datasets.load_digits()

# Print (n_samples, n_features). Per the scikit-learn documentation the set
# holds 1797 samples (about 180 per digit), each flattened to 64 pixel values.
print(digits.data.shape)
# Optional sanity check of the number of classes (requires the commented-out
# numpy import at the top of the script to be re-enabled):
# print(len(np.unique(digits.target)))

# k-means with 10 clusters, randomly chosen starting centroids and a single
# initialisation run; fit() returns the fitted estimator itself.
kmeans = KMeans(n_clusters=10, init='random', n_init=1).fit(digits.data)
labels = kmeans.labels_

# Dump every (true digit, assigned cluster) pair.
print(list(zip(digits.target, labels)))

# Metrics that compare the cluster assignment against the true digit labels,
# printed in the order homogeneity, completeness, adjusted Rand index.
for score in (
    metrics.homogeneity_score,
    metrics.completeness_score,
    metrics.adjusted_rand_score,
):
    print(score(digits.target, labels))

# The silhouette score is computed from the feature vectors, not the labels.
print(metrics.silhouette_score(digits.data, labels))

# Takeaway (author's note): here, too, k-means is not the right algorithm,
# because the data are not nicely spherically distributed and do not cluster
# well.
File diff suppressed because it is too large Load Diff
+28
View File
@@ -0,0 +1,28 @@
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn import metrics
# Load scikit-learn's bundled iris dataset (150 samples).
iris = datasets.load_iris()

# Show the ground-truth class label of every sample.
print(iris.target)

# k-means with 3 clusters and randomly chosen starting centroids; n_init=50
# repeats the run with 50 different initialisations.
kmeans = KMeans(n_clusters=3, init='random', n_init=50)

# Fit the model on the feature matrix.
kmeans.fit(iris.data)

# Full dump of all (true label, cluster) pairs, kept for debugging:
# print(list(zip(iris.target, kmeans.labels_)))

# Side-by-side comparison, one sample per line. Cluster ids are arbitrary, so
# a "correct" grouping does not need to use the same numbers as the targets.
print("gold standard vs. prediction")
for target_label, predicted_label in zip(iris.target, kmeans.labels_):
    # The loop body must be indented; without it the script fails to parse.
    print(f'{target_label} vs. {predicted_label}')

# Metrics that compare the cluster assignment against the true labels.
print(metrics.homogeneity_score(iris.target, kmeans.labels_))
print(metrics.completeness_score(iris.target, kmeans.labels_))
print(metrics.adjusted_rand_score(iris.target, kmeans.labels_))

# The silhouette score is computed from the feature vectors, not the labels.
print(metrics.silhouette_score(iris.data, kmeans.labels_))

# Takeaway (author's note): the algorithm is not perfect for this kind of data.
File diff suppressed because one or more lines are too long