# Demo: cluster the Iris data set with k-means and compare the resulting
# cluster assignment against the known species labels.
from sklearn import datasets, metrics
from sklearn.cluster import KMeans

iris = datasets.load_iris()

# Show the reference ("gold standard") label of every sample.
print(iris.target)

# Three clusters, randomly chosen initial centroids, 50 restarts.
# fit() returns the estimator itself, so construction and fitting are chained.
kmeans = KMeans(n_clusters=3, init='random', n_init=50).fit(iris.data)
predicted = kmeans.labels_

# Side-by-side listing of the true label and the assigned cluster.
# NOTE: cluster ids are arbitrary, so equal numbers do not imply a match.
print("gold standard vs. prediction")
for target_label, predicted_label in zip(iris.target, predicted):
    print(f'{target_label} vs. {predicted_label}')

# Metrics that compare the clustering against the true labels.
label_based_scores = (
    metrics.homogeneity_score,
    metrics.completeness_score,
    metrics.adjusted_rand_score,
)
for score in label_based_scores:
    print(score(iris.target, predicted))

# The silhouette score works on the feature data and needs no true labels.
print(metrics.silhouette_score(iris.data, predicted))

# Takeaway: the algorithm is not a perfect fit for this kind of data!