refactor: rename folder structure to sort for algorithms

This commit is contained in:
2026-04-30 18:59:56 +02:00
parent 9a8d59290b
commit dbc2b765a7
13 changed files with 124 additions and 54 deletions
+60
View File
@@ -0,0 +1,60 @@
"""
Use k-means to try to match handwritten digits and see if changing the parameters
results in better recognition.
- This is an example of an unsupervised ML algorithm
- it has no labels on the training data
- it discovers the structure on its own
- thus the cluster numbers are arbitrary and do not correspond to the class labels
"""
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.decomposition import PCA
# Load scikit-learn's bundled handwritten-digits dataset.
digits = datasets.load_digits()
# digits.data has shape (n_samples, 64): each sample is an 8x8 grayscale image
# flattened to 64 pixel values. According to the scikit-learn documentation
# the dataset holds 1797 samples, i.e. roughly 180 per digit (not 100).
print(digits.data.shape)
# Parameter variations to experiment with (keep random_state fixed when
# switching, otherwise seed noise is mixed into the comparison):
# kmeans = KMeans(n_clusters=10, init="random", n_init=1)
# kmeans = KMeans(n_clusters=10)
# A fixed random_state makes the centroid initialisation, and therefore every
# score printed below, reproducible from run to run.
kmeans = KMeans(n_clusters=10, init="k-means++", n_init=10, random_state=42)
kmeans.fit(digits.data)
# Pair every true digit with the cluster id it was assigned to. The cluster
# ids are arbitrary, so equal numbers in a pair do not mean "correct".
print(list(zip(digits.target, kmeans.labels_)))
# The first three scores compare the clustering against the true labels;
# the silhouette score is computed from the data and the cluster assignment
# alone, without using the labels.
print("homogeneity:", metrics.homogeneity_score(digits.target, kmeans.labels_))
print("completeness:", metrics.completeness_score(digits.target, kmeans.labels_))
print("adjusted rand index:", metrics.adjusted_rand_score(digits.target, kmeans.labels_))
print("silhouette:", metrics.silhouette_score(digits.data, kmeans.labels_))
# Project the 64-dimensional samples onto their first two principal components
# so the cluster assignment can be drawn in a plane.
projection = PCA(n_components=2)
samples_2d = projection.fit_transform(digits.data)
# The centroids live in the original 64-d space; map them with the same
# fitted projection so they land in the same coordinate system as the samples.
centers_2d = projection.transform(kmeans.cluster_centers_)
var_pc1, var_pc2 = projection.explained_variance_ratio_

overview_fig, overview_ax = plt.subplots(figsize=(10, 8))
# One dot per sample, coloured by the cluster it was assigned to.
sample_points = overview_ax.scatter(
    samples_2d[:, 0],
    samples_2d[:, 1],
    c=kmeans.labels_,
    cmap='tab10',
    s=10,
    alpha=0.6,
)
# Cluster centres drawn on top as large red crosses.
overview_ax.scatter(
    centers_2d[:, 0],
    centers_2d[:, 1],
    c='red',
    marker='X',
    s=200,
    edgecolors='black',
)
overview_ax.set_xlabel(f'PC1 ({var_pc1:.1%} var)')
overview_ax.set_ylabel(f'PC2 ({var_pc2:.1%} var)')
overview_ax.set_title('K-Means on Digits (PCA projection)')
overview_fig.colorbar(sample_points, ax=overview_ax, label='Cluster')
overview_fig.savefig('kmeans_digits.png', dpi=150, bbox_inches='tight')
# Render every cluster centre as an 8x8 image in a 2x5 grid, one panel per
# cluster, to show what each cluster looks like.
centroid_fig, panels = plt.subplots(nrows=2, ncols=5, figsize=(10, 4))
for cluster_id, panel in enumerate(panels.ravel()):
    # A centroid is a 64-value vector; fold it back into image shape.
    centroid_image = kmeans.cluster_centers_[cluster_id].reshape(8, 8)
    panel.imshow(centroid_image, cmap='gray_r')
    panel.set_title(f'Cluster {cluster_id}')
    panel.axis('off')
centroid_fig.savefig('kmeans_digits_centroids.png', dpi=150, bbox_inches='tight')
"""
Takeaway:
- K-means is not the right algorithm here, because the data are not distributed in nicely spherical clusters and therefore do not cluster well.
"""