refactor: add explanations to the comments

This commit is contained in:
2026-04-30 20:12:23 +02:00
parent 28bb039e58
commit 749c5eaef8
+17 -4
View File
@@ -13,9 +13,14 @@ import numpy as np
def kmeans_accuracy(X, y, n_classes): def kmeans_accuracy(X, y, n_classes):
"""Map each cluster to its majority true label, then compute accuracy.""" """
Map each cluster to its majority true label, then compute accuracy.
This function handles the cluster→label mapping via majority vote.
Each cluster is assigned the most common true label among its members.
"""
kmeans = KMeans(n_clusters=n_classes, init="k-means++", n_init=10, random_state=42) kmeans = KMeans(n_clusters=n_classes, init="k-means++", n_init=10, random_state=42)
kmeans.fit(X) kmeans.fit(X)
labels = np.zeros_like(kmeans.labels_) labels = np.zeros_like(kmeans.labels_)
for i in range(n_classes): for i in range(n_classes):
mask = kmeans.labels_ == i mask = kmeans.labels_ == i
@@ -25,6 +30,10 @@ def kmeans_accuracy(X, y, n_classes):
def evaluate(name, dataset, target_names): def evaluate(name, dataset, target_names):
"""
Evaluate supervised and unsupervised ML algorithms on the same dataset, which is split with the train_test_split function.
Use classification_report to evaluate each model's predictions.
"""
print(f"\n{'=' * 60}") print(f"\n{'=' * 60}")
print(f" {name}") print(f" {name}")
print(f"{'=' * 60}") print(f"{'=' * 60}")
@@ -34,8 +43,10 @@ def evaluate(name, dataset, target_names):
) )
# supervised # supervised
for clf_name, clf in [("Decision Tree", DecisionTreeClassifier(random_state=42)), for clf_name, clf in [
("Naive Bayes", GaussianNB())]: ("Decision Tree", DecisionTreeClassifier(random_state=42)),
("Naive Bayes", GaussianNB()),
]:
clf.fit(X_train, y_train) clf.fit(X_train, y_train)
y_pred = clf.predict(X_test) y_pred = clf.predict(X_test)
print(f"\n--- {clf_name} ---") print(f"\n--- {clf_name} ---")
@@ -49,7 +60,9 @@ def evaluate(name, dataset, target_names):
print(f"\n--- K-Means (mapped) ---") print(f"\n--- K-Means (mapped) ---")
print(f"Accuracy: {accuracy_score(dataset.target, mapped_labels):.3f}") print(f"Accuracy: {accuracy_score(dataset.target, mapped_labels):.3f}")
print(f"Adj. Rand: {adjusted_rand_score(dataset.target, kmeans.labels_):.3f}") print(f"Adj. Rand: {adjusted_rand_score(dataset.target, kmeans.labels_):.3f}")
print(classification_report(dataset.target, mapped_labels, target_names=target_names)) print(
classification_report(dataset.target, mapped_labels, target_names=target_names)
)
iris = datasets.load_iris() iris = datasets.load_iris()