From 749c5eaef8860cd0e53c82a6a826d71fc10d2e7e Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 30 Apr 2026 20:12:23 +0200 Subject: [PATCH] refactor: add explanations to the comments --- .../comparison/compare_ml_algorihms.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/ML/aufgaben/comparison/compare_ml_algorihms.py b/ML/aufgaben/comparison/compare_ml_algorihms.py index d27e4d1..63779d1 100644 --- a/ML/aufgaben/comparison/compare_ml_algorihms.py +++ b/ML/aufgaben/comparison/compare_ml_algorihms.py @@ -13,9 +13,14 @@ import numpy as np def kmeans_accuracy(X, y, n_classes): - """Map each cluster to its majority true label, then compute accuracy.""" + """ + Map each cluster to its majority true label, then compute accuracy. + This function handles the cluster→label mapping via majority vote. + Each cluster gets assigned the most common true label in it. + """ kmeans = KMeans(n_clusters=n_classes, init="k-means++", n_init=10, random_state=42) kmeans.fit(X) + labels = np.zeros_like(kmeans.labels_) for i in range(n_classes): mask = kmeans.labels_ == i @@ -25,17 +30,23 @@ def kmeans_accuracy(X, y, n_classes): def evaluate(name, dataset, target_names): - print(f"\n{'='*60}") + """ + Evaluate unsupervised and supervised ML algorithms on the same dataset, split with the train_test_split function. + Use the classification_report to evaluate the function. + """ + print(f"\n{'=' * 60}") print(f" {name}") - print(f"{'='*60}") + print(f"{'=' * 60}") X_train, X_test, y_train, y_test = train_test_split( dataset.data, dataset.target, test_size=0.3, random_state=42 ) # supervised - for clf_name, clf in [("Decision Tree", DecisionTreeClassifier(random_state=42)), - ("Naive Bayes", GaussianNB())]: + for clf_name, clf in [ + ("Decision Tree", DecisionTreeClassifier(random_state=42)), + ("Naive Bayes", GaussianNB()), + ]: clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print(f"\n--- {clf_name} ---") @@ -49,7 +60,9 @@ def evaluate(name, dataset, target_names): print(f"\n--- K-Means (mapped) ---") print(f"Accuracy: {accuracy_score(dataset.target, mapped_labels):.3f}") print(f"Adj. Rand: {adjusted_rand_score(dataset.target, kmeans.labels_):.3f}") - print(classification_report(dataset.target, mapped_labels, target_names=target_names)) + print( + classification_report(dataset.target, mapped_labels, target_names=target_names) + ) iris = datasets.load_iris()