From 749c5eaef8860cd0e53c82a6a826d71fc10d2e7e Mon Sep 17 00:00:00 2001
From: aaron <aaron@0x29a.ch>
Date: Thu, 30 Apr 2026 20:12:23 +0200
Subject: [PATCH] refactor: add explanations to the comments

---
 .../comparison/compare_ml_algorihms.py        | 25 ++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/ML/aufgaben/comparison/compare_ml_algorihms.py b/ML/aufgaben/comparison/compare_ml_algorihms.py
index d27e4d1..63779d1 100644
--- a/ML/aufgaben/comparison/compare_ml_algorihms.py
+++ b/ML/aufgaben/comparison/compare_ml_algorihms.py
@@ -13,9 +13,14 @@ import numpy as np
 
 
 def kmeans_accuracy(X, y, n_classes):
-    """Map each cluster to its majority true label, then compute accuracy."""
+    """
+    Map each cluster to its majority true label, then compute accuracy.
+    This function handles the cluster→label mapping via majority vote.
+    Each cluster is assigned the most common true label among its members.
+    """
     kmeans = KMeans(n_clusters=n_classes, init="k-means++", n_init=10, random_state=42)
     kmeans.fit(X)
+
     labels = np.zeros_like(kmeans.labels_)
     for i in range(n_classes):
         mask = kmeans.labels_ == i
@@ -25,17 +30,23 @@ def kmeans_accuracy(X, y, n_classes):
 
 
 def evaluate(name, dataset, target_names):
-    print(f"\n{'='*60}")
+    """
+    Evaluate supervised classifiers (on a train_test_split hold-out) and K-Means on the same dataset.
+    Use classification_report to summarize each algorithm's predictions.
+    """
+    print(f"\n{'=' * 60}")
     print(f" {name}")
-    print(f"{'='*60}")
+    print(f"{'=' * 60}")
 
     X_train, X_test, y_train, y_test = train_test_split(
         dataset.data, dataset.target, test_size=0.3, random_state=42
     )
 
     # supervised
-    for clf_name, clf in [("Decision Tree", DecisionTreeClassifier(random_state=42)),
-                          ("Naive Bayes", GaussianNB())]:
+    for clf_name, clf in [
+        ("Decision Tree", DecisionTreeClassifier(random_state=42)),
+        ("Naive Bayes", GaussianNB()),
+    ]:
         clf.fit(X_train, y_train)
         y_pred = clf.predict(X_test)
         print(f"\n--- {clf_name} ---")
@@ -49,7 +60,9 @@ def evaluate(name, dataset, target_names):
     print(f"\n--- K-Means (mapped) ---")
     print(f"Accuracy: {accuracy_score(dataset.target, mapped_labels):.3f}")
     print(f"Adj. Rand: {adjusted_rand_score(dataset.target, kmeans.labels_):.3f}")
-    print(classification_report(dataset.target, mapped_labels, target_names=target_names))
+    print(
+        classification_report(dataset.target, mapped_labels, target_names=target_names)
+    )
 
 
 iris = datasets.load_iris()