refactor: add explanations to the comments
This commit is contained in:
@@ -13,9 +13,14 @@ import numpy as np
|
|||||||
|
|
||||||
|
|
||||||
def kmeans_accuracy(X, y, n_classes):
|
def kmeans_accuracy(X, y, n_classes):
|
||||||
"""Map each cluster to its majority true label, then compute accuracy."""
|
"""
|
||||||
|
Map each cluster to its majority true label, then compute accuracy.
|
||||||
|
This function handles the cluster→label mapping via majority vote.
|
||||||
|
Each cluster gets assigned the most common true label in it.
|
||||||
|
"""
|
||||||
kmeans = KMeans(n_clusters=n_classes, init="k-means++", n_init=10, random_state=42)
|
kmeans = KMeans(n_clusters=n_classes, init="k-means++", n_init=10, random_state=42)
|
||||||
kmeans.fit(X)
|
kmeans.fit(X)
|
||||||
|
|
||||||
labels = np.zeros_like(kmeans.labels_)
|
labels = np.zeros_like(kmeans.labels_)
|
||||||
for i in range(n_classes):
|
for i in range(n_classes):
|
||||||
mask = kmeans.labels_ == i
|
mask = kmeans.labels_ == i
|
||||||
@@ -25,17 +30,23 @@ def kmeans_accuracy(X, y, n_classes):
|
|||||||
|
|
||||||
|
|
||||||
def evaluate(name, dataset, target_names):
|
def evaluate(name, dataset, target_names):
|
||||||
print(f"\n{'='*60}")
|
"""
|
||||||
|
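Evaluate supervised and unsupervised ML algorithms on the same dataset. The supervised classifiers are fitted and tested on a train_test_split of the data, while K-Means is scored against the targets of the full dataset.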
|
||||||
|
Results are printed using classification_report.
|
||||||
|
"""
|
||||||
|
print(f"\n{'=' * 60}")
|
||||||
print(f" {name}")
|
print(f" {name}")
|
||||||
print(f"{'='*60}")
|
print(f"{'=' * 60}")
|
||||||
|
|
||||||
X_train, X_test, y_train, y_test = train_test_split(
|
X_train, X_test, y_train, y_test = train_test_split(
|
||||||
dataset.data, dataset.target, test_size=0.3, random_state=42
|
dataset.data, dataset.target, test_size=0.3, random_state=42
|
||||||
)
|
)
|
||||||
|
|
||||||
# supervised
|
# supervised
|
||||||
for clf_name, clf in [("Decision Tree", DecisionTreeClassifier(random_state=42)),
|
for clf_name, clf in [
|
||||||
("Naive Bayes", GaussianNB())]:
|
("Decision Tree", DecisionTreeClassifier(random_state=42)),
|
||||||
|
("Naive Bayes", GaussianNB()),
|
||||||
|
]:
|
||||||
clf.fit(X_train, y_train)
|
clf.fit(X_train, y_train)
|
||||||
y_pred = clf.predict(X_test)
|
y_pred = clf.predict(X_test)
|
||||||
print(f"\n--- {clf_name} ---")
|
print(f"\n--- {clf_name} ---")
|
||||||
@@ -49,7 +60,9 @@ def evaluate(name, dataset, target_names):
|
|||||||
print(f"\n--- K-Means (mapped) ---")
|
print(f"\n--- K-Means (mapped) ---")
|
||||||
print(f"Accuracy: {accuracy_score(dataset.target, mapped_labels):.3f}")
|
print(f"Accuracy: {accuracy_score(dataset.target, mapped_labels):.3f}")
|
||||||
print(f"Adj. Rand: {adjusted_rand_score(dataset.target, kmeans.labels_):.3f}")
|
print(f"Adj. Rand: {adjusted_rand_score(dataset.target, kmeans.labels_):.3f}")
|
||||||
print(classification_report(dataset.target, mapped_labels, target_names=target_names))
|
print(
|
||||||
|
classification_report(dataset.target, mapped_labels, target_names=target_names)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
iris = datasets.load_iris()
|
iris = datasets.load_iris()
|
||||||
|
|||||||
Reference in New Issue
Block a user