cas-pml/Einfuehrung/unterlagen/06_Kmeans_Iris.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "iris = datasets.load_iris()\n",
    "\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
      " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
      " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n",
      " 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n",
      " 2 2]\n"
     ]
    }
   ],
   "source": [
    "print(iris.target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='random', max_iter=300,\n",
       "    n_clusters=3, n_init=1, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kmeans = KMeans(n_clusters=3, init='random', n_init=1)\n",
    "kmeans.fit(iris.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (1, 0), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 1), (2, 0), (2, 1), (2, 0), (2, 0), (2, 1), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 1)]\n"
     ]
    }
   ],
   "source": [
    "print(list(zip(iris.target,kmeans.labels_)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7364192881252849\n",
      "0.7474865805095326\n",
      "0.7163421126838475\n",
      "0.5511916046195915\n"
     ]
    }
   ],
   "source": [
    "print(metrics.homogeneity_score(iris.target, kmeans.labels_))\n",
    "print(metrics.completeness_score(iris.target, kmeans.labels_))\n",
    "print(metrics.adjusted_rand_score(iris.target, kmeans.labels_))\n",
    "print(metrics.silhouette_score(iris.data, kmeans.labels_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='random', max_iter=300,\n",
       "    n_clusters=3, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kmeans_init = KMeans(n_clusters=3, init='random', n_init=10)\n",
    "kmeans_init.fit(iris.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (1, 0), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 1), (2, 0), (2, 1), (2, 0), (2, 0), (2, 1), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 1)]\n"
     ]
    }
   ],
   "source": [
    "print(list(zip(iris.target,kmeans.labels_)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7514854021988338\n",
      "0.7649861514489815\n",
      "0.7302382722834697\n",
      "0.5528190123564091\n"
     ]
    }
   ],
   "source": [
    "print(metrics.homogeneity_score(iris.target, kmeans_init.labels_))\n",
    "print(metrics.completeness_score(iris.target, kmeans_init.labels_))\n",
    "print(metrics.adjusted_rand_score(iris.target, kmeans_init.labels_))\n",
    "print(metrics.silhouette_score(iris.data, kmeans_init.labels_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
       "    n_clusters=3, n_init=10, n_jobs=None, precompute_distances='auto',\n",
       "    random_state=None, tol=0.0001, verbose=0)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "kmeans2 = KMeans(n_clusters=3)\n",
    "kmeans2.fit(iris.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (0, 2), (1, 0), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 1), (2, 0), (2, 1), (2, 0), (2, 0), (2, 1), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 0), (2, 1), (2, 0), (2, 0), (2, 1)]\n"
     ]
    }
   ],
   "source": [
    "print(list(zip(iris.target,kmeans.labels_)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7514854021988338\n",
      "0.7649861514489815\n",
      "0.7302382722834697\n",
      "0.5528190123564091\n"
     ]
    }
   ],
   "source": [
    "print(metrics.homogeneity_score(iris.target, kmeans2.labels_))\n",
    "print(metrics.completeness_score(iris.target, kmeans2.labels_))\n",
    "print(metrics.adjusted_rand_score(iris.target, kmeans2.labels_))\n",
    "print(metrics.silhouette_score(iris.data, kmeans2.labels_))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}