{ "cells": [ { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "# Feature Engineering\n", "# Klassifikation\n", "## Instanzbasierte Modelle" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "**Vorbereitung der Umgebung**" ] }, { "cell_type": "raw", "metadata": { "tags": [] }, "source": [ "## for scikit-learn 1.4.2, to silence warnings regarding physical cores\n", "import os\n", "os.environ['LOKY_MAX_CPU_COUNT'] = '4' ## depending on the hardware used" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [] }, "outputs": [], "source": [ "## libraries used throughout the notebook\n", "from os import chdir\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "## apply the seaborn default theme, then render figures inline\n", "sns.set()\n", "%matplotlib inline\n", "\n", "## work from inside the data directory so the CSV files below\n", "## can be read by their bare file names\n", "datapath = '../3_data'\n", "chdir(datapath)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Vorbereitung der Daten**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "## read the prepared bank dataset from the current working directory\n", "data = pd.read_csv('bank_data_prep.csv')\n", "data.shape ## sanity check; not displayed, as it is not the cell's last expression\n", "\n", "## features - target - split\n", "## keep the target column 'y' and the remaining feature columns\n", "## as two independent objects\n", "y = data['y']\n", "X = data.drop(columns='y')\n", "\n", "## test - train - split\n", "## two thirds of the rows go to training; the seed is fixed\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    X, y, train_size=2 / 3, random_state=1234)\n", "\n", "## demo dataset, split into features and target the same way\n", "demo_data = pd.read_csv('demo_data_class.csv')\n", "y_demo = demo_data['y']\n", "X_demo = demo_data.drop(columns='y')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### KNeighborsClassifier\n", "#### Theorie\n", "#### Praxis" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T11:35:53.227097Z", "start_time": "2020-03-17T11:35:53.124006Z" } }, "outputs": [], "source": [ "## bring the classifier class into scope\n", "from sklearn.neighbors import 
KNeighborsClassifier" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T11:35:53.242354Z", "start_time": "2020-03-17T11:35:53.231829Z" } }, "outputs": [], "source": [ "## instantiate (and parameterize) the model\n", "model = KNeighborsClassifier()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-03-17T11:35:53.389975Z", "start_time": "2020-03-17T11:35:53.247879Z" }, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
KNeighborsClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()