refactor: move things around

2026-05-21 14:16:30 +02:00
parent 2fce3281a3
commit 41e15ed275
124 changed files with 404226 additions and 0 deletions
@@ -0,0 +1,398 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "763373f9-1ba3-4fb6-9ae3-d0aeb6be07e2",
+   "metadata": {},
+   "source": [
+    "**Workshop 03 - Loesungsvorschlag**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ac5573af-46dd-43fa-886e-ca228e61edaf",
+   "metadata": {},
+   "source": [
+    "# Data Frame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "22a6e66a-eeaa-4ebe-aa95-ad983a2b1110",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare and read data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "datapath = '../../3_data'\n",
+    "filename = 'melb_data.csv'\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "data = pd.read_csv(filename)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7c42aa1f-2ab1-43b2-a9a7-808f83dd724f",
+   "metadata": {},
+   "source": [
+    "## Entfernen von Beobachtungen nach Bedingung"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "71e9e7ca-c6cd-40bc-b9ff-adb11721d669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## remove outliers on Price\n",
+    "data = data[data.Price < 8000000]\n",
+    "\n",
+    "## remove selected observation2\n",
+    "data = data[data.YearBuilt != 1196]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "309c5083-cefb-4b25-91e3-1eebda35555f",
+   "metadata": {},
+   "source": [
+    "## Entfernen von Duplikaten"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1eb307e8-df31-4a37-a9ed-c80a1d354cf9",
+   "metadata": {},
+   "source": [
+    "hier kein Bedarf"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "id": "757d2596-04e0-445d-8ea9-9cd5c92ef702",
+   "metadata": {},
+   "source": [
+    "## remove duplicates\n",
+    "data.drop_duplicates(ignore_index=True, inplace = True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da78d619-572c-4649-8c00-fde22bcbc8bb",
+   "metadata": {},
+   "source": [
+    "## Entfernen fragwürdiger Variablen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c689473c-e969-4c69-a131-4d3a680b1f83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## columns that are removed before modelling\n",
+    "vars_to_drop = [\n",
+    "    'Unnamed: 0', 'Suburb', 'Address',\n",
+    "    'SellerG', 'Postcode', 'Bedroom2',\n",
+    "]\n",
+    "data = data.drop(columns=vars_to_drop)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8816b6f2-1bb3-4795-9fdb-cd7a50494d09",
+   "metadata": {},
+   "source": [
+    "## Einsetzen von Werten für NAs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "c637332d-24e5-48b1-bb58-ea704ee3f92e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Impute missing values column by column.\n",
+    "## The result is assigned back to the column instead of calling\n",
+    "## data[c].fillna(..., inplace=True): that chained in-place call works on a\n",
+    "## temporary Series, raises a FutureWarning in pandas 2.x and leaves `data`\n",
+    "## unchanged once Copy-on-Write is active.\n",
+    "\n",
+    "## mode for all cat vars (if any)\n",
+    "cat_feats = data.select_dtypes(include=['object']).columns\n",
+    "for c in cat_feats:\n",
+    "    data[c] = data[c].fillna(data[c].mode()[0])\n",
+    "\n",
+    "## median for all num features\n",
+    "num_feats = data.select_dtypes(include=['int64', 'float64']).columns\n",
+    "for c in num_feats:\n",
+    "    data[c] = data[c].fillna(data[c].median())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5e85ccf8-e99e-4630-a4e6-6d380994ab26",
+   "metadata": {},
+   "source": [
+    "# Kategoriale Variablen"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b888038-88e8-46a5-988e-9f0dfd38d86d",
+   "metadata": {},
+   "source": [
+    "## Reduzieren der Kardinalität"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "e5a432e1-eb89-4d1e-b0cf-f799a8ac5216",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Regionname: combine rarest 3 levels to 'Victoria'\n",
+    "rare_regions = ['Eastern Victoria', 'Northern Victoria', 'Western Victoria']\n",
+    "data.Regionname = np.where(data.Regionname.isin(rare_regions), 'Victoria', data.Regionname)\n",
+    "\n",
+    "## Method: combine 'SA' to 'S'\n",
+    "data.Method = np.where(data.Method.eq('SA'), 'S', data.Method)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dbacc9d0-d357-4020-8062-6b968aba5e9e",
+   "metadata": {},
+   "source": [
+    "## Nummerisieren - Faktorisieren"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "ec29d19a-347b-4793-b2ce-4831f7a9bfa2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## replace every CouncilArea level by its integer code\n",
+    "council_codes = pd.factorize(data['CouncilArea'])[0]\n",
+    "data['CouncilArea'] = council_codes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3937e91f-85c4-4f1c-bd50-564e8de620ea",
+   "metadata": {},
+   "source": [
+    "## Nummerisieren - Ordinal Encodieren"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "3f250d95-862f-4016-af38-1c6ff8750db2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## ordinal encoding of Type: h -> 1, u -> 2, t -> 3\n",
+    "## Assigned back instead of data.Type.replace(..., inplace=True): the chained\n",
+    "## in-place call acts on a temporary Series, is deprecated in pandas 2.x and\n",
+    "## has no effect on `data` under Copy-on-Write.\n",
+    "## astype makes the integer dtype explicit so that the later object-dtype\n",
+    "## selection for one-hot encoding does not pick up Type; it raises if a level\n",
+    "## other than h/u/t is still present.\n",
+    "data['Type'] = data['Type'].replace(\n",
+    "    ['h', 'u', 't'],\n",
+    "    [1, 2, 3]).astype('int64')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "154a4b0f-6e2f-498a-89f4-64e80de30fd4",
+   "metadata": {},
+   "source": [
+    "## Nummerisieren - Binär Encodieren"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0d422904-43d5-4c32-9c7b-cc16ddfbf39d",
+   "metadata": {},
+   "source": [
+    "hier kein Bedarf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "807990cc-9fac-47b7-a165-3194f142b30c",
+   "metadata": {},
+   "source": [
+    "## Nummerisieren - One-Hot Encodieren"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "91ef6cb1-157a-46e0-b9e6-0bf025b953b3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## one-hot encoding\n",
+    "## every object-typed column is encoded, Date excepted (it is transformed later)\n",
+    "ignore = 'Date'\n",
+    "object_cols = data.select_dtypes(include=['object']).columns\n",
+    "sel_vars = object_cols.drop(ignore)\n",
+    "data = pd.get_dummies(data, columns=sel_vars, drop_first=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a1dd82b-e0d9-44d2-a57b-80f4bb3b2c72",
+   "metadata": {},
+   "source": [
+    "# Numerische Variablen"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "607eab42-5a74-40fd-813d-eed338e93d3e",
+   "metadata": {},
+   "source": [
+    "## Logarithmieren"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "d5a6ff61-f2dc-47d0-ac10-82c811afe903",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## apply log10(x + 1) to both area variables, then rename them\n",
+    "for col in ['Landsize', 'BuildingArea']:\n",
+    "    data[col] = np.log10(data[col] + 1)\n",
+    "data = data.rename(columns={\n",
+    "    'Landsize' : 'logLandsize',\n",
+    "    'BuildingArea' : 'logBuildingArea'\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7dace176-8b5d-4533-bdd2-119e63a7bfa2",
+   "metadata": {},
+   "source": [
+    "## Binär umcodieren"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f508469-97f4-4025-be52-45643c300bfe",
+   "metadata": {},
+   "source": [
+    "hier kein Bedarf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "177c878f-ddf4-49f9-95c8-e9cc1af76473",
+   "metadata": {},
+   "source": [
+    "# Andere Tätigkeiten"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e1324181-c90f-4367-968f-1723bf2784ac",
+   "metadata": {},
+   "source": [
+    "## Konstruktion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "2e939e6a-7a96-4942-945f-4f21d0af48e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## derive month, year and day_of_week from Date (parsed as day/month/year)\n",
+    "Date = pd.to_datetime(data['Date'], format='%d/%m/%Y')\n",
+    "data = data.assign(\n",
+    "    month=Date.dt.month,\n",
+    "    year=Date.dt.year,\n",
+    "    day_of_week=Date.dt.day_of_week,\n",
+    ").drop(columns='Date') ## Date itself is no longer used"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3931a58a-6d47-494f-8bb2-9ea6f2914ff3",
+   "metadata": {},
+   "source": [
+    "## Bereinigen der Variablennamen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "70daeeee-8257-4969-b9ff-49d700251b54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## replace every character outside [a-zA-Z0-9_] in the column names by '_'\n",
+    "## (a single vectorised assignment instead of one in-place rename() call per column)\n",
+    "data.columns = data.columns.str.replace('[^a-zA-Z0-9_]', '_', regex=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bbac2f0b-1f1e-41e2-aae9-60ec4238b28c",
+   "metadata": {},
+   "source": [
+    "## Standardisieren"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "906c9083-a03d-4a21-930e-f87e21079359",
+   "metadata": {},
+   "source": [
+    "hier kein Bedarf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "92bae929-be6a-4061-bdb1-572cc9fc8390",
+   "metadata": {},
+   "source": [
+    "## Speichern unter neuem Namen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "882cd439-5cb6-4ab3-8695-09af2670f8f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.to_csv('melb_data_prep.csv', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc-autonumbering": true
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1,224 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 08 Regression mit Standardisieren und Logarithmieren"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "#codepath = '../2_code' ## for import of user defined module\n",
+    "#datapath = '../3_data'\n",
+    "codepath = '.././2_code' ## for import of user defined module\n",
+    "datapath = '../../3_data'\n",
+    "\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-105513873.23403685\n",
+      "[ 245383.60581414 -141356.39759052  -40383.66643969  161336.03949841\n",
+      "   40391.14829949   83303.27089591]\n",
+      "[1331246.16325189 2557493.2373921   871684.82823291 1495633.275723\n",
+      " 1549557.61151302  634348.67092323]\n",
+      "0.5601419746121152\n"
+     ]
+    }
+   ],
+   "source": [
+    "## baseline: linear regression on the features as delivered by prep_data\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import r2_score\n",
+    "model = LinearRegression().fit(X_train, y_train)\n",
+    "y_pred = model.predict(X_test)\n",
+    "\n",
+    "## intercept, first six coefficients, first six predictions, R2 score\n",
+    "for value in (model.intercept_, model.coef_[:6], y_pred[:6], r2_score(y_test, y_pred)):\n",
+    "    print(value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1055902.69523731\n",
+      "[ 235020.76662584  -96493.73493151 -243470.62893089  106305.85273776\n",
+      "   35544.05464669   71047.51543032]\n",
+      "[1331246.16325187 2557493.23739203  871684.82823297 1495633.27572294\n",
+      " 1549557.611513    634348.67092323]\n",
+      "0.5601419746121148\n"
+     ]
+    }
+   ],
+   "source": [
+    "## scaled features: the scaler is fitted on X_train only and applied to both sets\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "scaler = StandardScaler().fit(X_train)\n",
+    "X_train_sc, X_test_sc = scaler.transform(X_train), scaler.transform(X_test)\n",
+    "\n",
+    "model = LinearRegression().fit(X_train_sc, y_train)\n",
+    "y_pred = model.predict(X_test_sc)\n",
+    "\n",
+    "## intercept, first six coefficients, first six predictions, R2 score\n",
+    "for value in (model.intercept_, model.coef_[:6], y_pred[:6], r2_score(y_test, y_pred)):\n",
+    "    print(value)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit**\n",
+    "* Auswirkung von Skalieren der Features\n",
+    "  * Koeffizienten und Intercept: Einfluss\n",
+    "  * Prediction: kein Einfluss\n",
+    "  * Score: natürlich auch kein Einfluss, wird ja aus Prediction berechnet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.5519266421486302\n"
+     ]
+    }
+   ],
+   "source": [
+    "## log target\n",
+    "y_train_log = np.log10(y_train)\n",
+    "y_test_log = np.log10(y_test)\n",
+    "\n",
+    "model = LinearRegression()\n",
+    "model.fit(X_train, y_train_log)\n",
+    "y_pred = model.predict(X_test)\n",
+    "print(r2_score(10**y_test_log, 10**y_pred))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit**\n",
+    "* wird sogar etwas schlechter\n",
+    "* Kombination mit skalierten Features erübrigt sich hier, da Skalieren ja offenbar keinen Einfluss auf den Score hat"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "1",
+   "nav_menu": {},
+   "number_sections": false,
+   "sideBar": true,
+   "skip_h1_title": true,
+   "title_cell": "WS 11 Regression - mit FE - solution",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.933px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "321.85px",
+    "left": "785px",
+    "right": "20px",
+    "top": "118px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,212 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 14 Random Search CV"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* untersuchen Sie Kombinationen von Parameterwerten bei RandomForestClassifier\n",
+    "* Vorschlag:\n",
+    "  * n_estimators in [50, 100, 150, 200]\n",
+    "  * max_features in [3, 5, 7, 9]\n",
+    "  * criterion in ['gini', 'entropy']\n",
+    "  * min_samples_leaf in [1, 2, 3, 4]\n",
+    "* wenden Sie 5-fach Kreuzvalidierung an\n",
+    "* setzen Sie die Anzahl der zu untersuchenden Kombinationen auf 12\n",
+    "* arbeiten Sie ohne setzen von random_state, damit anschliessend die Ergebnisse verglichen werden können"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## import libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "## load data\n",
+    "datapath = '../../3_data'\n",
+    "from os import chdir; chdir(datapath)\n",
+    "bank_df = pd.read_csv('bank_data_prep.csv')\n",
+    "\n",
+    "## features - target - split\n",
+    "X = bank_df.drop('y', axis=1)\n",
+    "y = bank_df['y']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_params_ : {'n_estimators': 50, 'min_samples_leaf': 4, 'max_features': 9, 'criterion': 'entropy'}\n",
+      "best_score_  : 0.8884381338742393\n",
+      "CPU times: total: 3.09 s\n",
+      "Wall time: 43.6 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "## import classes from sklearn\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import RandomizedSearchCV\n",
+    "\n",
+    "## define parameter grid\n",
+    "parameter_grid = {'n_estimators': [50, 100, 150, 200],\n",
+    "                  'max_features': [3, 5, 7, 9],\n",
+    "                  'criterion': ['gini', 'entropy'],\n",
+    "                  'min_samples_leaf': [1, 2, 3, 4]}\n",
+    "\n",
+    "## define RandomizedSearchCV\n",
+    "rscv = RandomizedSearchCV(\n",
+    "    estimator=RandomForestClassifier(random_state=1234), \n",
+    "    param_distributions=parameter_grid, \n",
+    "    cv=5,\n",
+    "    n_iter=12,\n",
+    "    random_state=1234,\n",
+    "    n_jobs=-1)\n",
+    "\n",
+    "## run RandomizedSearchCV\n",
+    "rscv.fit(X, y)\n",
+    "\n",
+    "## evaluate RandomizedSearchCV\n",
+    "print('best_params_ :', rscv.best_params_)\n",
+    "print('best_score_  :', rscv.best_score_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#rscv.best_estimator_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(9860, 29)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit:**\n",
+    "* n_estimators: [50, 100, 150, 200] -> 50\n",
+    "  * hier müsste der Suchbereich nach unten erweitert werden\n",
+    "* max_features: [3, 5, 7, 9] -> 9\n",
+    "  * hier müsste der Suchbereich nach oben erweitert werden\n",
+    "* criterion: ['gini', 'entropy']\n",
+    "  * Suchbereich ok\n",
+    "* min_samples_leaf: [1, 2, 3, 4] -> 4\n",
+    "  * hier müsste der Suchbereich nach oben erweitert werden"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 17 Validierung - Random Search CV",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.867px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "306.85px",
+    "left": "862px",
+    "right": "20px",
+    "top": "137px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# WS 02 Feature Engineering Exploration Overview.ipynb"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* compiles the most important characteristic values from a loaded data frame and stores them in an Excel spreadsheet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    index      var_names   dtypes    nas  uniques                  modes  \\\n",
+      "0       0     Unnamed: 0    int64      0    18396                   None   \n",
+      "1       1         Suburb   object      0      330              Reservoir   \n",
+      "2       2        Address   object      0    18134       1/1 Clarendon St   \n",
+      "3       3          Rooms    int64      0       11                   None   \n",
+      "4       4           Type   object      0        3                      h   \n",
+      "5       5          Price  float64      0     2470                   None   \n",
+      "6       6         Method   object      0        5                      S   \n",
+      "7       7        SellerG   object      0      305                 Nelson   \n",
+      "8       8           Date   object      0       58             27/05/2017   \n",
+      "9       9       Distance  float64      1      210                   None   \n",
+      "10     10       Postcode  float64      1      205                   None   \n",
+      "11     11       Bedroom2  float64   3469       12                   None   \n",
+      "12     12       Bathroom  float64   3471        9                   None   \n",
+      "13     13            Car  float64   3576       11                   None   \n",
+      "14     14       Landsize  float64   4793     1449                   None   \n",
+      "15     15   BuildingArea  float64  10634      613                   None   \n",
+      "16     16      YearBuilt  float64   9438      144                   None   \n",
+      "17     17    CouncilArea   object   6163       33               Moreland   \n",
+      "18     18      Lattitude  float64   3332     7518                   None   \n",
+      "19     19     Longtitude  float64   3332     8168                   None   \n",
+      "20     20     Regionname   object      1        8  Southern Metropolitan   \n",
+      "21     21  Propertycount  float64      1      324                   None   \n",
+      "\n",
+      "           means        medians  \n",
+      "0   1.182679e+04   11820.500000  \n",
+      "1            NaN            NaN  \n",
+      "2            NaN            NaN  \n",
+      "3   2.935040e+00       3.000000  \n",
+      "4            NaN            NaN  \n",
+      "5   1.056697e+06  880000.000000  \n",
+      "6            NaN            NaN  \n",
+      "7            NaN            NaN  \n",
+      "8            NaN            NaN  \n",
+      "9   1.038999e+01       9.700000  \n",
+      "10  3.107140e+03    3085.000000  \n",
+      "11  2.913043e+00       3.000000  \n",
+      "12  1.538492e+00       1.000000  \n",
+      "13  1.615520e+00       2.000000  \n",
+      "14  5.581164e+02     440.000000  \n",
+      "15  1.512202e+02     126.000000  \n",
+      "16  1.965880e+03    1970.000000  \n",
+      "17           NaN            NaN  \n",
+      "18 -3.780985e+01     -37.803625  \n",
+      "19  1.449963e+02     145.000920  \n",
+      "20           NaN            NaN  \n",
+      "21  7.517975e+03    6567.000000  \n"
+     ]
+    }
+   ],
+   "source": [
+    "## import libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "## define data path\n",
+    "datapath = '../3_data'\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "## load data\n",
+    "data = pd.read_csv('melb_data.csv')\n",
+    "\n",
+    "## per-column characteristics, all aligned by column position\n",
+    "var_names = pd.Series(data.columns)\n",
+    "dtypes = pd.Series(data.dtypes.values)\n",
+    "nas = pd.Series(data.isna().sum().values)\n",
+    "uniques = pd.Series([data[name].nunique() for name in var_names])\n",
+    "\n",
+    "## mode only for object columns, mean and median only for all other columns\n",
+    "is_object = [dtype == 'object' for dtype in dtypes]\n",
+    "modes = pd.Series([\n",
+    "    data[name].mode()[0] if obj else None\n",
+    "    for name, obj in zip(var_names, is_object)])\n",
+    "means = pd.Series([\n",
+    "    None if obj else data[name].mean()\n",
+    "    for name, obj in zip(var_names, is_object)])\n",
+    "medians = pd.Series([\n",
+    "    None if obj else data[name].median()\n",
+    "    for name, obj in zip(var_names, is_object)])\n",
+    "\n",
+    "## collect results\n",
+    "overview = pd.DataFrame({\n",
+    "    'var_names': var_names,\n",
+    "    'dtypes': dtypes,\n",
+    "    'nas': nas,\n",
+    "    'uniques': uniques,\n",
+    "    'modes': modes,\n",
+    "    'means': means,\n",
+    "    'medians': medians,\n",
+    "}).reset_index()\n",
+    "print(overview)\n",
+    "\n",
+    "## stored in the working directory set by chdir(datapath) above\n",
+    "overview.to_excel('ws_02_overview.xlsx', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "1",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 02 Feature Engineering - Exploration kategoriale Variablen",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "180.6px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "oldHeight": 217.64999999999998,
+   "position": {
+    "height": "238.85px",
+    "left": "802.2px",
+    "right": "20px",
+    "top": "116px",
+    "width": "326.8px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "varInspector_section_display": "block",
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,201 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# WS 04 Vorlage - KNeighborsClassifier"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* standardisieren Sie die Features von Trainings- und Testdaten mit Hilfe von sklearn.preprocessing.StandardScaler\n",
+    "* ermitteln Sie anschliessend die besten Parameterwerte für KNeighborsClassifier\n",
+    "  * n_neighbors (1-10)\n",
+    "  * p (z.B. 1, 2, 3)\n",
+    "* vergleichen Sie die Ergebnisse ohne und mit standardisieren"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## import libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "## load data\n",
+    "datapath = '../3_data'\n",
+    "from os import chdir; chdir(datapath)\n",
+    "data = pd.read_csv('bank_data_prep.csv')\n",
+    "\n",
+    "## separate the target column 'y' from the features\n",
+    "y = data['y']\n",
+    "X = data.drop(columns='y')\n",
+    "\n",
+    "## train - test - split: two thirds for training, fixed random_state\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, train_size=2 / 3, random_state=1234)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "rem: für die obige Datenaufbereitung wird ab dem nächsten Workshop die Funktion `prep_data()` aus dem Modul `bfh_cas_pml` verwendet werden"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## standardiz features (lead: train data)\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "scaler = StandardScaler().fit(X_train)\n",
+    "X_train_scaled = scaler.transform(X_train)\n",
+    "X_test_scaled = scaler.transform(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n",
+      "2\n",
+      "3\n",
+      "4\n",
+      "5\n",
+      "6\n",
+      "7\n",
+      "8\n",
+      "9\n",
+      "10\n"
+     ]
+    }
+   ],
+   "source": [
+    "## Tune über n_neighbors\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "model = KNeighborsClassifier()\n",
+    "params = range(1, 11)\n",
+    "scores = []  ## scores ohne Standardisieren\n",
+    "scores_sc = []  ## scores mit Standardisieren\n",
+    "\n",
+    "for param in params:\n",
+    "    print(param)\n",
+    "    ## tbd\n",
+    "\n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Tune über p\n",
+    "params = range(1, 4) ## dasselbe wie [1, 2, 3]\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit**:\n",
+    "* tbd\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 07 Klassifikation - KNeighborsClassifier",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "205.2px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 05 Klassifikation - DecisionTreeClassifier  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* untersuchen Sie verschiedene Werte von min_impurity_decrease bei DecisionTreeClassifier auf die erreichbare Performance (Accuracy)\n",
+    "* grenzen Sie dabei den zu untersuchenden Wertebereich schrittweise ein\n",
+    "* stellen Sie dazu die Ergebnisse wie folgt dar\n",
+    "  * grafisch als Liniendiagramm\n",
+    "  * in der Konsole mit bestem Score und entsprechendem Parameterwert\n",
+    "* Hinweis\n",
+    "  * `range()`: erstellt einen Bereich von ganzzahligen Werten mit identischer Schrittweite\n",
+    "  * `np.arange()`: (Funktion von numpy) erstellt mit analoger Parametrisierung einen Bereich mit Gleitkommawerten"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "\n",
+    "sns.set()\n",
+    "\n",
+    "codepath = '../2_code'  ## for import of user defined module\n",
+    "datapath = '../3_data'\n",
+    "\n",
+    "from sys import path\n",
+    "path.insert(1, codepath)\n",
+    "\n",
+    "from os import chdir\n",
+    "chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv',\n",
+    "                                                  target='y',\n",
+    "                                                  seed=1234)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "scrolled": true,
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.0\n",
+      "0.01\n",
+      "0.02\n",
+      "0.03\n",
+      "0.04\n",
+      "0.05\n",
+      "0.06\n",
+      "0.07\n",
+      "0.08\n",
+      "0.09\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "model = DecisionTreeClassifier()\n",
+    "\n",
+    "scores = []\n",
+    "params = np.arange(0, 0.1, 0.01)\n",
+    "\n",
+    "for param in params:\n",
+    "    print(param)\n",
+    "    ## tbd\n",
+    "    \n",
+    "\n",
+    "    "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "0",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 08 Klassifikation - DecisionTreeClassifier",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "165px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,271 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 06 Klassifikation - RandomForestClassifier"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* untersuchen Sie die folgenden Tuning-Parameter von RandomForestClassifier in Bezug auf die erreichte Performance (accuracy_score) mit dem vorbereiteten Dataset:\n",
+    "  * n_estimators als `range(100, 500, 50)`\n",
+    "  * max_features als `range(1, 11)`\n",
+    "  * min_impurity_decrease als `np.arange(0, 0.1, 0.01)`\n",
+    "* wie wirkt sich der random_state aus?\n",
+    "* welche der ausserdem zur Verfügung stehenden Parameter sind keine Tuning Parameter? Konsultieren Sie dazu die (Online-) Dokumentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "codepath = '../2_code' ## for import of user defined module\n",
+    "datapath = '../3_data'\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv', target='y', seed=1234)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import RandomForestClassifier"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "n_estimators:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100\n",
+      "150\n",
+      "200\n",
+      "250\n",
+      "300\n",
+      "350\n",
+      "400\n",
+      "450\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = RandomForestClassifier()\n",
+    "scores = []\n",
+    "params = range(100, 500, 50)\n",
+    "\n",
+    "for param in params:\n",
+    "    print(param)\n",
+    "    ## tbd\n",
+    "    \n",
+    "\n",
+    "## tbd\n",
+    "#fig = sns.lineplot(x=params, y=scores)\n",
+    "#...\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "max_features:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n",
+      "2\n",
+      "3\n",
+      "4\n",
+      "5\n",
+      "6\n",
+      "7\n",
+      "8\n",
+      "9\n",
+      "10\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = RandomForestClassifier()\n",
+    "scores = []\n",
+    "params = range(1, 11)\n",
+    "\n",
+    "for param in params:\n",
+    "    print(param)\n",
+    "    ## tbd\n",
+    "    \n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "min_impurity_decrease:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.0\n",
+      "0.01\n",
+      "0.02\n",
+      "0.03\n",
+      "0.04\n",
+      "0.05\n",
+      "0.06\n",
+      "0.07\n",
+      "0.08\n",
+      "0.09\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = RandomForestClassifier()\n",
+    "scores = []\n",
+    "params = np.arange(0, 0.1, 0.01)\n",
+    "\n",
+    "for param in params:\n",
+    "    print(param)\n",
+    "    ## tbd\n",
+    "    \n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit:**\n",
+    "* tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "keine Tuning Parameter sind hier:\n",
+    "* tbd\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "2.2",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 09 Klassifikation - RandomForestClassifier",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "205.2px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,223 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "<h1>WS 07 Klassifikation - Modellvergleiche<span class=\"tocSkip\"></span></h1>\n",
+    "<div class=\"toc\"><ul class=\"toc-item\"></ul></div>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* vergleichen Sie alle bis jetzt vorgestellten Klassifikatoren miteinander in Bezug auf\n",
+    "  * Performance\n",
+    "  * Rechenzeiten, differenziert nach .fit() und .predict()  \n",
+    "  und visualisieren Sie die Ergebnisse\n",
+    "* Tipp: modifizieren / ergänzen Sie dazu den abgegebenen Code von Kapitel 2.2.6 Modellvergleiche\n",
+    "\n",
+    "* optional: fügen Sie andere, im Kurs nicht behandelte Klassifikatoren dazu, welche Sie in der Dokumentation von scikit-learn finden\n",
+    "* optional: falls Sie im Rahmen von Feature Engineering alternatives Preprocessing erarbeitet haben, können Sie die Auswirkungen desselben jetzt auch noch einbeziehen\n",
+    "* optional: wie wirkt sich Skalierung (z.B. mit StandardScaler) auf die Performance von MLPClassifier aus?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "codepath = '../2_code' ## for import of user defined module\n",
+    "datapath = '../3_data'\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv', target='y', seed=1234)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Funktionen (Klassen) importieren\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "## tbd ergänzen\n",
+    "\n",
+    "\n",
+    "\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "import time  ## für Zeitmessung"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Modelle definieren und in Liste hinterlegen\n",
+    "models = [\n",
+    "    KNeighborsClassifier(),\n",
+    "    DecisionTreeClassifier(min_impurity_decrease=0.002),\n",
+    "    RandomForestClassifier(n_estimators=100)\n",
+    "    ## tbd ergänzen\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "KNeighborsClassifier()\n",
+      "DecisionTreeClassifier(min_impurity_decrease=0.002)\n",
+      "RandomForestClassifier()\n"
+     ]
+    }
+   ],
+   "source": [
+    "## zum Sammeln der Resultate\n",
+    "scores = []\n",
+    "times_fit = []\n",
+    "times_pred = []\n",
+    "model_names = []\n",
+    "\n",
+    "#print('Classifier                          Score      Time fit    Time pred')\n",
+    "#print('====================================================================')\n",
+    "\n",
+    "## Loop\n",
+    "for model in models:\n",
+    "    print(model)\n",
+    "    ## tbd\n",
+    "    \n",
+    "    ##   start timer1 - fit - stop timer1\n",
+    "    \n",
+    "    \n",
+    "    ##   start timer2 - predict - stop timer2\n",
+    "    \n",
+    "    \n",
+    "    ##   berechne Score & pick Modellname\n",
+    "    \n",
+    "    \n",
+    "    ##   Ergebnisse an vorbereitete Listen anhängen\n",
+    "    \n",
+    "    \n",
+    "    ##   Iterationsergebnisse in Konsole ausgeben (optional)\n",
+    "\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## visualisieren\n",
+    "## tbd ergänzen\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit:**  \n",
+    "* tbd\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 10 Klassifikation - Modellvergleiche",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "205.2px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 08 Regression mit Standardisieren und Logarithmieren"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* untersuchen Sie den Einfluss des Standardisierens der Features auf folgende Ergebnisse der Linearen Regression:\n",
+    "  * Modellkoeffizienten\n",
+    "  * Predictions\n",
+    "  * Score\n",
+    "* untersuchen Sie den Einfluss des Logarithmierens des Targets auf die Performance der Linearen Regression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "codepath = '../2_code' ## for import of user defined module\n",
+    "datapath = '../3_data'\n",
+    "#codepath = '.././2_code' ## for import of user defined module\n",
+    "#datapath = '../../3_data'\n",
+    "\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "-105513873.23403685\n",
+      "[ 245383.60581414 -141356.39759052  -40383.66643969  161336.03949841\n",
+      "   40391.14829949   83303.27089591]\n",
+      "[1331246.16325189 2557493.2373921   871684.82823291 1495633.275723\n",
+      " 1549557.61151302  634348.67092323]\n",
+      "0.5601419746121152\n"
+     ]
+    }
+   ],
+   "source": [
+    "## baseline\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import r2_score\n",
+    "model = LinearRegression()\n",
+    "model.fit(X_train, y_train)\n",
+    "y_pred = model.predict(X_test)\n",
+    "\n",
+    "print(model.intercept_)\n",
+    "print(model.coef_[:6])\n",
+    "print(y_pred[:6])\n",
+    "print(r2_score(y_test, y_pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## scaled features\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## log target\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit**\n",
+    "* tbd"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "1",
+   "nav_menu": {},
+   "number_sections": false,
+   "sideBar": true,
+   "skip_h1_title": true,
+   "title_cell": "WS 11 Regression - mit FE - solution",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.933px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "321.85px",
+    "left": "785px",
+    "right": "20px",
+    "top": "118px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,198 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 09 Tune AdaBoostRegressor"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* es wurde festgestellt, dass z.B. AdaBoostRegressor unter Standard-Parametrisierung ein unbrauchbares Ergebnis liefert\n",
+    "* untersuchen Sie das Potential von Parameter-Tuning für diesen Regressor\n",
+    "* konzentrieren Sie sich auf folgende Parameter\n",
+    "  * learning_rate, Parameter von AdaBoostRegressor\n",
+    "  * max_depth, interner Parameter des Basis-Estimators, hier DecisionTreeRegressor\n",
+    "* falls Zeit übrig, untersuchen Sie noch andere Regressoren Ihrer Wahl dahingehend"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "codepath = '../2_code' ## for import of user defined module\n",
+    "datapath = '../3_data'\n",
+    "#codepath = '.././2_code' ## for import of user defined module\n",
+    "#datapath = '../../3_data'\n",
+    "\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)\n",
+    "\n",
+    "from bfh_cas_pml import test_regression_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-04-08T10:06:45.098899Z",
+     "start_time": "2020-04-08T10:06:44.257283Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "R2 = -0.3023\n"
+     ]
+    }
+   ],
+   "source": [
+    "## baseline\n",
+    "from sklearn.ensemble import AdaBoostRegressor\n",
+    "this_model = test_regression_model(\n",
+    "    AdaBoostRegressor(random_state=1234), \n",
+    "    X_train, y_train, X_test, y_test,\n",
+    "    show_plot=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## tune learning_rate\n",
+    "## tbd: find parameter range here\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## tune max_depth\n",
+    "from sklearn.tree import DecisionTreeRegressor\n",
+    "## tbd: find parameter range here\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## best combination of single parameters\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit**:\n",
+    "* tbd"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "1",
+   "nav_menu": {},
+   "number_sections": false,
+   "sideBar": true,
+   "skip_h1_title": true,
+   "title_cell": "WS 11 Regression - mit FE - solution",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.933px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "321.85px",
+    "left": "785px",
+    "right": "20px",
+    "top": "118px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,245 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 10 Performancevergleiche Regression"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* Vergleichen Sie alle Regressoren (ausser `SVR` und `MLPRegressor`) mit folgenden Modifikationen\n",
+    "  * die Vergleiche werden ohne und mit Standardisierung der Features durchgeführt\n",
+    "  * die Resultate (r2_score) werden in Form einer Heatmap zusammengestellt\n",
+    "* informieren Sie sich zum Vorgehen am Code in 3.4 Regression - Modellvergleiche.ipynb\n",
+    "* Präsentation der Ergebnisse als \n",
+    "  * seaborn heatmap\n",
+    "  * alternative Visualisierung: Grouped barplots"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## for scikit-learn 1.4.2, to silence warnings regarding physical cores\n",
+    "import os\n",
+    "os.environ['LOKY_MAX_CPU_COUNT'] = '4' ## depending on the hardware used"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "codepath = '../2_code'\n",
+    "datapath = '../3_data'\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-04-08T10:33:02.116059Z",
+     "start_time": "2020-04-08T10:33:02.087399Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## standardize features (lead: train)\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "scaler = StandardScaler()\n",
+    "scaler.fit(X_train)\n",
+    "X_train_sc = scaler.transform(X_train)\n",
+    "X_test_sc = scaler.transform(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-04-08T10:33:02.294366Z",
+     "start_time": "2020-04-08T10:33:02.120049Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## import trainer classes\n",
+    "## tbd\n",
+    "\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-04-08T10:33:02.326199Z",
+     "start_time": "2020-04-08T10:33:02.299732Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## define models\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-04-08T10:33:15.141363Z",
+     "start_time": "2020-04-08T10:33:02.341448Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## compare models\n",
+    "## tbd: prepare empty lists for results\n",
+    "\n",
+    "\n",
+    "\n",
+    "# for model in models:\n",
+    "\n",
+    "    ## not scaled\n",
+    "    ## tbd\n",
+    "    \n",
+    "    \n",
+    "    \n",
+    "    ## scaled\n",
+    "    ## tbd\n",
+    "    \n",
+    "\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-04-08T10:33:15.742776Z",
+     "start_time": "2020-04-08T10:33:15.150619Z"
+    }
+   },
+   "source": [
+    "## visualize results\n",
+    "\"\"\"\n",
+    "scores = pd.DataFrame(\n",
+    "    {'r2_no': r2_nos, \n",
+    "     'r2_yes': r2_yess\n",
+    "    }, index=regressors)\n",
+    "\n",
+    "sns.heatmap(scores);\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit:**  \n",
+    "* tbd"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "1",
+   "nav_menu": {},
+   "number_sections": false,
+   "sideBar": true,
+   "skip_h1_title": true,
+   "title_cell": "WS 14 Regression - Modellvergleiche 2",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.933px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "321.85px",
+    "left": "785px",
+    "right": "20px",
+    "top": "118px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,217 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 11 permutation_importance"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* ermitteln Sie die Importance der Features der *Rohdaten* von `melb_data.csv` unter Einsatz von `sklearn.inspection.permutation_importance`\n",
+    "* setzen Sie dazu minimales Feature Engineering wie folgt ein:\n",
+    "  * entfernen fragwürdiger Variablen: 'Unnamed: 0', 'Suburb', 'Address', 'SellerG', 'Postcode', 'Bedroom2', 'Date', 'CouncilArea'\n",
+    "  * One-Hot encoding aller verbleibenden kategorialen Variablen (der Parameter `dummy_na=True` von `pd.get_dummies()` erstellt auch Dummy-Variablen für NAs)\n",
+    "  * einsetzen von geschätzten Werten für NAs in verbleibenden numerischen Variablen mit `sklearn.impute.KNNImputer`\n",
+    "* danach:\n",
+    "  * features - target - split\n",
+    "  * **kein** train - test - split\n",
+    "  * ermitteln der Importance unter Einsatz von \n",
+    "    * `sklearn.inspection.permutation_importance`\n",
+    "    * `sklearn.tree.DecisionTreeRegressor`\n",
+    "  * tabellarische und graphische Darstellung der Ergebnisse"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "codepath = '../2_code'\n",
+    "datapath = '../3_data'\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## read data\n",
+    "data = pd.read_csv('melb_data.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## drop columns\n",
+    "vars_to_drop = ['Unnamed: 0', 'Suburb', 'Address', 'SellerG', 'Postcode', 'Bedroom2', 'Date', 'CouncilArea']\n",
+    "data = data.drop(vars_to_drop, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## one-hot encode (incl. NAs)\n",
+    "data = pd.get_dummies(data, drop_first=False, dummy_na=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## KNNImputer for NAs\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## features - target - split\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## permutation_importance\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## collect results in a dataframe, ordered by mean\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "## visualize results\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "1",
+   "nav_menu": {},
+   "number_sections": false,
+   "sideBar": true,
+   "skip_h1_title": true,
+   "title_cell": "WS 14 Regression - Modellvergleiche 2 - solution",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.933px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": false
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "321.85px",
+    "left": "785px",
+    "right": "20px",
+    "top": "118px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,193 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# WS 13 Kreuzvalidierung"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* vergleichen Sie alle bisher bekannten Klassifikatoren (ausser SVC und MLPClassifier) in Bezug auf deren Stabilität unter Anwendung von Kreuzvalidierung\n",
+    "* verwenden Sie für die Klassifikatoren jeweils Default-Parametrisierung\n",
+    "* setzen Sie für die Kreuzvalidierung folgende Funktion ein: `sklearn.model_selection.cross_val_score`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## load libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "## load data\n",
+    "datapath = '../3_data'\n",
+    "from os import chdir; chdir(datapath)\n",
+    "bank_df = pd.read_csv('bank_data_prep.csv')\n",
+    "\n",
+    "## features - target - split\n",
+    "X = bank_df.drop('y', axis=1)\n",
+    "y = bank_df['y']\n",
+    "\n",
+    "## train - test - split\n",
+    "## obsolete here, is done internally by cross validation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "KNeighborsClassifier\n",
+      "DecisionTreeClassifier\n",
+      "RandomForestClassifier\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "## tbd complete\n",
+    "\n",
+    "\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "from sklearn.model_selection import cross_val_score\n",
+    "\n",
+    "models = [\n",
+    "    KNeighborsClassifier(),\n",
+    "    DecisionTreeClassifier(),\n",
+    "    RandomForestClassifier()\n",
+    "    ## tbd complete\n",
+    "    \n",
+    "    \n",
+    "]\n",
+    "\n",
+    "kfold = 5\n",
+    "model_names = []\n",
+    "model_scores = []\n",
+    "\n",
+    "for model in models:\n",
+    "    model_name = model.__class__.__name__\n",
+    "    print(model_name)\n",
+    "    ## tbd\n",
+    "\n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## manage results, e.g. in pandas dataframe\n",
+    "## tbd\n",
+    "\n",
+    "\n",
+    "\n",
+    "## visualize results\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit:**\n",
+    "* tbd"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 16 Validierung - Kreuzvalidierung",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "165px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "306.85px",
+    "left": "862px",
+    "right": "20px",
+    "top": "137px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "toc": true
+   },
+   "source": [
+    "# WS 14 Random Search CV"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* untersuchen Sie Kombinationen von Parameterwerten bei RandomForestClassifier\n",
+    "* Vorschlag:\n",
+    "  * n_estimators in [50, 100, 150, 200]\n",
+    "  * max_features in [3, 5, 7, 9]\n",
+    "  * criterion in ['gini', 'entropy']\n",
+    "  * min_samples_leaf in [1, 2, 3, 4]\n",
+    "* wenden Sie 5-fach Kreuzvalidierung an\n",
+    "* setzen Sie die Anzahl der zu untersuchenden Kombinationen auf 12\n",
+    "* arbeiten Sie ohne setzen von random_state, damit anschliessend die Ergebnisse verglichen werden können"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## import libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "## load data\n",
+    "datapath = '../3_data'\n",
+    "from os import chdir; chdir(datapath)\n",
+    "bank_df = pd.read_csv('bank_data_prep.csv')\n",
+    "\n",
+    "## features - target - split\n",
+    "X = bank_df.drop('y', axis=1)\n",
+    "y = bank_df['y']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## import classes from sklearn\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import RandomizedSearchCV\n",
+    "\n",
+    "## define parameter grid\n",
+    "## tbd\n",
+    "#parameter_grid = ...\n",
+    "\n",
+    "\n",
+    "\n",
+    "## define RandomizedSearchCV\n",
+    "## tbd\n",
+    "\n",
+    "\n",
+    "\n",
+    "## run RandomizedSearchCV\n",
+    "## tbd\n",
+    "\n",
+    "\n",
+    "\n",
+    "## evaluate RandomizedSearchCV\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Fazit:**\n",
+    "* tbd\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 17 Validierung - Random Search CV",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.867px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "306.85px",
+    "left": "862px",
+    "right": "20px",
+    "top": "137px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1,216 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": [],
+    "toc": true
+   },
+   "source": [
+    "# WS 15 Schwellenwert für Accuracy"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* `.predict_proba()` gibt bei allen Klassifikatoren die Wahrscheinlichkeit für die Zugehörigkeit zu den einzelnen Klassen zurück, `.predict()` dagegen die wahrscheinlichste Klasse selber\n",
+    "* für Zwei-Klassen Fragestellungen bedeutet dies, dass bei einer Wahrscheinlichkeit (proba) `> 0.5` für die erste Klasse diese zurückgegeben wird, andernfalls die zweite Klasse, `0.5` ist somit ein scheinbar willkürlicher Schwellenwert\n",
+    "* untersuchen Sie die Auswirkung anderer Schwellenwerte auf die Accuracy mit `RandomForestClassifier` auf den aufbereiteten Bankkunden-Daten\n",
+    "\n",
+    "* vorgeschlagenes Vorgehen:\n",
+    "  * trainieren eines RandomForestClassifier mit den vorbereiteten Bankkundendaten (Trainingsdaten)\n",
+    "  * bestimmen der Wahrscheinlichkeit für jede Beobachtung der entsprechenden Testdaten zur Klasse 'no'\n",
+    "  * erstellen eines Range der zu untersuchenden Schwellenwerte, z.B. mit np.arange()\n",
+    "  * in einem Loop über alle Werte dieses Ranges\n",
+    "    * `y_pred` für den jeweiligen Schwellenwert berechnen (wiederum als `['no', 'yes']`)\n",
+    "    * `accuracy_score()` der jeweiligen Prediction (und sammeln in einer Liste)\n",
+    "  * ausgeben des besten Score-Wertes und des zugehörigen Schwellenwertes in der Konsole\n",
+    "  * visualisieren der Ergebnisse auch als Lineplot  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## prepare env, read and prepare data\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns; sns.set()\n",
+    "\n",
+    "codepath = '../2_code' ## for import of user defined module\n",
+    "datapath = '../3_data'\n",
+    "\n",
+    "from sys import path; path.insert(1, codepath)\n",
+    "from os import chdir; chdir(datapath)\n",
+    "\n",
+    "from bfh_cas_pml import prep_data\n",
+    "X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv', target='y', seed=1234)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-04-15T21:03:01.918701Z",
+     "start_time": "2020-04-15T21:03:01.844142Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## train a model\n",
+    "from sklearn.ensemble import RandomForestClassifier \n",
+    "model = RandomForestClassifier(random_state=1234)\n",
+    "model.fit(X_train, y_train) \n",
+    "\n",
+    "## prediction using .predict_proba()\n",
+    "y_pred_p_no = model.predict_proba(X_test)[:, 0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.0\n",
+      "0.1\n",
+      "0.2\n",
+      "0.30000000000000004\n",
+      "0.4\n",
+      "0.5\n",
+      "0.6000000000000001\n",
+      "0.7000000000000001\n",
+      "0.8\n",
+      "0.9\n",
+      "1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "## inspect different threshold values\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "\n",
+    "thresholds = np.arange(0, 1.01, 0.1) ## test over 10\n",
+    "#thresholds = np.arange(0, 1.01, 0.01)\n",
+    "\n",
+    "scores = []\n",
+    "\n",
+    "for threshold in thresholds:\n",
+    "    ## tbd\n",
+    "    print(threshold)\n",
+    "\n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## results\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "## visualization\n",
+    "## tbd\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  },
+  "toc": {
+   "base_numbering": "",
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "WS 15 Validierung - Sampling",
+   "title_sidebar": "Contents",
+   "toc_cell": true,
+   "toc_position": {
+    "height": "calc(100% - 180px)",
+    "left": "10px",
+    "top": "150px",
+    "width": "195.867px"
+   },
+   "toc_section_display": true,
+   "toc_window_display": true
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "position": {
+    "height": "306.85px",
+    "left": "862px",
+    "right": "20px",
+    "top": "137px",
+    "width": "350px"
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}