refactor: move things around
This commit is contained in:
Vendored
BIN
Binary file not shown.
@@ -0,0 +1,398 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "763373f9-1ba3-4fb6-9ae3-d0aeb6be07e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Workshop 03 - Loesungsvorschlag**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac5573af-46dd-43fa-886e-ca228e61edaf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Data Frame"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "22a6e66a-eeaa-4ebe-aa95-ad983a2b1110",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare and read data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"datapath = '../../3_data'\n",
|
||||
"filename = 'melb_data.csv'\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"data = pd.read_csv(filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c42aa1f-2ab1-43b2-a9a7-808f83dd724f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Entfernen von Beobachtungen nach Bedingung"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "71e9e7ca-c6cd-40bc-b9ff-adb11721d669",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## remove outliers on Price\n",
|
||||
"data = data[data.Price < 8000000]\n",
|
||||
"\n",
|
||||
"## remove selected observation (implausible YearBuilt 1196)\n",
|
||||
"data = data[data.YearBuilt != 1196]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "309c5083-cefb-4b25-91e3-1eebda35555f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Entfernen von Duplikaten"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1eb307e8-df31-4a37-a9ed-c80a1d354cf9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"hier kein Bedarf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "757d2596-04e0-445d-8ea9-9cd5c92ef702",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## remove duplicates\n",
|
||||
"data.drop_duplicates(ignore_index=True, inplace = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "da78d619-572c-4649-8c00-fde22bcbc8bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Entfernen fragwürdiger Variablen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "c689473c-e969-4c69-a131-4d3a680b1f83",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vars_to_drop = ['Unnamed: 0', 'Suburb', 'Address', 'SellerG', 'Postcode', 'Bedroom2']\n",
|
||||
"data = data.drop(vars_to_drop, axis=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8816b6f2-1bb3-4795-9fdb-cd7a50494d09",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Einsetzen von Werten für NAs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "c637332d-24e5-48b1-bb58-ea704ee3f92e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## mode for all cat vars (if any)\n",
|
||||
"cat_feats = data.select_dtypes(include=['object']).columns\n",
|
||||
"for c in cat_feats:\n",
|
||||
" data[c].fillna(data[c].mode()[0], inplace = True)\n",
|
||||
"\n",
|
||||
"## median for all num features\n",
|
||||
"num_feats = data.select_dtypes(include=['int64', 'float64']).columns\n",
|
||||
"for c in num_feats:\n",
|
||||
" data[c].fillna(data[c].median(), inplace = True) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e85ccf8-e99e-4630-a4e6-6d380994ab26",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kategoriale Variablen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b888038-88e8-46a5-988e-9f0dfd38d86d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Reduzieren der Kardinalität"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "e5a432e1-eb89-4d1e-b0cf-f799a8ac5216",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Regionname: combine rarest 3 levels to 'Victoria'\n",
|
||||
"data.Regionname = np.where(\n",
|
||||
" (data.Regionname == 'Eastern Victoria') |\n",
|
||||
" (data.Regionname == 'Northern Victoria') |\n",
|
||||
" (data.Regionname == 'Western Victoria'),\n",
|
||||
" 'Victoria', data.Regionname)\n",
|
||||
"\n",
|
||||
"## Method: combine 'SA' to 'S'\n",
|
||||
"data.Method = np.where(data.Method == 'SA', 'S', data.Method)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dbacc9d0-d357-4020-8062-6b968aba5e9e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Nummerisieren - Faktorisieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "ec29d19a-347b-4793-b2ce-4831f7a9bfa2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.CouncilArea = data.CouncilArea.factorize()[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3937e91f-85c4-4f1c-bd50-564e8de620ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Nummerisieren - Ordinal Encodieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "3f250d95-862f-4016-af38-1c6ff8750db2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.Type.replace(\n",
|
||||
" ['h', 'u', 't'], \n",
|
||||
" [1, 2, 3], \n",
|
||||
" inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "154a4b0f-6e2f-498a-89f4-64e80de30fd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Nummerisieren - Binär Encodieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0d422904-43d5-4c32-9c7b-cc16ddfbf39d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"hier kein Bedarf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "807990cc-9fac-47b7-a165-3194f142b30c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Nummerisieren - One-Hot Encodieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "91ef6cb1-157a-46e0-b9e6-0bf025b953b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## one-hot encoding\n",
|
||||
"## apply for all categorical variables except Date (will be transformed later)\n",
|
||||
"ignore = 'Date'\n",
|
||||
"sel_vars = data.select_dtypes(include=['object']).columns.drop(ignore)\n",
|
||||
"data = pd.get_dummies(data, columns=sel_vars, drop_first=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a1dd82b-e0d9-44d2-a57b-80f4bb3b2c72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Numerische Variablen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "607eab42-5a74-40fd-813d-eed338e93d3e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Logarithmieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "d5a6ff61-f2dc-47d0-ac10-82c811afe903",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## log-transform (log10(x + 1)) and rename\n",
|
||||
"data.Landsize = np.log10(data.Landsize + 1)\n",
|
||||
"data.BuildingArea = np.log10(data.BuildingArea + 1)\n",
|
||||
"data.rename(columns={\n",
|
||||
" 'Landsize' : 'logLandsize',\n",
|
||||
" 'BuildingArea' : 'logBuildingArea'\n",
|
||||
"}, inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7dace176-8b5d-4533-bdd2-119e63a7bfa2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Binär umcodieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9f508469-97f4-4025-be52-45643c300bfe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"hier kein Bedarf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "177c878f-ddf4-49f9-95c8-e9cc1af76473",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Andere Tätigkeiten"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e1324181-c90f-4367-968f-1723bf2784ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Konstruktion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "2e939e6a-7a96-4942-945f-4f21d0af48e5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## construct month, year and day_of_week\n",
|
||||
"Date = pd.to_datetime(data.Date, format='%d/%m/%Y')\n",
|
||||
"data['month'] = Date.dt.month\n",
|
||||
"data['year'] = Date.dt.year\n",
|
||||
"data['day_of_week'] = Date.dt.day_of_week\n",
|
||||
"data.drop('Date', axis=1, inplace=True) ## no longer used\n",
|
||||
"#print(data.info()) ## check"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3931a58a-6d47-494f-8bb2-9ea6f2914ff3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Bereinigen der Variablennamen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "70daeeee-8257-4969-b9ff-49d700251b54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"old_names = data.columns\n",
|
||||
"new_names = old_names.str.replace('[^a-zA-Z0-9_]', '_', regex=True)\n",
|
||||
"for i in range(len(old_names)):\n",
|
||||
" data.rename(columns={old_names[i]:new_names[i]}, inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bbac2f0b-1f1e-41e2-aae9-60ec4238b28c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Standardisieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "906c9083-a03d-4a21-930e-f87e21079359",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"hier kein Bedarf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92bae929-be6a-4061-bdb1-572cc9fc8390",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Speichern unter neuem Namen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "882cd439-5cb6-4ab3-8695-09af2670f8f3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.to_csv('melb_data_prep.csv', index=False)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc-autonumbering": true
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,224 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 08 Regression mit Standardisieren und Logarithmieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"#codepath = '../2_code' ## for import of user defined module\n",
|
||||
"#datapath = '../3_data'\n",
|
||||
"codepath = '.././2_code' ## for import of user defined module\n",
|
||||
"datapath = '../../3_data'\n",
|
||||
"\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-105513873.23403685\n",
|
||||
"[ 245383.60581414 -141356.39759052 -40383.66643969 161336.03949841\n",
|
||||
" 40391.14829949 83303.27089591]\n",
|
||||
"[1331246.16325189 2557493.2373921 871684.82823291 1495633.275723\n",
|
||||
" 1549557.61151302 634348.67092323]\n",
|
||||
"0.5601419746121152\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## baseline\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"from sklearn.metrics import r2_score\n",
|
||||
"model = LinearRegression()\n",
|
||||
"model.fit(X_train, y_train)\n",
|
||||
"y_pred = model.predict(X_test)\n",
|
||||
"\n",
|
||||
"print(model.intercept_)\n",
|
||||
"print(model.coef_[:6])\n",
|
||||
"print(y_pred[:6])\n",
|
||||
"print(r2_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1055902.69523731\n",
|
||||
"[ 235020.76662584 -96493.73493151 -243470.62893089 106305.85273776\n",
|
||||
" 35544.05464669 71047.51543032]\n",
|
||||
"[1331246.16325187 2557493.23739203 871684.82823297 1495633.27572294\n",
|
||||
" 1549557.611513 634348.67092323]\n",
|
||||
"0.5601419746121148\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## scaled features\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"scaler = StandardScaler()\n",
|
||||
"scaler.fit(X_train)\n",
|
||||
"X_train_sc = scaler.transform(X_train)\n",
|
||||
"X_test_sc = scaler.transform(X_test)\n",
|
||||
"\n",
|
||||
"model = LinearRegression()\n",
|
||||
"model.fit(X_train_sc, y_train)\n",
|
||||
"y_pred = model.predict(X_test_sc)\n",
|
||||
"\n",
|
||||
"print(model.intercept_)\n",
|
||||
"print(model.coef_[:6])\n",
|
||||
"print(y_pred[:6])\n",
|
||||
"print(r2_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit**\n",
|
||||
"* Auswirkung von Skalieren der Features\n",
|
||||
" * Koeffizienten und Intercept: Einfluss\n",
|
||||
" * Prediction: kein Einfluss\n",
|
||||
" * Score: natürlich auch kein Einfluss, wird ja aus Prediction berechnet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.5519266421486302\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## log target\n",
|
||||
"y_train_log = np.log10(y_train)\n",
|
||||
"y_test_log = np.log10(y_test)\n",
|
||||
"\n",
|
||||
"model = LinearRegression()\n",
|
||||
"model.fit(X_train, y_train_log)\n",
|
||||
"y_pred = model.predict(X_test)\n",
|
||||
"print(r2_score(10**y_test_log, 10**y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit**\n",
|
||||
"* wird sogar etwas schlechter\n",
|
||||
"* Kombination mit skalierten Features erübrigt sich hier, da Skalieren ja offenbar keinen Einfluss auf den Score hat"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "1",
|
||||
"nav_menu": {},
|
||||
"number_sections": false,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": true,
|
||||
"title_cell": "WS 11 Regression - mit FE - solution",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.933px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "321.85px",
|
||||
"left": "785px",
|
||||
"right": "20px",
|
||||
"top": "118px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,212 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 14 Random Search CV"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* untersuchen Sie Kombinationen von Parameterwerten bei RandomForestClassifier\n",
|
||||
"* Vorschlag:\n",
|
||||
" * n_estimators in [50, 100, 150, 200]\n",
|
||||
" * max_features in [3, 5, 7, 9]\n",
|
||||
" * criterion in ['gini', 'entropy']\n",
|
||||
" * min_samples_leaf in [1, 2, 3, 4]\n",
|
||||
"* wenden Sie 5-fach Kreuzvalidierung an\n",
|
||||
"* setzen Sie die Anzahl der zu untersuchenden Kombinationen auf 12\n",
|
||||
"* arbeiten Sie ohne Setzen von random_state, damit anschliessend die Ergebnisse verglichen werden können"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## import libraries\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"## load data\n",
|
||||
"datapath = '../../3_data'\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"bank_df = pd.read_csv('bank_data_prep.csv')\n",
|
||||
"\n",
|
||||
"## features - target - split\n",
|
||||
"X = bank_df.drop('y', axis=1)\n",
|
||||
"y = bank_df['y']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"best_params_ : {'n_estimators': 50, 'min_samples_leaf': 4, 'max_features': 9, 'criterion': 'entropy'}\n",
|
||||
"best_score_ : 0.8884381338742393\n",
|
||||
"CPU times: total: 3.09 s\n",
|
||||
"Wall time: 43.6 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"## import classes from sklearn\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.model_selection import RandomizedSearchCV\n",
|
||||
"\n",
|
||||
"## define parameter grid\n",
|
||||
"parameter_grid = {'n_estimators': [50, 100, 150, 200],\n",
|
||||
" 'max_features': [3, 5, 7, 9],\n",
|
||||
" 'criterion': ['gini', 'entropy'],\n",
|
||||
" 'min_samples_leaf': [1, 2, 3, 4]}\n",
|
||||
"\n",
|
||||
"## define RandomizedSearchCV\n",
|
||||
"rscv = RandomizedSearchCV(\n",
|
||||
" estimator=RandomForestClassifier(random_state=1234), \n",
|
||||
" param_distributions=parameter_grid, \n",
|
||||
" cv=5,\n",
|
||||
" n_iter=12,\n",
|
||||
" random_state=1234,\n",
|
||||
" n_jobs=-1)\n",
|
||||
"\n",
|
||||
"## run RandomizedSearchCV\n",
|
||||
"rscv.fit(X, y)\n",
|
||||
"\n",
|
||||
"## evaluate RandomizedSearchCV\n",
|
||||
"print('best_params_ :', rscv.best_params_)\n",
|
||||
"print('best_score_ :', rscv.best_score_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#rscv.best_estimator_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(9860, 29)"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit:**\n",
|
||||
"* n_estimators: [50, 100, 150, 200] -> 50\n",
|
||||
" * hier müsste der Suchbereich nach unten erweitert werden\n",
|
||||
"* max_features: [3, 5, 7, 9] -> 9\n",
|
||||
" * hier müsste der Suchbereich nach oben erweitert werden\n",
|
||||
"* criterion: ['gini', 'entropy']\n",
|
||||
" * Suchbereich ok\n",
|
||||
"* min_samples_leaf: [1, 2, 3, 4] -> 4\n",
|
||||
" * hier müsste der Suchbereich nach oben erweitert werden"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 17 Validierung - Random Search CV",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": false,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.867px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "306.85px",
|
||||
"left": "862px",
|
||||
"right": "20px",
|
||||
"top": "137px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Binary file not shown.
@@ -0,0 +1,234 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"# WS 02 Feature Engineering Exploration Overview.ipynb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* compiles the most important characteristic values from a loaded data frame and stores them in an Excel spreadsheet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" index var_names dtypes nas uniques modes \\\n",
|
||||
"0 0 Unnamed: 0 int64 0 18396 None \n",
|
||||
"1 1 Suburb object 0 330 Reservoir \n",
|
||||
"2 2 Address object 0 18134 1/1 Clarendon St \n",
|
||||
"3 3 Rooms int64 0 11 None \n",
|
||||
"4 4 Type object 0 3 h \n",
|
||||
"5 5 Price float64 0 2470 None \n",
|
||||
"6 6 Method object 0 5 S \n",
|
||||
"7 7 SellerG object 0 305 Nelson \n",
|
||||
"8 8 Date object 0 58 27/05/2017 \n",
|
||||
"9 9 Distance float64 1 210 None \n",
|
||||
"10 10 Postcode float64 1 205 None \n",
|
||||
"11 11 Bedroom2 float64 3469 12 None \n",
|
||||
"12 12 Bathroom float64 3471 9 None \n",
|
||||
"13 13 Car float64 3576 11 None \n",
|
||||
"14 14 Landsize float64 4793 1449 None \n",
|
||||
"15 15 BuildingArea float64 10634 613 None \n",
|
||||
"16 16 YearBuilt float64 9438 144 None \n",
|
||||
"17 17 CouncilArea object 6163 33 Moreland \n",
|
||||
"18 18 Lattitude float64 3332 7518 None \n",
|
||||
"19 19 Longtitude float64 3332 8168 None \n",
|
||||
"20 20 Regionname object 1 8 Southern Metropolitan \n",
|
||||
"21 21 Propertycount float64 1 324 None \n",
|
||||
"\n",
|
||||
" means medians \n",
|
||||
"0 1.182679e+04 11820.500000 \n",
|
||||
"1 NaN NaN \n",
|
||||
"2 NaN NaN \n",
|
||||
"3 2.935040e+00 3.000000 \n",
|
||||
"4 NaN NaN \n",
|
||||
"5 1.056697e+06 880000.000000 \n",
|
||||
"6 NaN NaN \n",
|
||||
"7 NaN NaN \n",
|
||||
"8 NaN NaN \n",
|
||||
"9 1.038999e+01 9.700000 \n",
|
||||
"10 3.107140e+03 3085.000000 \n",
|
||||
"11 2.913043e+00 3.000000 \n",
|
||||
"12 1.538492e+00 1.000000 \n",
|
||||
"13 1.615520e+00 2.000000 \n",
|
||||
"14 5.581164e+02 440.000000 \n",
|
||||
"15 1.512202e+02 126.000000 \n",
|
||||
"16 1.965880e+03 1970.000000 \n",
|
||||
"17 NaN NaN \n",
|
||||
"18 -3.780985e+01 -37.803625 \n",
|
||||
"19 1.449963e+02 145.000920 \n",
|
||||
"20 NaN NaN \n",
|
||||
"21 7.517975e+03 6567.000000 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## import libraries\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"## define data path\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"## load data\n",
|
||||
"data = pd.read_csv('melb_data.csv')\n",
|
||||
"#data.info()\n",
|
||||
"\n",
|
||||
"## var names\n",
|
||||
"var_names = pd.Series(data.columns)\n",
|
||||
"#print(var_names)\n",
|
||||
"\n",
|
||||
"dtypes = pd.Series(data.dtypes.values)\n",
|
||||
"#print(dtypes)\n",
|
||||
"\n",
|
||||
"## nas\n",
|
||||
"nas = pd.Series(data.isna().sum().values)\n",
|
||||
"#print(nas)\n",
|
||||
"\n",
|
||||
"## uniques\n",
|
||||
"uniques = []\n",
|
||||
"for c in var_names:\n",
|
||||
" uniques.append(data[c].nunique())\n",
|
||||
"uniques = pd.Series(uniques)\n",
|
||||
"#print(uniques)\n",
|
||||
"\n",
|
||||
"modes = []\n",
|
||||
"for i in range(len(var_names)):\n",
|
||||
" if dtypes[i] == 'object':\n",
|
||||
" #modes.append(data[c].mode()[0])\n",
|
||||
" modes.append(data[var_names[i]].mode()[0])\n",
|
||||
" \n",
|
||||
" else:\n",
|
||||
" modes.append(None)\n",
|
||||
"modes = pd.Series(modes)\n",
|
||||
"#print(modes)\n",
|
||||
"\n",
|
||||
"## means\n",
|
||||
"means = []\n",
|
||||
"for i in range(len(var_names)):\n",
|
||||
" if dtypes[i] != 'object':\n",
|
||||
" means.append(data[var_names[i]].mean())\n",
|
||||
" else:\n",
|
||||
" means.append(None)\n",
|
||||
"means = pd.Series(means)\n",
|
||||
"#print(means)\n",
|
||||
"\n",
|
||||
"## medians\n",
|
||||
"medians = []\n",
|
||||
"for i in range(len(var_names)):\n",
|
||||
" if dtypes[i] != 'object':\n",
|
||||
" medians.append(data[var_names[i]].median())\n",
|
||||
" else:\n",
|
||||
" medians.append(None)\n",
|
||||
"medians = pd.Series(medians)\n",
|
||||
"#print(medians)\n",
|
||||
"\n",
|
||||
"## collect results\n",
|
||||
"overview = pd.DataFrame(dict(\n",
|
||||
" var_names = var_names, \n",
|
||||
" dtypes = dtypes,\n",
|
||||
" nas = nas,\n",
|
||||
" uniques = uniques,\n",
|
||||
" modes = modes,\n",
|
||||
" means = means,\n",
|
||||
" medians = medians\n",
|
||||
")).reset_index()\n",
|
||||
"print(overview)\n",
|
||||
"\n",
|
||||
"overview.to_excel('ws_02_overview.xlsx', index=False)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "1",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 02 Feature Engineering - Exploration kategoriale Variablen",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "180.6px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"oldHeight": 217.64999999999998,
|
||||
"position": {
|
||||
"height": "238.85px",
|
||||
"left": "802.2px",
|
||||
"right": "20px",
|
||||
"top": "116px",
|
||||
"width": "326.8px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"varInspector_section_display": "block",
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,201 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# WS 04 Vorlage - KNeighborsClassifier"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* standardisieren Sie die Features von Trainings- und Testdaten mit Hilfe von sklearn.preprocessing.StandardScaler\n",
|
||||
"* ermitteln Sie anschliessend die besten Parameterwerte für KNeighborsClassifier\n",
|
||||
" * n_neighbors (1-10)\n",
|
||||
" * p (z.B. 1, 2, 3)\n",
|
||||
"* vergleichen Sie die Ergebnisse ohne und mit standardisieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## import libraries\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"## load data\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"data = pd.read_csv('bank_data_prep.csv')\n",
|
||||
"#data.shape ## check\n",
|
||||
"\n",
|
||||
"## features - target - split\n",
|
||||
"X = data.drop('y', axis=1)\n",
|
||||
"y = data['y']\n",
|
||||
"\n",
|
||||
"## test - train - split\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"X_train, X_test, y_train, y_test, = train_test_split(X,\n",
|
||||
" y,\n",
|
||||
" train_size=2 / 3,\n",
|
||||
" random_state=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"rem: für die obige Datenaufbereitung wird ab dem nächsten Workshop die Funktion `prep_data()` aus dem Modul `bfh_cas_pml` verwendet werden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## standardiz features (lead: train data)\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"scaler = StandardScaler().fit(X_train)\n",
|
||||
"X_train_scaled = scaler.transform(X_train)\n",
|
||||
"X_test_scaled = scaler.transform(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1\n",
|
||||
"2\n",
|
||||
"3\n",
|
||||
"4\n",
|
||||
"5\n",
|
||||
"6\n",
|
||||
"7\n",
|
||||
"8\n",
|
||||
"9\n",
|
||||
"10\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## Tune über n_neighbors\n",
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"model = KNeighborsClassifier()\n",
|
||||
"params = range(1, 11)\n",
|
||||
"scores = [] ## scores ohne Standardisieren\n",
|
||||
"scores_sc = [] ## scores mit Standardisieren\n",
|
||||
"\n",
|
||||
"for param in params:\n",
|
||||
" print(param)\n",
|
||||
" ## tbd\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Tune über p\n",
|
||||
"params = range(1, 4) ## dasselbe wie [1, 2, 3]\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit**:\n",
|
||||
"* tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 07 Klassifikation - KNeighborsClassifier",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "205.2px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,165 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 05 Klassifikation - DecisionTreeClassifier "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* untersuchen Sie verschiedene Werte von min_impurity_decrease bei DecisionTreeClassifier auf die erreichbare Performance (Accuracy)\n",
|
||||
"* grenzen Sie dabei den zu untersuchenden Wertebereich schrittweise ein\n",
|
||||
"* stellen Sie dazu die Ergebnisse wie folgt dar\n",
|
||||
" * grafisch als Liniendiagramm\n",
|
||||
" * in der Konsole mit bestem Score und entsprechendem Parameterwert\n",
|
||||
"* Hinweis\n",
|
||||
" * `range()`: erstellt einen Bereich von Ganzzahligen Werten mit identischer Schrittweite\n",
|
||||
" * `np.arange()`: (Funktion von numpy) erstellt mit analoger Parametrisierung einen Bereich mit Gleitkommawerten"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"\n",
|
||||
"sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code' ## for import of user defined module\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"\n",
|
||||
"from sys import path\n",
|
||||
"path.insert(1, codepath)\n",
|
||||
"\n",
|
||||
"from os import chdir\n",
|
||||
"chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv',\n",
|
||||
" target='y',\n",
|
||||
" seed=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.0\n",
|
||||
"0.01\n",
|
||||
"0.02\n",
|
||||
"0.03\n",
|
||||
"0.04\n",
|
||||
"0.05\n",
|
||||
"0.06\n",
|
||||
"0.07\n",
|
||||
"0.08\n",
|
||||
"0.09\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"model = DecisionTreeClassifier()\n",
|
||||
"\n",
|
||||
"scores = []\n",
|
||||
"params = np.arange(0, 0.1, 0.01)\n",
|
||||
"\n",
|
||||
"for param in params:\n",
|
||||
" print(param)\n",
|
||||
" ## tbd\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "0",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 08 Klassifikation - DecisionTreeClassifier",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "165px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,271 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 06 Klassifikation - RandomForestClassifier"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* untersuchen Sie die folgenden Tuning-Parameter von RandomForestClassifier in Bezug auf die erreichte Performance (accuracy_score) mit dem vorbereiteten Dataset:\n",
|
||||
" * n_estimators als `range(100, 500, 50)`\n",
|
||||
" * max_features als `range(1, 11)`\n",
|
||||
" * min_impurity_decrease als `np.arange(0, 0.1, 0.01)`\n",
|
||||
"* wie wirkt sich der random_state aus?\n",
|
||||
"* welche der ausserdem zur Verfügung stehenden Parameter sind keine Tuning Parameter? Konsultieren Sie dazu die (Online-) Dokumentation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code' ## for import of user defined module\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv', target='y', seed=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.ensemble import RandomForestClassifier"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"n_estimators:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100\n",
|
||||
"150\n",
|
||||
"200\n",
|
||||
"250\n",
|
||||
"300\n",
|
||||
"350\n",
|
||||
"400\n",
|
||||
"450\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = RandomForestClassifier()\n",
|
||||
"scores = []\n",
|
||||
"params = range(100, 500, 50)\n",
|
||||
"\n",
|
||||
"for param in params:\n",
|
||||
" print(param)\n",
|
||||
" ## tbd\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"## tbd\n",
|
||||
"#fig = sns.lineplot(x=params, y=scores)\n",
|
||||
"#...\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"max_features:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1\n",
|
||||
"2\n",
|
||||
"3\n",
|
||||
"4\n",
|
||||
"5\n",
|
||||
"6\n",
|
||||
"7\n",
|
||||
"8\n",
|
||||
"9\n",
|
||||
"10\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = RandomForestClassifier()\n",
|
||||
"scores = []\n",
|
||||
"params = range(1, 11)\n",
|
||||
"\n",
|
||||
"for param in params:\n",
|
||||
" print(param)\n",
|
||||
" ## tbd\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"min_impurity_decrease:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.0\n",
|
||||
"0.01\n",
|
||||
"0.02\n",
|
||||
"0.03\n",
|
||||
"0.04\n",
|
||||
"0.05\n",
|
||||
"0.06\n",
|
||||
"0.07\n",
|
||||
"0.08\n",
|
||||
"0.09\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = RandomForestClassifier()\n",
|
||||
"scores = []\n",
|
||||
"params = np.arange(0, 0.1, 0.01)\n",
|
||||
"\n",
|
||||
"for param in params:\n",
|
||||
" print(param)\n",
|
||||
" ## tbd\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit:**\n",
|
||||
"* tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"keine Tuning Parameter sind hier:\n",
|
||||
"* tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "2.2",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 09 Klassifikation - RandomForestClassifier",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "205.2px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,223 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"<h1>WS 10 Klassifikation - Modellvergleiche<span class=\"tocSkip\"></span></h1>\n",
|
||||
"<div class=\"toc\"><ul class=\"toc-item\"></ul></div>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* vergleichen Sie alle bis jetzt vorgestellten Klassifikatoren miteinander in Bezug auf\n",
|
||||
" * Performance\n",
|
||||
" * Rechenzeiten, differenziert nach .fit() und .predict() \n",
|
||||
" und visualisieren Sie die Ergebnisse\n",
|
||||
"* Tipp: modifizieren / ergänzen Sie dazu den abgegebenen Code von Kapitel 2.2.6 Modellvergleiche\n",
|
||||
"\n",
|
||||
"* optional: fügen Sie andere, im Kurs nicht behandelte Klassifikatoren dazu, welche Sie in der Dokumentation von scikit-learn finden\n",
|
||||
"* optional: falls Sie im Rahmen von Feaure Engineering alternatives Preprocessing erarbeitet haben, können Sie die Auswirkungen desselben jetzt auch noch einbeziehen\n",
|
||||
"* optional: wie wirkt sich Skalierung (z.B. mit StandardScaler) auf die Performance von MLPClassifier aus?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code' ## for import of user defined module\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv', target='y', seed=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Funktionen (Klassen) importieren\n",
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"## tbd ergänzen\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"import time ## für Zeitmessung"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Modelle definieren und in Liste hinterlegen\n",
|
||||
"models = [\n",
|
||||
" KNeighborsClassifier(),\n",
|
||||
" DecisionTreeClassifier(min_impurity_decrease=0.002),\n",
|
||||
" RandomForestClassifier(n_estimators=100)\n",
|
||||
" ## tbd ergänzen\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"KNeighborsClassifier()\n",
|
||||
"DecisionTreeClassifier(min_impurity_decrease=0.002)\n",
|
||||
"RandomForestClassifier()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## zum Sammeln der Resultate\n",
|
||||
"scores = []\n",
|
||||
"times_fit = []\n",
|
||||
"times_pred = []\n",
|
||||
"model_names = []\n",
|
||||
"\n",
|
||||
"#print('Classifier Score Time fit Time pred')\n",
|
||||
"#print('====================================================================')\n",
|
||||
"\n",
|
||||
"## Loop\n",
|
||||
"for model in models:\n",
|
||||
" print(model)\n",
|
||||
" ## tbd\n",
|
||||
" \n",
|
||||
" ## start timer1 - fit - stop timer1\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" ## start timer2 - predict - stop timer2\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" ## berechne Score & pick Modellname\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" ## Ergebnisse an vorbereitete Listen anhängen\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" ## Iterationsergebnisse in Konsole ausgeben (optional)\n",
|
||||
"\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## visualisieren\n",
|
||||
"## tbd ergänzen\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit:** \n",
|
||||
"* tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 10 Klassifikation - Modellvergleiche",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "205.2px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": false
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 08 Regression mit Standardisieren und Logarithmieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* untersuchen Sie den Einfluss des Standardisierens der Features auf folgende Ergebnisse der Linearen Regression:\n",
|
||||
" * Modellkoeffizienten\n",
|
||||
" * Predictions\n",
|
||||
" * Score\n",
|
||||
"* untersuchen Sie den Einfluss des Logarithmierens des Targets auf die Performance der Linearen Regression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code' ## for import of user defined module\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"#codepath = '.././2_code' ## for import of user defined module\n",
|
||||
"#datapath = '../../3_data'\n",
|
||||
"\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-105513873.23403685\n",
|
||||
"[ 245383.60581414 -141356.39759052 -40383.66643969 161336.03949841\n",
|
||||
" 40391.14829949 83303.27089591]\n",
|
||||
"[1331246.16325189 2557493.2373921 871684.82823291 1495633.275723\n",
|
||||
" 1549557.61151302 634348.67092323]\n",
|
||||
"0.5601419746121152\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## baseline\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"from sklearn.metrics import r2_score\n",
|
||||
"model = LinearRegression()\n",
|
||||
"model.fit(X_train, y_train)\n",
|
||||
"y_pred = model.predict(X_test)\n",
|
||||
"\n",
|
||||
"print(model.intercept_)\n",
|
||||
"print(model.coef_[:6])\n",
|
||||
"print(y_pred[:6])\n",
|
||||
"print(r2_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## scaled features\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## log target\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit**\n",
|
||||
"* tbd"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "1",
|
||||
"nav_menu": {},
|
||||
"number_sections": false,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": true,
|
||||
"title_cell": "WS 11 Regression - mit FE - solution",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.933px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "321.85px",
|
||||
"left": "785px",
|
||||
"right": "20px",
|
||||
"top": "118px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 09 Tune AdaBoostRegressor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* es wurde festgestellt, dass z.B. AdaBoostRegressor unter Standard-Parametrisierung ein unbrauchbares Ergebnis liefert\n",
|
||||
"* untersuchen Sie das Potential von Parameter-Tuning für diesen Regressor\n",
|
||||
"* konzentrieren Sie sich auf folgende Parameter\n",
|
||||
" * learning_rate, Parameter von AdaBoostRegressor\n",
|
||||
" * max_depth, interner Parameter des Basis-Estimators, hier DecisionTreeRegressor\n",
|
||||
"* falls Zeit übrig, untersuchen Sie noch andere Regressoren Ihrer Wahl dahingehend"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code' ## for import of user defined module\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"#codepath = '.././2_code' ## for import of user defined module\n",
|
||||
"#datapath = '../../3_data'\n",
|
||||
"\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import test_regression_model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-04-08T10:06:45.098899Z",
|
||||
"start_time": "2020-04-08T10:06:44.257283Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"R2 = -0.3023\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## baseline\n",
|
||||
"from sklearn.ensemble import AdaBoostRegressor\n",
|
||||
"this_model = test_regression_model(\n",
|
||||
" AdaBoostRegressor(random_state=1234), \n",
|
||||
" X_train, y_train, X_test, y_test,\n",
|
||||
" show_plot=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## tune learning_rate\n",
|
||||
"## tbd: find parameter range here\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## tune max_depth\n",
|
||||
"from sklearn.tree import DecisionTreeRegressor\n",
|
||||
"## tbd: find parameter range here\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## best combination of single parameters\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit**:\n",
|
||||
"* tbd"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "1",
|
||||
"nav_menu": {},
|
||||
"number_sections": false,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": true,
|
||||
"title_cell": "WS 11 Regression - mit FE - solution",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.933px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "321.85px",
|
||||
"left": "785px",
|
||||
"right": "20px",
|
||||
"top": "118px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 10 Performancevergleiche Regression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Vergleichen Sie alle Regressoren (ausser `SVR` und `MLPRegressor`) mit folgenden Modifikationen\n",
|
||||
" * die Vergleiche werden ohne und mit Standardisierung der Features durchgeführt\n",
|
||||
" * die Resultate (r2_score) werden in Form einer Heatmap zusammengestellt\n",
|
||||
"* informieren Sie sich zum Vorgehen am Code in 3.4 Regression - Modellvergleiche.ipynb\n",
|
||||
"* Präsentation der Ergebnisse als \n",
|
||||
" * seaborn heatmap\n",
|
||||
" * alternative Visualisierung: Grouped barplots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## for scikit-learn 1.4.2, to silence warnings regarding physical cores\n",
|
||||
"import os\n",
|
||||
"os.environ['LOKY_MAX_CPU_COUNT'] = '4' ## depending on the hardware used"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code'\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('melb_data_prep.csv', target='Price', seed=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-04-08T10:33:02.116059Z",
|
||||
"start_time": "2020-04-08T10:33:02.087399Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## standardize features (lead: train)\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"scaler = StandardScaler()\n",
|
||||
"scaler.fit(X_train)\n",
|
||||
"X_train_sc = scaler.transform(X_train)\n",
|
||||
"X_test_sc = scaler.transform(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-04-08T10:33:02.294366Z",
|
||||
"start_time": "2020-04-08T10:33:02.120049Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## import trainer classes\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-04-08T10:33:02.326199Z",
|
||||
"start_time": "2020-04-08T10:33:02.299732Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## define models\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-04-08T10:33:15.141363Z",
|
||||
"start_time": "2020-04-08T10:33:02.341448Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## compare models\n",
|
||||
"## tbd: prepare empty lists for results\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# for model in models:\n",
|
||||
"\n",
|
||||
" ## not scaled\n",
|
||||
" ## tbd\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" \n",
|
||||
" ## scaled\n",
|
||||
" ## tbd\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-04-08T10:33:15.742776Z",
|
||||
"start_time": "2020-04-08T10:33:15.150619Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## visualize results\n",
|
||||
"\"\"\"\n",
|
||||
"scores = pd.DataFrame(\n",
|
||||
" {'r2_no': r2_nos, \n",
|
||||
" 'r2_yes': r2_yess\n",
|
||||
" }, index=regressors)\n",
|
||||
"\n",
|
||||
"sns.heatmap(scores);\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit:** \n",
|
||||
"* tbd"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "1",
|
||||
"nav_menu": {},
|
||||
"number_sections": false,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": true,
|
||||
"title_cell": "WS 14 Regression - Modellvergleiche 2",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.933px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": false
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "321.85px",
|
||||
"left": "785px",
|
||||
"right": "20px",
|
||||
"top": "118px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,217 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 11 permutation_importance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* ermitteln Sie die Importance der Features der *Rohdaten* von `melb_data.csv` unter Einsatz von `sklearn.inspection.permutation_importance`\n",
|
||||
"* setzen Sie dazu minimales Feature Engineering wie folgt ein:\n",
|
||||
" * entfernen fragwürdiger Variablen: 'Unnamed: 0', 'Suburb', 'Address', 'SellerG', 'Postcode', 'Bedroom2', 'Date', 'CouncilArea'\n",
|
||||
" * One-Hot encoding aller verbleibenden kategorialen Variablen (der Parameter `dummy_na=True` von `pd.get_dummies()` erstellt auch Dummy-Variablen für NAs)\n",
|
||||
" * einsetzen von geschätzten Werten für NAs in verbleibenden numerischen Variablen mit `sklearn.impute.KNNImputer`\n",
|
||||
"* danach:\n",
|
||||
" * features - target - split\n",
|
||||
" * **kein** train - test - split\n",
|
||||
" * ermitteln der Importance unter Einsatz von \n",
|
||||
" * `sklearn.inspection.permutation_importance`\n",
|
||||
" * `sklearn.tree.DecisionTreeRegressor`\n",
|
||||
" * tabellarische und graphische Darstellung der Ergebnisse"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code'\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## read data\n",
|
||||
"data = pd.read_csv('melb_data.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## drop columns\n",
|
||||
"vars_to_drop = ['Unnamed: 0', 'Suburb', 'Address', 'SellerG', 'Postcode', 'Bedroom2', 'Date', 'CouncilArea']\n",
|
||||
"data = data.drop(vars_to_drop, axis=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## one-hot encode (incl. NAs)\n",
|
||||
"data = pd.get_dummies(data, drop_first=False, dummy_na=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## KNNImputer for NAs\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## features - target - split\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## permutation_importance\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## collect results in a dataframe, ordered by mean\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## visualize results\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "1",
|
||||
"nav_menu": {},
|
||||
"number_sections": false,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": true,
|
||||
"title_cell": "WS 14 Regression - Modellvergleiche 2 - solution",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.933px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": false
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "321.85px",
|
||||
"left": "785px",
|
||||
"right": "20px",
|
||||
"top": "118px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# WS 13 Kreuzvalidierung"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* vergleichen Sie alle bisher bekannten Klassifikatoren (ausser SVC und MLPClassifier) in Bezug auf deren Stabilität unter Anwendung von Kreuzvalidierung\n",
|
||||
"* verwenden Sie für die Klassifikatoren jeweils Default-Parametrisierung\n",
|
||||
"* setzen Sie für die Kreuzvalidierung folgende Funktion ein: `sklearn.model_selection.cross_val_score`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## load libraries\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"## load data\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"bank_df = pd.read_csv('bank_data_prep.csv')\n",
|
||||
"\n",
|
||||
"## features - target - split\n",
|
||||
"X = bank_df.drop('y', axis=1)\n",
|
||||
"y = bank_df['y']\n",
|
||||
"\n",
|
||||
"## train - test - split\n",
|
||||
"## obsolete here, is done internally by cross validation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"KNeighborsClassifier\n",
|
||||
"DecisionTreeClassifier\n",
|
||||
"RandomForestClassifier\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
||||
"from sklearn.tree import DecisionTreeClassifier\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"## tbd complete\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"\n",
|
||||
"from sklearn.model_selection import cross_val_score\n",
|
||||
"\n",
|
||||
"models = [\n",
|
||||
" KNeighborsClassifier(),\n",
|
||||
" DecisionTreeClassifier(),\n",
|
||||
" RandomForestClassifier()\n",
|
||||
" ## tbd complete\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"kfold = 5\n",
|
||||
"model_names = []\n",
|
||||
"model_scores = []\n",
|
||||
"\n",
|
||||
"for model in models:\n",
|
||||
" model_name = model.__class__.__name__\n",
|
||||
" print(model_name)\n",
|
||||
" ## tbd\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## manage results, e.g. in pandas dataframe\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## visualize results\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit:**\n",
|
||||
"* tbd"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 16 Validierung - Kreuzvalidierung",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "165px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "306.85px",
|
||||
"left": "862px",
|
||||
"right": "20px",
|
||||
"top": "137px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,165 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 14 Random Search CV"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* untersuchen Sie Kombinationen von Parameterwerten bei RandomForestClassifier\n",
|
||||
"* Vorschlag:\n",
|
||||
" * n_estimators in [50, 100, 150, 200]\n",
|
||||
" * max_features in [3, 5, 7, 9]\n",
|
||||
" * criterion in ['gini', 'entropy']\n",
|
||||
" * min_samples_leaf in [1, 2, 3, 4]\n",
|
||||
"* wenden Sie 5-fach Kreuzvalidierung an\n",
|
||||
"* setzen Sie die Anzahl der zu untersuchenden Kombinationen auf 12\n",
|
||||
"* arbeiten Sie ohne setzen von random_state, damit anschliessend die Ergebnisse verglichen werden können"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## import libraries\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"## load data\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"bank_df = pd.read_csv('bank_data_prep.csv')\n",
|
||||
"\n",
|
||||
"## features - target - split\n",
|
||||
"X = bank_df.drop('y', axis=1)\n",
|
||||
"y = bank_df['y']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## import classes from sklearn\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn.model_selection import RandomizedSearchCV\n",
|
||||
"\n",
|
||||
"## define parameter grid\n",
|
||||
"## tbd\n",
|
||||
"#parameter_grid = ...\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## define RandomizedSearchCV\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## run RandomizedSearchCV\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## evaluate RandomizedSearchCV\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Fazit:**\n",
|
||||
"* tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 17 Validierung - Random Search CV",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.867px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "306.85px",
|
||||
"left": "862px",
|
||||
"right": "20px",
|
||||
"top": "137px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,216 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": [],
|
||||
"toc": true
|
||||
},
|
||||
"source": [
|
||||
"# WS 15 Schwellenwert für Accuracy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* `.predict_proba()` gibt bei allen Klassifikatoren die Wahrscheinlichkeit für die Zugehörigkeit zu den einzelnen Klassen zurück, `predict()` dagegen die wahrscheinlichste Klasse selber\n",
|
||||
"* für Zwei-Klassen Fragestellungen bedeutet dies, dass bei einer Wahrscheinlichkeit (proba) `> 0.5` für die erste Klasse diese zurückgegeben wird, andernfalls die zweite Klasse, `0.5` ist somit ein scheinbar willkürlicher Schwellenwert\n",
|
||||
"* untersuchen Sie die Auswirkung anderer Schwellenwerte auf die Accuracy mit `RandomForestClassifier` auf den aufbereiteten Bankkunden-Daten\n",
|
||||
"\n",
|
||||
"* vorgeschlagenes Vorgehen:\n",
|
||||
" * trainieren eines RandomForestClassifier mit den vorbereiteten Bankkundendaten (Trainingsdaten)\n",
|
||||
" * bestimmen der Wahrscheinlichkeit für jede Beobachtung der entsprechenden Testdaten zur Klasse 'no'\n",
|
||||
" * erstellen eines Range der zu untersuchenden Schwellenwerte, z.B. mit np.arange()\n",
|
||||
" * in einem Loop über alle Werte dieses Ranges\n",
|
||||
" * `y_pred` für den jeweiligen Schwellenwert berechnen (wiederum als `['no', 'yes']`)\n",
|
||||
" * `accuracy_score()` der jeweiligen Prediction (und sammeln in einer Liste)\n",
|
||||
" * ausgeben des besten Score-Wertes und des zugehörigen Schwellenwertes in der Konsole\n",
|
||||
" * visualisieren der Ergebnisse auch als Lineplot "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## prepare env, read and prepare data\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns; sns.set()\n",
|
||||
"\n",
|
||||
"codepath = '../2_code' ## for import of user defined module\n",
|
||||
"datapath = '../3_data'\n",
|
||||
"\n",
|
||||
"from sys import path; path.insert(1, codepath)\n",
|
||||
"from os import chdir; chdir(datapath)\n",
|
||||
"\n",
|
||||
"from bfh_cas_pml import prep_data\n",
|
||||
"X_train, X_test, y_train, y_test = prep_data('bank_data_prep.csv', target='y', seed=1234)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2020-04-15T21:03:01.918701Z",
|
||||
"start_time": "2020-04-15T21:03:01.844142Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## train a model\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier \n",
|
||||
"model = RandomForestClassifier(random_state=1234)\n",
|
||||
"model.fit(X_train, y_train) \n",
|
||||
"\n",
|
||||
"## prediction using .predict_proba()\n",
|
||||
"y_pred_p_no = model.predict_proba(X_test)[:, 0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.0\n",
|
||||
"0.1\n",
|
||||
"0.2\n",
|
||||
"0.30000000000000004\n",
|
||||
"0.4\n",
|
||||
"0.5\n",
|
||||
"0.6000000000000001\n",
|
||||
"0.7000000000000001\n",
|
||||
"0.8\n",
|
||||
"0.9\n",
|
||||
"1.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## inspect different threshold values\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"\n",
|
||||
"thresholds = np.arange(0, 1.01, 0.1) ## test over 10\n",
|
||||
"#thresholds = np.arange(0, 1.01, 0.01)\n",
|
||||
"\n",
|
||||
"scores = []\n",
|
||||
"\n",
|
||||
"for threshold in thresholds:\n",
|
||||
" ## tbd\n",
|
||||
" print(threshold)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## results\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## visualization\n",
|
||||
"## tbd\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
},
|
||||
"toc": {
|
||||
"base_numbering": "",
|
||||
"nav_menu": {},
|
||||
"number_sections": true,
|
||||
"sideBar": true,
|
||||
"skip_h1_title": false,
|
||||
"title_cell": "WS 15 Validierung - Sampling",
|
||||
"title_sidebar": "Contents",
|
||||
"toc_cell": true,
|
||||
"toc_position": {
|
||||
"height": "calc(100% - 180px)",
|
||||
"left": "10px",
|
||||
"top": "150px",
|
||||
"width": "195.867px"
|
||||
},
|
||||
"toc_section_display": true,
|
||||
"toc_window_display": true
|
||||
},
|
||||
"varInspector": {
|
||||
"cols": {
|
||||
"lenName": 16,
|
||||
"lenType": 16,
|
||||
"lenVar": 40
|
||||
},
|
||||
"kernels_config": {
|
||||
"python": {
|
||||
"delete_cmd_postfix": "",
|
||||
"delete_cmd_prefix": "del ",
|
||||
"library": "var_list.py",
|
||||
"varRefreshCmd": "print(var_dic_list())"
|
||||
},
|
||||
"r": {
|
||||
"delete_cmd_postfix": ") ",
|
||||
"delete_cmd_prefix": "rm(",
|
||||
"library": "var_list.r",
|
||||
"varRefreshCmd": "cat(var_dic_list()) "
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"height": "306.85px",
|
||||
"left": "862px",
|
||||
"right": "20px",
|
||||
"top": "137px",
|
||||
"width": "350px"
|
||||
},
|
||||
"types_to_exclude": [
|
||||
"module",
|
||||
"function",
|
||||
"builtin_function_or_method",
|
||||
"instance",
|
||||
"_Feature"
|
||||
],
|
||||
"window_display": false
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user