Files
cas-pml/SL/aufgaben/template/2_Code/5.5 Deployment und Abschluss - pycaret.ipynb
2026-05-21 14:16:30 +02:00

1389 lines
63 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deployment und Abschluss - pycaret"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"## Install\n",
"\n",
"Attention: PyCaret does not run natively on Apple Silicon and in this case it is recommended to use Docker instead\n",
"\n",
"ref: https://pycaret.gitbook.io/docs/get-started/installation\n",
"* You can install PyCaret with Python's pip package manager:"
]
},
{
"cell_type": "raw",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"!pip install pycaret"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load and prep Data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2021-12-25T16:56:08.445356Z",
"start_time": "2021-12-25T16:56:06.863957Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(9860, 30)\n"
]
}
],
"source": [
"## load data\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"datapath = '../3_data'\n",
"from os import chdir; chdir(datapath)\n",
"dataset = pd.read_csv('bank_data_prep.csv')\n",
"print(dataset.shape)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"## remove duration\n",
"dataset = dataset.drop(\"duration\", axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data for Modeling: (8874, 29)\n",
"Unseen Data For Predictions: (986, 29)\n"
]
}
],
"source": [
"## train - test - split\n",
"from sklearn.model_selection import train_test_split\n",
"data, data_unseen = train_test_split(dataset, train_size=0.9, random_state=1234)\n",
"\n",
"print('Data for Modeling: ' + str(data.shape))\n",
"print('Unseen Data For Predictions: ' + str(data_unseen.shape))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run a Classication Experiment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Init setup"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_69e32_row9_col1 {\n",
" background-color: lightgreen;\n",
"}\n",
"</style>\n",
"<table id=\"T_69e32\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_69e32_level0_col0\" class=\"col_heading level0 col0\" >Description</th>\n",
" <th id=\"T_69e32_level0_col1\" class=\"col_heading level0 col1\" >Value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
" <td id=\"T_69e32_row0_col0\" class=\"data row0 col0\" >Session id</td>\n",
" <td id=\"T_69e32_row0_col1\" class=\"data row0 col1\" >1234</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
" <td id=\"T_69e32_row1_col0\" class=\"data row1 col0\" >Target</td>\n",
" <td id=\"T_69e32_row1_col1\" class=\"data row1 col1\" >y</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
" <td id=\"T_69e32_row2_col0\" class=\"data row2 col0\" >Target type</td>\n",
" <td id=\"T_69e32_row2_col1\" class=\"data row2 col1\" >Binary</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
" <td id=\"T_69e32_row3_col0\" class=\"data row3 col0\" >Target mapping</td>\n",
" <td id=\"T_69e32_row3_col1\" class=\"data row3 col1\" >no: 0, yes: 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
" <td id=\"T_69e32_row4_col0\" class=\"data row4 col0\" >Original data shape</td>\n",
" <td id=\"T_69e32_row4_col1\" class=\"data row4 col1\" >(8874, 29)</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n",
" <td id=\"T_69e32_row5_col0\" class=\"data row5 col0\" >Transformed data shape</td>\n",
" <td id=\"T_69e32_row5_col1\" class=\"data row5 col1\" >(8874, 29)</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n",
" <td id=\"T_69e32_row6_col0\" class=\"data row6 col0\" >Transformed train set shape</td>\n",
" <td id=\"T_69e32_row6_col1\" class=\"data row6 col1\" >(6211, 29)</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n",
" <td id=\"T_69e32_row7_col0\" class=\"data row7 col0\" >Transformed test set shape</td>\n",
" <td id=\"T_69e32_row7_col1\" class=\"data row7 col1\" >(2663, 29)</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n",
" <td id=\"T_69e32_row8_col0\" class=\"data row8 col0\" >Numeric features</td>\n",
" <td id=\"T_69e32_row8_col1\" class=\"data row8 col1\" >14</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n",
" <td id=\"T_69e32_row9_col0\" class=\"data row9 col0\" >Preprocess</td>\n",
" <td id=\"T_69e32_row9_col1\" class=\"data row9 col1\" >True</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row10\" class=\"row_heading level0 row10\" >10</th>\n",
" <td id=\"T_69e32_row10_col0\" class=\"data row10 col0\" >Imputation type</td>\n",
" <td id=\"T_69e32_row10_col1\" class=\"data row10 col1\" >simple</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row11\" class=\"row_heading level0 row11\" >11</th>\n",
" <td id=\"T_69e32_row11_col0\" class=\"data row11 col0\" >Numeric imputation</td>\n",
" <td id=\"T_69e32_row11_col1\" class=\"data row11 col1\" >mean</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row12\" class=\"row_heading level0 row12\" >12</th>\n",
" <td id=\"T_69e32_row12_col0\" class=\"data row12 col0\" >Categorical imputation</td>\n",
" <td id=\"T_69e32_row12_col1\" class=\"data row12 col1\" >mode</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row13\" class=\"row_heading level0 row13\" >13</th>\n",
" <td id=\"T_69e32_row13_col0\" class=\"data row13 col0\" >Fold Generator</td>\n",
" <td id=\"T_69e32_row13_col1\" class=\"data row13 col1\" >StratifiedKFold</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row14\" class=\"row_heading level0 row14\" >14</th>\n",
" <td id=\"T_69e32_row14_col0\" class=\"data row14 col0\" >Fold Number</td>\n",
" <td id=\"T_69e32_row14_col1\" class=\"data row14 col1\" >5</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row15\" class=\"row_heading level0 row15\" >15</th>\n",
" <td id=\"T_69e32_row15_col0\" class=\"data row15 col0\" >CPU Jobs</td>\n",
" <td id=\"T_69e32_row15_col1\" class=\"data row15 col1\" >-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row16\" class=\"row_heading level0 row16\" >16</th>\n",
" <td id=\"T_69e32_row16_col0\" class=\"data row16 col0\" >Use GPU</td>\n",
" <td id=\"T_69e32_row16_col1\" class=\"data row16 col1\" >False</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row17\" class=\"row_heading level0 row17\" >17</th>\n",
" <td id=\"T_69e32_row17_col0\" class=\"data row17 col0\" >Log Experiment</td>\n",
" <td id=\"T_69e32_row17_col1\" class=\"data row17 col1\" >False</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row18\" class=\"row_heading level0 row18\" >18</th>\n",
" <td id=\"T_69e32_row18_col0\" class=\"data row18 col0\" >Experiment Name</td>\n",
" <td id=\"T_69e32_row18_col1\" class=\"data row18 col1\" >clf-default-name</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_69e32_level0_row19\" class=\"row_heading level0 row19\" >19</th>\n",
" <td id=\"T_69e32_row19_col0\" class=\"data row19 col0\" >USI</td>\n",
" <td id=\"T_69e32_row19_col1\" class=\"data row19 col1\" >9ce1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x289e745df90>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from pycaret.classification import *\n",
"s = setup(\n",
" data = data, \n",
" target = 'y', \n",
" fold = 5, ## defaul = 10\n",
" session_id=1234) ## random seed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show available Models (for Classification)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Reference</th>\n",
" <th>Turbo</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>lr</th>\n",
" <td>Logistic Regression</td>\n",
" <td>sklearn.linear_model._logistic.LogisticRegression</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>knn</th>\n",
" <td>K Neighbors Classifier</td>\n",
" <td>sklearn.neighbors._classification.KNeighborsCl...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>nb</th>\n",
" <td>Naive Bayes</td>\n",
" <td>sklearn.naive_bayes.GaussianNB</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dt</th>\n",
" <td>Decision Tree Classifier</td>\n",
" <td>sklearn.tree._classes.DecisionTreeClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>svm</th>\n",
" <td>SVM - Linear Kernel</td>\n",
" <td>sklearn.linear_model._stochastic_gradient.SGDC...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rbfsvm</th>\n",
" <td>SVM - Radial Kernel</td>\n",
" <td>sklearn.svm._classes.SVC</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>gpc</th>\n",
" <td>Gaussian Process Classifier</td>\n",
" <td>sklearn.gaussian_process._gpc.GaussianProcessC...</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mlp</th>\n",
" <td>MLP Classifier</td>\n",
" <td>sklearn.neural_network._multilayer_perceptron....</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ridge</th>\n",
" <td>Ridge Classifier</td>\n",
" <td>sklearn.linear_model._ridge.RidgeClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rf</th>\n",
" <td>Random Forest Classifier</td>\n",
" <td>sklearn.ensemble._forest.RandomForestClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>qda</th>\n",
" <td>Quadratic Discriminant Analysis</td>\n",
" <td>sklearn.discriminant_analysis.QuadraticDiscrim...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ada</th>\n",
" <td>Ada Boost Classifier</td>\n",
" <td>sklearn.ensemble._weight_boosting.AdaBoostClas...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>gbc</th>\n",
" <td>Gradient Boosting Classifier</td>\n",
" <td>sklearn.ensemble._gb.GradientBoostingClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>lda</th>\n",
" <td>Linear Discriminant Analysis</td>\n",
" <td>sklearn.discriminant_analysis.LinearDiscrimina...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>et</th>\n",
" <td>Extra Trees Classifier</td>\n",
" <td>sklearn.ensemble._forest.ExtraTreesClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>lightgbm</th>\n",
" <td>Light Gradient Boosting Machine</td>\n",
" <td>lightgbm.sklearn.LGBMClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>catboost</th>\n",
" <td>CatBoost Classifier</td>\n",
" <td>catboost.core.CatBoostClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dummy</th>\n",
" <td>Dummy Classifier</td>\n",
" <td>sklearn.dummy.DummyClassifier</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Name \\\n",
"ID \n",
"lr Logistic Regression \n",
"knn K Neighbors Classifier \n",
"nb Naive Bayes \n",
"dt Decision Tree Classifier \n",
"svm SVM - Linear Kernel \n",
"rbfsvm SVM - Radial Kernel \n",
"gpc Gaussian Process Classifier \n",
"mlp MLP Classifier \n",
"ridge Ridge Classifier \n",
"rf Random Forest Classifier \n",
"qda Quadratic Discriminant Analysis \n",
"ada Ada Boost Classifier \n",
"gbc Gradient Boosting Classifier \n",
"lda Linear Discriminant Analysis \n",
"et Extra Trees Classifier \n",
"lightgbm Light Gradient Boosting Machine \n",
"catboost CatBoost Classifier \n",
"dummy Dummy Classifier \n",
"\n",
" Reference Turbo \n",
"ID \n",
"lr sklearn.linear_model._logistic.LogisticRegression True \n",
"knn sklearn.neighbors._classification.KNeighborsCl... True \n",
"nb sklearn.naive_bayes.GaussianNB True \n",
"dt sklearn.tree._classes.DecisionTreeClassifier True \n",
"svm sklearn.linear_model._stochastic_gradient.SGDC... True \n",
"rbfsvm sklearn.svm._classes.SVC False \n",
"gpc sklearn.gaussian_process._gpc.GaussianProcessC... False \n",
"mlp sklearn.neural_network._multilayer_perceptron.... False \n",
"ridge sklearn.linear_model._ridge.RidgeClassifier True \n",
"rf sklearn.ensemble._forest.RandomForestClassifier True \n",
"qda sklearn.discriminant_analysis.QuadraticDiscrim... True \n",
"ada sklearn.ensemble._weight_boosting.AdaBoostClas... True \n",
"gbc sklearn.ensemble._gb.GradientBoostingClassifier True \n",
"lda sklearn.discriminant_analysis.LinearDiscrimina... True \n",
"et sklearn.ensemble._forest.ExtraTreesClassifier True \n",
"lightgbm lightgbm.sklearn.LGBMClassifier True \n",
"catboost catboost.core.CatBoostClassifier True \n",
"dummy sklearn.dummy.DummyClassifier True "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"models()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model Training and Selection"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_c3bc4 th {\n",
" text-align: left;\n",
"}\n",
"#T_c3bc4_row0_col0, #T_c3bc4_row1_col0, #T_c3bc4_row1_col1, #T_c3bc4_row1_col2, #T_c3bc4_row1_col3, #T_c3bc4_row1_col4, #T_c3bc4_row1_col5, #T_c3bc4_row1_col6, #T_c3bc4_row1_col7, #T_c3bc4_row2_col0, #T_c3bc4_row2_col1, #T_c3bc4_row2_col2, #T_c3bc4_row2_col3, #T_c3bc4_row2_col4, #T_c3bc4_row2_col5, #T_c3bc4_row2_col6, #T_c3bc4_row2_col7, #T_c3bc4_row3_col0, #T_c3bc4_row3_col1, #T_c3bc4_row3_col2, #T_c3bc4_row3_col3, #T_c3bc4_row3_col4, #T_c3bc4_row3_col5, #T_c3bc4_row3_col6, #T_c3bc4_row3_col7, #T_c3bc4_row4_col0, #T_c3bc4_row4_col1, #T_c3bc4_row4_col2, #T_c3bc4_row4_col3, #T_c3bc4_row4_col4, #T_c3bc4_row4_col5, #T_c3bc4_row4_col6, #T_c3bc4_row4_col7, #T_c3bc4_row5_col0, #T_c3bc4_row5_col1, #T_c3bc4_row5_col2, #T_c3bc4_row5_col3, #T_c3bc4_row5_col4, #T_c3bc4_row5_col5, #T_c3bc4_row5_col6, #T_c3bc4_row5_col7, #T_c3bc4_row6_col0, #T_c3bc4_row6_col1, #T_c3bc4_row6_col2, #T_c3bc4_row6_col3, #T_c3bc4_row6_col4, #T_c3bc4_row6_col5, #T_c3bc4_row6_col6, #T_c3bc4_row6_col7, #T_c3bc4_row7_col0, #T_c3bc4_row7_col1, #T_c3bc4_row7_col2, #T_c3bc4_row7_col3, #T_c3bc4_row7_col4, #T_c3bc4_row7_col5, #T_c3bc4_row7_col6, #T_c3bc4_row7_col7, #T_c3bc4_row8_col0, #T_c3bc4_row8_col1, #T_c3bc4_row8_col2, #T_c3bc4_row8_col3, #T_c3bc4_row8_col4, #T_c3bc4_row8_col5, #T_c3bc4_row8_col6, #T_c3bc4_row8_col7, #T_c3bc4_row9_col0, #T_c3bc4_row9_col1, #T_c3bc4_row9_col2, #T_c3bc4_row9_col3, #T_c3bc4_row9_col4, #T_c3bc4_row9_col5, #T_c3bc4_row9_col6, #T_c3bc4_row9_col7, #T_c3bc4_row10_col0, #T_c3bc4_row10_col1, #T_c3bc4_row10_col2, #T_c3bc4_row10_col3, #T_c3bc4_row10_col4, #T_c3bc4_row10_col5, #T_c3bc4_row10_col6, #T_c3bc4_row10_col7, #T_c3bc4_row11_col0, #T_c3bc4_row11_col1, #T_c3bc4_row11_col2, #T_c3bc4_row11_col3, #T_c3bc4_row11_col4, #T_c3bc4_row11_col5, #T_c3bc4_row11_col6, #T_c3bc4_row11_col7, #T_c3bc4_row12_col0, #T_c3bc4_row12_col1, #T_c3bc4_row12_col2, #T_c3bc4_row12_col3, #T_c3bc4_row12_col4, #T_c3bc4_row12_col5, #T_c3bc4_row12_col6, #T_c3bc4_row12_col7, #T_c3bc4_row13_col0, #T_c3bc4_row13_col1, #T_c3bc4_row13_col2, #T_c3bc4_row13_col3, #T_c3bc4_row13_col4, #T_c3bc4_row13_col5, #T_c3bc4_row13_col6, #T_c3bc4_row13_col7, #T_c3bc4_row14_col0, #T_c3bc4_row14_col1, #T_c3bc4_row14_col2, #T_c3bc4_row14_col3, #T_c3bc4_row14_col4, #T_c3bc4_row14_col5, #T_c3bc4_row14_col6, #T_c3bc4_row14_col7 {\n",
" text-align: left;\n",
"}\n",
"#T_c3bc4_row0_col1, #T_c3bc4_row0_col2, #T_c3bc4_row0_col3, #T_c3bc4_row0_col4, #T_c3bc4_row0_col5, #T_c3bc4_row0_col6, #T_c3bc4_row0_col7 {\n",
" text-align: left;\n",
" background-color: yellow;\n",
"}\n",
"#T_c3bc4_row0_col8, #T_c3bc4_row1_col8, #T_c3bc4_row2_col8, #T_c3bc4_row3_col8, #T_c3bc4_row4_col8, #T_c3bc4_row5_col8, #T_c3bc4_row6_col8, #T_c3bc4_row7_col8, #T_c3bc4_row8_col8, #T_c3bc4_row9_col8, #T_c3bc4_row10_col8, #T_c3bc4_row11_col8, #T_c3bc4_row12_col8, #T_c3bc4_row14_col8 {\n",
" text-align: left;\n",
" background-color: lightgrey;\n",
"}\n",
"#T_c3bc4_row13_col8 {\n",
" text-align: left;\n",
" background-color: yellow;\n",
" background-color: lightgrey;\n",
"}\n",
"</style>\n",
"<table id=\"T_c3bc4\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_c3bc4_level0_col0\" class=\"col_heading level0 col0\" >Model</th>\n",
" <th id=\"T_c3bc4_level0_col1\" class=\"col_heading level0 col1\" >Accuracy</th>\n",
" <th id=\"T_c3bc4_level0_col2\" class=\"col_heading level0 col2\" >AUC</th>\n",
" <th id=\"T_c3bc4_level0_col3\" class=\"col_heading level0 col3\" >Recall</th>\n",
" <th id=\"T_c3bc4_level0_col4\" class=\"col_heading level0 col4\" >Prec.</th>\n",
" <th id=\"T_c3bc4_level0_col5\" class=\"col_heading level0 col5\" >F1</th>\n",
" <th id=\"T_c3bc4_level0_col6\" class=\"col_heading level0 col6\" >Kappa</th>\n",
" <th id=\"T_c3bc4_level0_col7\" class=\"col_heading level0 col7\" >MCC</th>\n",
" <th id=\"T_c3bc4_level0_col8\" class=\"col_heading level0 col8\" >TT (Sec)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row0\" class=\"row_heading level0 row0\" >gbc</th>\n",
" <td id=\"T_c3bc4_row0_col0\" class=\"data row0 col0\" >Gradient Boosting Classifier</td>\n",
" <td id=\"T_c3bc4_row0_col1\" class=\"data row0 col1\" >0.7617</td>\n",
" <td id=\"T_c3bc4_row0_col2\" class=\"data row0 col2\" >0.8020</td>\n",
" <td id=\"T_c3bc4_row0_col3\" class=\"data row0 col3\" >0.7617</td>\n",
" <td id=\"T_c3bc4_row0_col4\" class=\"data row0 col4\" >0.7730</td>\n",
" <td id=\"T_c3bc4_row0_col5\" class=\"data row0 col5\" >0.7567</td>\n",
" <td id=\"T_c3bc4_row0_col6\" class=\"data row0 col6\" >0.5144</td>\n",
" <td id=\"T_c3bc4_row0_col7\" class=\"data row0 col7\" >0.5290</td>\n",
" <td id=\"T_c3bc4_row0_col8\" class=\"data row0 col8\" >0.4640</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row1\" class=\"row_heading level0 row1\" >catboost</th>\n",
" <td id=\"T_c3bc4_row1_col0\" class=\"data row1 col0\" >CatBoost Classifier</td>\n",
" <td id=\"T_c3bc4_row1_col1\" class=\"data row1 col1\" >0.7582</td>\n",
" <td id=\"T_c3bc4_row1_col2\" class=\"data row1 col2\" >0.7996</td>\n",
" <td id=\"T_c3bc4_row1_col3\" class=\"data row1 col3\" >0.7582</td>\n",
" <td id=\"T_c3bc4_row1_col4\" class=\"data row1 col4\" >0.7681</td>\n",
" <td id=\"T_c3bc4_row1_col5\" class=\"data row1 col5\" >0.7534</td>\n",
" <td id=\"T_c3bc4_row1_col6\" class=\"data row1 col6\" >0.5074</td>\n",
" <td id=\"T_c3bc4_row1_col7\" class=\"data row1 col7\" >0.5207</td>\n",
" <td id=\"T_c3bc4_row1_col8\" class=\"data row1 col8\" >3.4800</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row2\" class=\"row_heading level0 row2\" >lightgbm</th>\n",
" <td id=\"T_c3bc4_row2_col0\" class=\"data row2 col0\" >Light Gradient Boosting Machine</td>\n",
" <td id=\"T_c3bc4_row2_col1\" class=\"data row2 col1\" >0.7543</td>\n",
" <td id=\"T_c3bc4_row2_col2\" class=\"data row2 col2\" >0.7954</td>\n",
" <td id=\"T_c3bc4_row2_col3\" class=\"data row2 col3\" >0.7543</td>\n",
" <td id=\"T_c3bc4_row2_col4\" class=\"data row2 col4\" >0.7632</td>\n",
" <td id=\"T_c3bc4_row2_col5\" class=\"data row2 col5\" >0.7498</td>\n",
" <td id=\"T_c3bc4_row2_col6\" class=\"data row2 col6\" >0.4998</td>\n",
" <td id=\"T_c3bc4_row2_col7\" class=\"data row2 col7\" >0.5120</td>\n",
" <td id=\"T_c3bc4_row2_col8\" class=\"data row2 col8\" >0.3200</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row3\" class=\"row_heading level0 row3\" >ada</th>\n",
" <td id=\"T_c3bc4_row3_col0\" class=\"data row3 col0\" >Ada Boost Classifier</td>\n",
" <td id=\"T_c3bc4_row3_col1\" class=\"data row3 col1\" >0.7492</td>\n",
" <td id=\"T_c3bc4_row3_col2\" class=\"data row3 col2\" >0.7957</td>\n",
" <td id=\"T_c3bc4_row3_col3\" class=\"data row3 col3\" >0.7492</td>\n",
" <td id=\"T_c3bc4_row3_col4\" class=\"data row3 col4\" >0.7616</td>\n",
" <td id=\"T_c3bc4_row3_col5\" class=\"data row3 col5\" >0.7432</td>\n",
" <td id=\"T_c3bc4_row3_col6\" class=\"data row3 col6\" >0.4882</td>\n",
" <td id=\"T_c3bc4_row3_col7\" class=\"data row3 col7\" >0.5044</td>\n",
" <td id=\"T_c3bc4_row3_col8\" class=\"data row3 col8\" >0.2180</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row4\" class=\"row_heading level0 row4\" >rf</th>\n",
" <td id=\"T_c3bc4_row4_col0\" class=\"data row4 col0\" >Random Forest Classifier</td>\n",
" <td id=\"T_c3bc4_row4_col1\" class=\"data row4 col1\" >0.7421</td>\n",
" <td id=\"T_c3bc4_row4_col2\" class=\"data row4 col2\" >0.7902</td>\n",
" <td id=\"T_c3bc4_row4_col3\" class=\"data row4 col3\" >0.7421</td>\n",
" <td id=\"T_c3bc4_row4_col4\" class=\"data row4 col4\" >0.7451</td>\n",
" <td id=\"T_c3bc4_row4_col5\" class=\"data row4 col5\" >0.7394</td>\n",
" <td id=\"T_c3bc4_row4_col6\" class=\"data row4 col6\" >0.4771</td>\n",
" <td id=\"T_c3bc4_row4_col7\" class=\"data row4 col7\" >0.4826</td>\n",
" <td id=\"T_c3bc4_row4_col8\" class=\"data row4 col8\" >0.4160</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row5\" class=\"row_heading level0 row5\" >lr</th>\n",
" <td id=\"T_c3bc4_row5_col0\" class=\"data row5 col0\" >Logistic Regression</td>\n",
" <td id=\"T_c3bc4_row5_col1\" class=\"data row5 col1\" >0.7393</td>\n",
" <td id=\"T_c3bc4_row5_col2\" class=\"data row5 col2\" >0.7852</td>\n",
" <td id=\"T_c3bc4_row5_col3\" class=\"data row5 col3\" >0.7393</td>\n",
" <td id=\"T_c3bc4_row5_col4\" class=\"data row5 col4\" >0.7435</td>\n",
" <td id=\"T_c3bc4_row5_col5\" class=\"data row5 col5\" >0.7361</td>\n",
" <td id=\"T_c3bc4_row5_col6\" class=\"data row5 col6\" >0.4709</td>\n",
" <td id=\"T_c3bc4_row5_col7\" class=\"data row5 col7\" >0.4778</td>\n",
" <td id=\"T_c3bc4_row5_col8\" class=\"data row5 col8\" >2.8060</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row6\" class=\"row_heading level0 row6\" >ridge</th>\n",
" <td id=\"T_c3bc4_row6_col0\" class=\"data row6 col0\" >Ridge Classifier</td>\n",
" <td id=\"T_c3bc4_row6_col1\" class=\"data row6 col1\" >0.7326</td>\n",
" <td id=\"T_c3bc4_row6_col2\" class=\"data row6 col2\" >0.7851</td>\n",
" <td id=\"T_c3bc4_row6_col3\" class=\"data row6 col3\" >0.7326</td>\n",
" <td id=\"T_c3bc4_row6_col4\" class=\"data row6 col4\" >0.7353</td>\n",
" <td id=\"T_c3bc4_row6_col5\" class=\"data row6 col5\" >0.7298</td>\n",
" <td id=\"T_c3bc4_row6_col6\" class=\"data row6 col6\" >0.4578</td>\n",
" <td id=\"T_c3bc4_row6_col7\" class=\"data row6 col7\" >0.4630</td>\n",
" <td id=\"T_c3bc4_row6_col8\" class=\"data row6 col8\" >0.0740</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row7\" class=\"row_heading level0 row7\" >lda</th>\n",
" <td id=\"T_c3bc4_row7_col0\" class=\"data row7 col0\" >Linear Discriminant Analysis</td>\n",
" <td id=\"T_c3bc4_row7_col1\" class=\"data row7 col1\" >0.7324</td>\n",
" <td id=\"T_c3bc4_row7_col2\" class=\"data row7 col2\" >0.7851</td>\n",
" <td id=\"T_c3bc4_row7_col3\" class=\"data row7 col3\" >0.7324</td>\n",
" <td id=\"T_c3bc4_row7_col4\" class=\"data row7 col4\" >0.7351</td>\n",
" <td id=\"T_c3bc4_row7_col5\" class=\"data row7 col5\" >0.7296</td>\n",
" <td id=\"T_c3bc4_row7_col6\" class=\"data row7 col6\" >0.4575</td>\n",
" <td id=\"T_c3bc4_row7_col7\" class=\"data row7 col7\" >0.4627</td>\n",
" <td id=\"T_c3bc4_row7_col8\" class=\"data row7 col8\" >0.0780</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row8\" class=\"row_heading level0 row8\" >et</th>\n",
" <td id=\"T_c3bc4_row8_col0\" class=\"data row8 col0\" >Extra Trees Classifier</td>\n",
" <td id=\"T_c3bc4_row8_col1\" class=\"data row8 col1\" >0.7224</td>\n",
" <td id=\"T_c3bc4_row8_col2\" class=\"data row8 col2\" >0.7732</td>\n",
" <td id=\"T_c3bc4_row8_col3\" class=\"data row8 col3\" >0.7224</td>\n",
" <td id=\"T_c3bc4_row8_col4\" class=\"data row8 col4\" >0.7232</td>\n",
" <td id=\"T_c3bc4_row8_col5\" class=\"data row8 col5\" >0.7206</td>\n",
" <td id=\"T_c3bc4_row8_col6\" class=\"data row8 col6\" >0.4387</td>\n",
" <td id=\"T_c3bc4_row8_col7\" class=\"data row8 col7\" >0.4414</td>\n",
" <td id=\"T_c3bc4_row8_col8\" class=\"data row8 col8\" >0.4140</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row9\" class=\"row_heading level0 row9\" >qda</th>\n",
" <td id=\"T_c3bc4_row9_col0\" class=\"data row9 col0\" >Quadratic Discriminant Analysis</td>\n",
" <td id=\"T_c3bc4_row9_col1\" class=\"data row9 col1\" >0.7221</td>\n",
" <td id=\"T_c3bc4_row9_col2\" class=\"data row9 col2\" >0.7713</td>\n",
" <td id=\"T_c3bc4_row9_col3\" class=\"data row9 col3\" >0.7221</td>\n",
" <td id=\"T_c3bc4_row9_col4\" class=\"data row9 col4\" >0.7379</td>\n",
" <td id=\"T_c3bc4_row9_col5\" class=\"data row9 col5\" >0.7132</td>\n",
" <td id=\"T_c3bc4_row9_col6\" class=\"data row9 col6\" >0.4313</td>\n",
" <td id=\"T_c3bc4_row9_col7\" class=\"data row9 col7\" >0.4520</td>\n",
" <td id=\"T_c3bc4_row9_col8\" class=\"data row9 col8\" >0.0900</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row10\" class=\"row_heading level0 row10\" >nb</th>\n",
" <td id=\"T_c3bc4_row10_col0\" class=\"data row10 col0\" >Naive Bayes</td>\n",
" <td id=\"T_c3bc4_row10_col1\" class=\"data row10 col1\" >0.7150</td>\n",
" <td id=\"T_c3bc4_row10_col2\" class=\"data row10 col2\" >0.7637</td>\n",
" <td id=\"T_c3bc4_row10_col3\" class=\"data row10 col3\" >0.7150</td>\n",
" <td id=\"T_c3bc4_row10_col4\" class=\"data row10 col4\" >0.7169</td>\n",
" <td id=\"T_c3bc4_row10_col5\" class=\"data row10 col5\" >0.7122</td>\n",
" <td id=\"T_c3bc4_row10_col6\" class=\"data row10 col6\" >0.4224</td>\n",
" <td id=\"T_c3bc4_row10_col7\" class=\"data row10 col7\" >0.4269</td>\n",
" <td id=\"T_c3bc4_row10_col8\" class=\"data row10 col8\" >0.0640</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row11\" class=\"row_heading level0 row11\" >knn</th>\n",
" <td id=\"T_c3bc4_row11_col0\" class=\"data row11 col0\" >K Neighbors Classifier</td>\n",
" <td id=\"T_c3bc4_row11_col1\" class=\"data row11 col1\" >0.7142</td>\n",
" <td id=\"T_c3bc4_row11_col2\" class=\"data row11 col2\" >0.7454</td>\n",
" <td id=\"T_c3bc4_row11_col3\" class=\"data row11 col3\" >0.7142</td>\n",
" <td id=\"T_c3bc4_row11_col4\" class=\"data row11 col4\" >0.7144</td>\n",
" <td id=\"T_c3bc4_row11_col5\" class=\"data row11 col5\" >0.7129</td>\n",
" <td id=\"T_c3bc4_row11_col6\" class=\"data row11 col6\" >0.4230</td>\n",
" <td id=\"T_c3bc4_row11_col7\" class=\"data row11 col7\" >0.4247</td>\n",
" <td id=\"T_c3bc4_row11_col8\" class=\"data row11 col8\" >2.5480</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row12\" class=\"row_heading level0 row12\" >dt</th>\n",
" <td id=\"T_c3bc4_row12_col0\" class=\"data row12 col0\" >Decision Tree Classifier</td>\n",
" <td id=\"T_c3bc4_row12_col1\" class=\"data row12 col1\" >0.6614</td>\n",
" <td id=\"T_c3bc4_row12_col2\" class=\"data row12 col2\" >0.6608</td>\n",
" <td id=\"T_c3bc4_row12_col3\" class=\"data row12 col3\" >0.6614</td>\n",
" <td id=\"T_c3bc4_row12_col4\" class=\"data row12 col4\" >0.6618</td>\n",
" <td id=\"T_c3bc4_row12_col5\" class=\"data row12 col5\" >0.6615</td>\n",
" <td id=\"T_c3bc4_row12_col6\" class=\"data row12 col6\" >0.3206</td>\n",
" <td id=\"T_c3bc4_row12_col7\" class=\"data row12 col7\" >0.3207</td>\n",
" <td id=\"T_c3bc4_row12_col8\" class=\"data row12 col8\" >0.0840</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row13\" class=\"row_heading level0 row13\" >dummy</th>\n",
" <td id=\"T_c3bc4_row13_col0\" class=\"data row13 col0\" >Dummy Classifier</td>\n",
" <td id=\"T_c3bc4_row13_col1\" class=\"data row13 col1\" >0.5320</td>\n",
" <td id=\"T_c3bc4_row13_col2\" class=\"data row13 col2\" >0.5000</td>\n",
" <td id=\"T_c3bc4_row13_col3\" class=\"data row13 col3\" >0.5320</td>\n",
" <td id=\"T_c3bc4_row13_col4\" class=\"data row13 col4\" >0.2830</td>\n",
" <td id=\"T_c3bc4_row13_col5\" class=\"data row13 col5\" >0.3694</td>\n",
" <td id=\"T_c3bc4_row13_col6\" class=\"data row13 col6\" >0.0000</td>\n",
" <td id=\"T_c3bc4_row13_col7\" class=\"data row13 col7\" >0.0000</td>\n",
" <td id=\"T_c3bc4_row13_col8\" class=\"data row13 col8\" >0.0620</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_c3bc4_level0_row14\" class=\"row_heading level0 row14\" >svm</th>\n",
" <td id=\"T_c3bc4_row14_col0\" class=\"data row14 col0\" >SVM - Linear Kernel</td>\n",
" <td id=\"T_c3bc4_row14_col1\" class=\"data row14 col1\" >0.5118</td>\n",
" <td id=\"T_c3bc4_row14_col2\" class=\"data row14 col2\" >0.7084</td>\n",
" <td id=\"T_c3bc4_row14_col3\" class=\"data row14 col3\" >0.5118</td>\n",
" <td id=\"T_c3bc4_row14_col4\" class=\"data row14 col4\" >0.4463</td>\n",
" <td id=\"T_c3bc4_row14_col5\" class=\"data row14 col5\" >0.3527</td>\n",
" <td id=\"T_c3bc4_row14_col6\" class=\"data row14 col6\" >0.0124</td>\n",
" <td id=\"T_c3bc4_row14_col7\" class=\"data row14 col7\" >0.0413</td>\n",
" <td id=\"T_c3bc4_row14_col8\" class=\"data row14 col8\" >0.1960</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x289f24e2990>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing: 0%| | 0/65 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best_model = compare_models()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"MCC, see: https://en.wikipedia.org/wiki/Phi_coefficient"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,\n",
" learning_rate=0.1, loss='log_loss', max_depth=3,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" n_estimators=100, n_iter_no_change=None,\n",
" random_state=1234, subsample=1.0, tol=0.0001,\n",
" validation_fraction=0.1, verbose=0,\n",
" warm_start=False)\n"
]
}
],
"source": [
"print(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analyze best Model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0a3a89d79b974d2f9db0d40bb6869fdb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"evaluate_model(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Predict on unseen Data"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"</style>\n",
"<table id=\"T_8e5a7\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_8e5a7_level0_col0\" class=\"col_heading level0 col0\" >Model</th>\n",
" <th id=\"T_8e5a7_level0_col1\" class=\"col_heading level0 col1\" >Accuracy</th>\n",
" <th id=\"T_8e5a7_level0_col2\" class=\"col_heading level0 col2\" >AUC</th>\n",
" <th id=\"T_8e5a7_level0_col3\" class=\"col_heading level0 col3\" >Recall</th>\n",
" <th id=\"T_8e5a7_level0_col4\" class=\"col_heading level0 col4\" >Prec.</th>\n",
" <th id=\"T_8e5a7_level0_col5\" class=\"col_heading level0 col5\" >F1</th>\n",
" <th id=\"T_8e5a7_level0_col6\" class=\"col_heading level0 col6\" >Kappa</th>\n",
" <th id=\"T_8e5a7_level0_col7\" class=\"col_heading level0 col7\" >MCC</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_8e5a7_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
" <td id=\"T_8e5a7_row0_col0\" class=\"data row0 col0\" >Gradient Boosting Classifier</td>\n",
" <td id=\"T_8e5a7_row0_col1\" class=\"data row0 col1\" >0.7495</td>\n",
" <td id=\"T_8e5a7_row0_col2\" class=\"data row0 col2\" >0.7990</td>\n",
" <td id=\"T_8e5a7_row0_col3\" class=\"data row0 col3\" >0.7495</td>\n",
" <td id=\"T_8e5a7_row0_col4\" class=\"data row0 col4\" >0.7626</td>\n",
" <td id=\"T_8e5a7_row0_col5\" class=\"data row0 col5\" >0.7457</td>\n",
" <td id=\"T_8e5a7_row0_col6\" class=\"data row0 col6\" >0.4972</td>\n",
" <td id=\"T_8e5a7_row0_col7\" class=\"data row0 col7\" >0.5109</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x289f24d4050>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" age education housing contact_cellular month day_of_week \\\n",
"2809 36.0 6 0 1 5 2 \n",
"4052 34.0 6 1 1 8 3 \n",
"658 36.0 6 1 0 5 2 \n",
"786 40.0 5 0 0 5 3 \n",
"6675 36.0 6 0 1 11 3 \n",
"\n",
" campaign pdays previous emp_var_rate ... job_student \\\n",
"2809 0.477121 0 0 -1.8 ... False \n",
"4052 0.698970 0 0 1.4 ... False \n",
"658 0.903090 0 0 -1.8 ... False \n",
"786 0.698970 0 0 1.1 ... False \n",
"6675 0.477121 0 0 -0.1 ... False \n",
"\n",
" job_technician job_unemployed marital_married marital_single \\\n",
"2809 False False False True \n",
"4052 True False False True \n",
"658 False False False True \n",
"786 False False True False \n",
"6675 False False False True \n",
"\n",
" loan_unknown loan_yes y prediction_label prediction_score \n",
"2809 False False yes no 0.5681 \n",
"4052 False True yes no 0.6703 \n",
"658 False False yes no 0.8444 \n",
"786 False False no no 0.7768 \n",
"6675 False False no no 0.6379 \n",
"\n",
"[5 rows x 31 columns]\n"
]
}
],
"source": [
"predictions = predict_model(best_model, data=data_unseen)\n",
"print(predictions.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save best Model Pipeline"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"save_model(best_model, 'best_model_pipeline')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tune a specific Model\n",
"ref: https://pycaret.gitbook.io/docs/get-started/functions/optimize"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### rf: Random Forest Classifier"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_7f597_row5_col0, #T_7f597_row5_col1, #T_7f597_row5_col2, #T_7f597_row5_col3, #T_7f597_row5_col4, #T_7f597_row5_col5, #T_7f597_row5_col6 {\n",
" background: yellow;\n",
"}\n",
"</style>\n",
"<table id=\"T_7f597\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_7f597_level0_col0\" class=\"col_heading level0 col0\" >Accuracy</th>\n",
" <th id=\"T_7f597_level0_col1\" class=\"col_heading level0 col1\" >AUC</th>\n",
" <th id=\"T_7f597_level0_col2\" class=\"col_heading level0 col2\" >Recall</th>\n",
" <th id=\"T_7f597_level0_col3\" class=\"col_heading level0 col3\" >Prec.</th>\n",
" <th id=\"T_7f597_level0_col4\" class=\"col_heading level0 col4\" >F1</th>\n",
" <th id=\"T_7f597_level0_col5\" class=\"col_heading level0 col5\" >Kappa</th>\n",
" <th id=\"T_7f597_level0_col6\" class=\"col_heading level0 col6\" >MCC</th>\n",
" </tr>\n",
" <tr>\n",
" <th class=\"index_name level0\" >Fold</th>\n",
" <th class=\"blank col0\" >&nbsp;</th>\n",
" <th class=\"blank col1\" >&nbsp;</th>\n",
" <th class=\"blank col2\" >&nbsp;</th>\n",
" <th class=\"blank col3\" >&nbsp;</th>\n",
" <th class=\"blank col4\" >&nbsp;</th>\n",
" <th class=\"blank col5\" >&nbsp;</th>\n",
" <th class=\"blank col6\" >&nbsp;</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_7f597_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
" <td id=\"T_7f597_row0_col0\" class=\"data row0 col0\" >0.7409</td>\n",
" <td id=\"T_7f597_row0_col1\" class=\"data row0 col1\" >0.7939</td>\n",
" <td id=\"T_7f597_row0_col2\" class=\"data row0 col2\" >0.7409</td>\n",
" <td id=\"T_7f597_row0_col3\" class=\"data row0 col3\" >0.7447</td>\n",
" <td id=\"T_7f597_row0_col4\" class=\"data row0 col4\" >0.7379</td>\n",
" <td id=\"T_7f597_row0_col5\" class=\"data row0 col5\" >0.4744</td>\n",
" <td id=\"T_7f597_row0_col6\" class=\"data row0 col6\" >0.4808</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_7f597_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
" <td id=\"T_7f597_row1_col0\" class=\"data row1 col0\" >0.7536</td>\n",
" <td id=\"T_7f597_row1_col1\" class=\"data row1 col1\" >0.7961</td>\n",
" <td id=\"T_7f597_row1_col2\" class=\"data row1 col2\" >0.7536</td>\n",
" <td id=\"T_7f597_row1_col3\" class=\"data row1 col3\" >0.7583</td>\n",
" <td id=\"T_7f597_row1_col4\" class=\"data row1 col4\" >0.7505</td>\n",
" <td id=\"T_7f597_row1_col5\" class=\"data row1 col5\" >0.4998</td>\n",
" <td id=\"T_7f597_row1_col6\" class=\"data row1 col6\" >0.5072</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_7f597_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
" <td id=\"T_7f597_row2_col0\" class=\"data row2 col0\" >0.7351</td>\n",
" <td id=\"T_7f597_row2_col1\" class=\"data row2 col1\" >0.7710</td>\n",
" <td id=\"T_7f597_row2_col2\" class=\"data row2 col2\" >0.7351</td>\n",
" <td id=\"T_7f597_row2_col3\" class=\"data row2 col3\" >0.7392</td>\n",
" <td id=\"T_7f597_row2_col4\" class=\"data row2 col4\" >0.7317</td>\n",
" <td id=\"T_7f597_row2_col5\" class=\"data row2 col5\" >0.4621</td>\n",
" <td id=\"T_7f597_row2_col6\" class=\"data row2 col6\" >0.4691</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_7f597_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
" <td id=\"T_7f597_row3_col0\" class=\"data row3 col0\" >0.7311</td>\n",
" <td id=\"T_7f597_row3_col1\" class=\"data row3 col1\" >0.7967</td>\n",
" <td id=\"T_7f597_row3_col2\" class=\"data row3 col2\" >0.7311</td>\n",
" <td id=\"T_7f597_row3_col3\" class=\"data row3 col3\" >0.7329</td>\n",
" <td id=\"T_7f597_row3_col4\" class=\"data row3 col4\" >0.7287</td>\n",
" <td id=\"T_7f597_row3_col5\" class=\"data row3 col5\" >0.4553</td>\n",
" <td id=\"T_7f597_row3_col6\" class=\"data row3 col6\" >0.4594</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_7f597_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
" <td id=\"T_7f597_row4_col0\" class=\"data row4 col0\" >0.7496</td>\n",
" <td id=\"T_7f597_row4_col1\" class=\"data row4 col1\" >0.7931</td>\n",
" <td id=\"T_7f597_row4_col2\" class=\"data row4 col2\" >0.7496</td>\n",
" <td id=\"T_7f597_row4_col3\" class=\"data row4 col3\" >0.7506</td>\n",
" <td id=\"T_7f597_row4_col4\" class=\"data row4 col4\" >0.7481</td>\n",
" <td id=\"T_7f597_row4_col5\" class=\"data row4 col5\" >0.4940</td>\n",
" <td id=\"T_7f597_row4_col6\" class=\"data row4 col6\" >0.4966</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_7f597_level0_row5\" class=\"row_heading level0 row5\" >Mean</th>\n",
" <td id=\"T_7f597_row5_col0\" class=\"data row5 col0\" >0.7421</td>\n",
" <td id=\"T_7f597_row5_col1\" class=\"data row5 col1\" >0.7902</td>\n",
" <td id=\"T_7f597_row5_col2\" class=\"data row5 col2\" >0.7421</td>\n",
" <td id=\"T_7f597_row5_col3\" class=\"data row5 col3\" >0.7451</td>\n",
" <td id=\"T_7f597_row5_col4\" class=\"data row5 col4\" >0.7394</td>\n",
" <td id=\"T_7f597_row5_col5\" class=\"data row5 col5\" >0.4771</td>\n",
" <td id=\"T_7f597_row5_col6\" class=\"data row5 col6\" >0.4826</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_7f597_level0_row6\" class=\"row_heading level0 row6\" >Std</th>\n",
" <td id=\"T_7f597_row6_col0\" class=\"data row6 col0\" >0.0085</td>\n",
" <td id=\"T_7f597_row6_col1\" class=\"data row6 col1\" >0.0097</td>\n",
" <td id=\"T_7f597_row6_col2\" class=\"data row6 col2\" >0.0085</td>\n",
" <td id=\"T_7f597_row6_col3\" class=\"data row6 col3\" >0.0088</td>\n",
" <td id=\"T_7f597_row6_col4\" class=\"data row6 col4\" >0.0087</td>\n",
" <td id=\"T_7f597_row6_col5\" class=\"data row6 col5\" >0.0174</td>\n",
" <td id=\"T_7f597_row6_col6\" class=\"data row6 col6\" >0.0175</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x289f25ec950>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing: 0%| | 0/4 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## create model\n",
"model_rf = create_model('rf')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_f7a8e_row5_col0, #T_f7a8e_row5_col1, #T_f7a8e_row5_col2, #T_f7a8e_row5_col3, #T_f7a8e_row5_col4, #T_f7a8e_row5_col5, #T_f7a8e_row5_col6 {\n",
" background: yellow;\n",
"}\n",
"</style>\n",
"<table id=\"T_f7a8e\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_f7a8e_level0_col0\" class=\"col_heading level0 col0\" >Accuracy</th>\n",
" <th id=\"T_f7a8e_level0_col1\" class=\"col_heading level0 col1\" >AUC</th>\n",
" <th id=\"T_f7a8e_level0_col2\" class=\"col_heading level0 col2\" >Recall</th>\n",
" <th id=\"T_f7a8e_level0_col3\" class=\"col_heading level0 col3\" >Prec.</th>\n",
" <th id=\"T_f7a8e_level0_col4\" class=\"col_heading level0 col4\" >F1</th>\n",
" <th id=\"T_f7a8e_level0_col5\" class=\"col_heading level0 col5\" >Kappa</th>\n",
" <th id=\"T_f7a8e_level0_col6\" class=\"col_heading level0 col6\" >MCC</th>\n",
" </tr>\n",
" <tr>\n",
" <th class=\"index_name level0\" >Fold</th>\n",
" <th class=\"blank col0\" >&nbsp;</th>\n",
" <th class=\"blank col1\" >&nbsp;</th>\n",
" <th class=\"blank col2\" >&nbsp;</th>\n",
" <th class=\"blank col3\" >&nbsp;</th>\n",
" <th class=\"blank col4\" >&nbsp;</th>\n",
" <th class=\"blank col5\" >&nbsp;</th>\n",
" <th class=\"blank col6\" >&nbsp;</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_f7a8e_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
" <td id=\"T_f7a8e_row0_col0\" class=\"data row0 col0\" >0.7562</td>\n",
" <td id=\"T_f7a8e_row0_col1\" class=\"data row0 col1\" >0.8053</td>\n",
" <td id=\"T_f7a8e_row0_col2\" class=\"data row0 col2\" >0.7562</td>\n",
" <td id=\"T_f7a8e_row0_col3\" class=\"data row0 col3\" >0.7654</td>\n",
" <td id=\"T_f7a8e_row0_col4\" class=\"data row0 col4\" >0.7517</td>\n",
" <td id=\"T_f7a8e_row0_col5\" class=\"data row0 col5\" >0.5037</td>\n",
" <td id=\"T_f7a8e_row0_col6\" class=\"data row0 col6\" >0.5161</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_f7a8e_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
" <td id=\"T_f7a8e_row1_col0\" class=\"data row1 col0\" >0.7689</td>\n",
" <td id=\"T_f7a8e_row1_col1\" class=\"data row1 col1\" >0.8134</td>\n",
" <td id=\"T_f7a8e_row1_col2\" class=\"data row1 col2\" >0.7689</td>\n",
" <td id=\"T_f7a8e_row1_col3\" class=\"data row1 col3\" >0.7825</td>\n",
" <td id=\"T_f7a8e_row1_col4\" class=\"data row1 col4\" >0.7635</td>\n",
" <td id=\"T_f7a8e_row1_col5\" class=\"data row1 col5\" >0.5285</td>\n",
" <td id=\"T_f7a8e_row1_col6\" class=\"data row1 col6\" >0.5456</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_f7a8e_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
" <td id=\"T_f7a8e_row2_col0\" class=\"data row2 col0\" >0.7544</td>\n",
" <td id=\"T_f7a8e_row2_col1\" class=\"data row2 col1\" >0.7897</td>\n",
" <td id=\"T_f7a8e_row2_col2\" class=\"data row2 col2\" >0.7544</td>\n",
" <td id=\"T_f7a8e_row2_col3\" class=\"data row2 col3\" >0.7672</td>\n",
" <td id=\"T_f7a8e_row2_col4\" class=\"data row2 col4\" >0.7485</td>\n",
" <td id=\"T_f7a8e_row2_col5\" class=\"data row2 col5\" >0.4988</td>\n",
" <td id=\"T_f7a8e_row2_col6\" class=\"data row2 col6\" >0.5154</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_f7a8e_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
" <td id=\"T_f7a8e_row3_col0\" class=\"data row3 col0\" >0.7617</td>\n",
" <td id=\"T_f7a8e_row3_col1\" class=\"data row3 col1\" >0.8021</td>\n",
" <td id=\"T_f7a8e_row3_col2\" class=\"data row3 col2\" >0.7617</td>\n",
" <td id=\"T_f7a8e_row3_col3\" class=\"data row3 col3\" >0.7709</td>\n",
" <td id=\"T_f7a8e_row3_col4\" class=\"data row3 col4\" >0.7572</td>\n",
" <td id=\"T_f7a8e_row3_col5\" class=\"data row3 col5\" >0.5147</td>\n",
" <td id=\"T_f7a8e_row3_col6\" class=\"data row3 col6\" >0.5272</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_f7a8e_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
" <td id=\"T_f7a8e_row4_col0\" class=\"data row4 col0\" >0.7689</td>\n",
" <td id=\"T_f7a8e_row4_col1\" class=\"data row4 col1\" >0.8071</td>\n",
" <td id=\"T_f7a8e_row4_col2\" class=\"data row4 col2\" >0.7689</td>\n",
" <td id=\"T_f7a8e_row4_col3\" class=\"data row4 col3\" >0.7759</td>\n",
" <td id=\"T_f7a8e_row4_col4\" class=\"data row4 col4\" >0.7655</td>\n",
" <td id=\"T_f7a8e_row4_col5\" class=\"data row4 col5\" >0.5305</td>\n",
" <td id=\"T_f7a8e_row4_col6\" class=\"data row4 col6\" >0.5402</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_f7a8e_level0_row5\" class=\"row_heading level0 row5\" >Mean</th>\n",
" <td id=\"T_f7a8e_row5_col0\" class=\"data row5 col0\" >0.7620</td>\n",
" <td id=\"T_f7a8e_row5_col1\" class=\"data row5 col1\" >0.8035</td>\n",
" <td id=\"T_f7a8e_row5_col2\" class=\"data row5 col2\" >0.7620</td>\n",
" <td id=\"T_f7a8e_row5_col3\" class=\"data row5 col3\" >0.7724</td>\n",
" <td id=\"T_f7a8e_row5_col4\" class=\"data row5 col4\" >0.7573</td>\n",
" <td id=\"T_f7a8e_row5_col5\" class=\"data row5 col5\" >0.5153</td>\n",
" <td id=\"T_f7a8e_row5_col6\" class=\"data row5 col6\" >0.5289</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_f7a8e_level0_row6\" class=\"row_heading level0 row6\" >Std</th>\n",
" <td id=\"T_f7a8e_row6_col0\" class=\"data row6 col0\" >0.0061</td>\n",
" <td id=\"T_f7a8e_row6_col1\" class=\"data row6 col1\" >0.0079</td>\n",
" <td id=\"T_f7a8e_row6_col2\" class=\"data row6 col2\" >0.0061</td>\n",
" <td id=\"T_f7a8e_row6_col3\" class=\"data row6 col3\" >0.0062</td>\n",
" <td id=\"T_f7a8e_row6_col4\" class=\"data row6 col4\" >0.0066</td>\n",
" <td id=\"T_f7a8e_row6_col5\" class=\"data row6 col5\" >0.0127</td>\n",
" <td id=\"T_f7a8e_row6_col6\" class=\"data row6 col6\" >0.0123</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x289e8bfeb90>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing: 0%| | 0/7 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 5 folds for each of 10 candidates, totalling 50 fits\n"
]
}
],
"source": [
"## tune model\n",
"model_rf_tuned = tune_model(model_rf)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
" criterion='gini', max_depth=None, max_features='sqrt',\n",
" max_leaf_nodes=None, max_samples=None,\n",
" min_impurity_decrease=0.0, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" monotonic_cst=None, n_estimators=100, n_jobs=-1,\n",
" oob_score=False, random_state=1234, verbose=0,\n",
" warm_start=False)\n"
]
}
],
"source": [
"## parameters of default model\n",
"print(model_rf)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,\n",
" class_weight='balanced_subsample', criterion='gini',\n",
" max_depth=10, max_features='sqrt', max_leaf_nodes=None,\n",
" max_samples=None, min_impurity_decrease=0,\n",
" min_samples_leaf=5, min_samples_split=7,\n",
" min_weight_fraction_leaf=0.0, monotonic_cst=None,\n",
" n_estimators=160, n_jobs=-1, oob_score=False,\n",
" random_state=1234, verbose=0, warm_start=False)\n"
]
}
],
"source": [
"## parameters of tuned model\n",
"print(model_rf_tuned)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"comparison of the default parameters and the tuned parameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"| parameter | default | tuned |\n",
"| :--- | :--- | :--- |\n",
"| ccp_alpha | 0 | 0 |\n",
"| class_weight | None | 'balanced_subsample' |\n",
"| criterion | 'gini' | 'gini' |\n",
"| max_depth | None | 10 |\n",
"| max_features | 'sqrt' | 'sqrt' |\n",
"| max_leaf_nodes | None | None |\n",
"| max_samples | None | None |\n",
"| min_impurity_decrease | 0 | 0 |\n",
"| min_samples_leaf | 1 | 5 |\n",
"| min_samples_split | 2 | 7 |\n",
"| min_weight_fraction_leaf | 0 | 0 |\n",
"| monotonic_cst | None | None |\n",
"| n_estimators | 100 | 160 |\n",
"| n_jobs | -1 | -1 |\n",
"| oob_score | False | False |\n",
"| random_state | 1234 | 1234 |\n",
"| verbose | 0 | 0 |\n",
"| warm_start | False | False |"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
},
"toc": {
"base_numbering": "5.5",
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "5.5 Deployment und Abschluss - pycaret",
"title_sidebar": "Contents",
"toc_cell": true,
"toc_position": {
"height": "370.667px",
"left": "25px",
"top": "110.233px",
"width": "187.667px"
},
"toc_section_display": true,
"toc_window_display": false
},
"toc-autonumbering": true,
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"position": {
"height": "234.85px",
"left": "911px",
"right": "20px",
"top": "120px",
"width": "350px"
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}