1389 lines
63 KiB
Plaintext
1389 lines
63 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Deployment und Abschluss - pycaret"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"## Install\n",
|
|
"\n",
|
|
"Attention: PyCaret does not run natively on Apple Silicon and in this case it is recommended to use Docker instead\n",
|
|
"\n",
|
|
"ref: https://pycaret.gitbook.io/docs/get-started/installation\n",
|
|
"* You can install PyCaret with Python's pip package manager:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "raw",
|
|
"metadata": {
|
|
"jp-MarkdownHeadingCollapsed": true,
|
|
"tags": []
|
|
},
|
|
"source": [
|
|
"!pip install pycaret"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Load and prep Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2021-12-25T16:56:08.445356Z",
|
|
"start_time": "2021-12-25T16:56:06.863957Z"
|
|
},
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(9860, 30)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"## load data\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"datapath = '../3_data'\n",
|
|
"from os import chdir; chdir(datapath)\n",
|
|
"dataset = pd.read_csv('bank_data_prep.csv')\n",
|
|
"print(dataset.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"## remove duration\n",
|
|
"dataset = dataset.drop(\"duration\", axis = 1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Data for Modeling: (8874, 29)\n",
|
|
"Unseen Data For Predictions: (986, 29)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"## train - test - split\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"data, data_unseen = train_test_split(dataset, train_size=0.9, random_state=1234)\n",
|
|
"\n",
|
|
"print('Data for Modeling: ' + str(data.shape))\n",
|
|
"print('Unseen Data For Predictions: ' + str(data_unseen.shape))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Run a Classication Experiment"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Init setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<style type=\"text/css\">\n",
|
|
"#T_69e32_row9_col1 {\n",
|
|
" background-color: lightgreen;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<table id=\"T_69e32\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th class=\"blank level0\" > </th>\n",
|
|
" <th id=\"T_69e32_level0_col0\" class=\"col_heading level0 col0\" >Description</th>\n",
|
|
" <th id=\"T_69e32_level0_col1\" class=\"col_heading level0 col1\" >Value</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
|
|
" <td id=\"T_69e32_row0_col0\" class=\"data row0 col0\" >Session id</td>\n",
|
|
" <td id=\"T_69e32_row0_col1\" class=\"data row0 col1\" >1234</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
|
|
" <td id=\"T_69e32_row1_col0\" class=\"data row1 col0\" >Target</td>\n",
|
|
" <td id=\"T_69e32_row1_col1\" class=\"data row1 col1\" >y</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
|
|
" <td id=\"T_69e32_row2_col0\" class=\"data row2 col0\" >Target type</td>\n",
|
|
" <td id=\"T_69e32_row2_col1\" class=\"data row2 col1\" >Binary</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
|
|
" <td id=\"T_69e32_row3_col0\" class=\"data row3 col0\" >Target mapping</td>\n",
|
|
" <td id=\"T_69e32_row3_col1\" class=\"data row3 col1\" >no: 0, yes: 1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
|
|
" <td id=\"T_69e32_row4_col0\" class=\"data row4 col0\" >Original data shape</td>\n",
|
|
" <td id=\"T_69e32_row4_col1\" class=\"data row4 col1\" >(8874, 29)</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n",
|
|
" <td id=\"T_69e32_row5_col0\" class=\"data row5 col0\" >Transformed data shape</td>\n",
|
|
" <td id=\"T_69e32_row5_col1\" class=\"data row5 col1\" >(8874, 29)</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n",
|
|
" <td id=\"T_69e32_row6_col0\" class=\"data row6 col0\" >Transformed train set shape</td>\n",
|
|
" <td id=\"T_69e32_row6_col1\" class=\"data row6 col1\" >(6211, 29)</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n",
|
|
" <td id=\"T_69e32_row7_col0\" class=\"data row7 col0\" >Transformed test set shape</td>\n",
|
|
" <td id=\"T_69e32_row7_col1\" class=\"data row7 col1\" >(2663, 29)</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n",
|
|
" <td id=\"T_69e32_row8_col0\" class=\"data row8 col0\" >Numeric features</td>\n",
|
|
" <td id=\"T_69e32_row8_col1\" class=\"data row8 col1\" >14</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n",
|
|
" <td id=\"T_69e32_row9_col0\" class=\"data row9 col0\" >Preprocess</td>\n",
|
|
" <td id=\"T_69e32_row9_col1\" class=\"data row9 col1\" >True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row10\" class=\"row_heading level0 row10\" >10</th>\n",
|
|
" <td id=\"T_69e32_row10_col0\" class=\"data row10 col0\" >Imputation type</td>\n",
|
|
" <td id=\"T_69e32_row10_col1\" class=\"data row10 col1\" >simple</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row11\" class=\"row_heading level0 row11\" >11</th>\n",
|
|
" <td id=\"T_69e32_row11_col0\" class=\"data row11 col0\" >Numeric imputation</td>\n",
|
|
" <td id=\"T_69e32_row11_col1\" class=\"data row11 col1\" >mean</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row12\" class=\"row_heading level0 row12\" >12</th>\n",
|
|
" <td id=\"T_69e32_row12_col0\" class=\"data row12 col0\" >Categorical imputation</td>\n",
|
|
" <td id=\"T_69e32_row12_col1\" class=\"data row12 col1\" >mode</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row13\" class=\"row_heading level0 row13\" >13</th>\n",
|
|
" <td id=\"T_69e32_row13_col0\" class=\"data row13 col0\" >Fold Generator</td>\n",
|
|
" <td id=\"T_69e32_row13_col1\" class=\"data row13 col1\" >StratifiedKFold</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row14\" class=\"row_heading level0 row14\" >14</th>\n",
|
|
" <td id=\"T_69e32_row14_col0\" class=\"data row14 col0\" >Fold Number</td>\n",
|
|
" <td id=\"T_69e32_row14_col1\" class=\"data row14 col1\" >5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row15\" class=\"row_heading level0 row15\" >15</th>\n",
|
|
" <td id=\"T_69e32_row15_col0\" class=\"data row15 col0\" >CPU Jobs</td>\n",
|
|
" <td id=\"T_69e32_row15_col1\" class=\"data row15 col1\" >-1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row16\" class=\"row_heading level0 row16\" >16</th>\n",
|
|
" <td id=\"T_69e32_row16_col0\" class=\"data row16 col0\" >Use GPU</td>\n",
|
|
" <td id=\"T_69e32_row16_col1\" class=\"data row16 col1\" >False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row17\" class=\"row_heading level0 row17\" >17</th>\n",
|
|
" <td id=\"T_69e32_row17_col0\" class=\"data row17 col0\" >Log Experiment</td>\n",
|
|
" <td id=\"T_69e32_row17_col1\" class=\"data row17 col1\" >False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row18\" class=\"row_heading level0 row18\" >18</th>\n",
|
|
" <td id=\"T_69e32_row18_col0\" class=\"data row18 col0\" >Experiment Name</td>\n",
|
|
" <td id=\"T_69e32_row18_col1\" class=\"data row18 col1\" >clf-default-name</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_69e32_level0_row19\" class=\"row_heading level0 row19\" >19</th>\n",
|
|
" <td id=\"T_69e32_row19_col0\" class=\"data row19 col0\" >USI</td>\n",
|
|
" <td id=\"T_69e32_row19_col1\" class=\"data row19 col1\" >9ce1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n"
|
|
],
|
|
"text/plain": [
|
|
"<pandas.io.formats.style.Styler at 0x289e745df90>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"from pycaret.classification import *\n",
|
|
"s = setup(\n",
|
|
" data = data, \n",
|
|
" target = 'y', \n",
|
|
" fold = 5, ## defaul = 10\n",
|
|
" session_id=1234) ## random seed"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Show available Models (for Classification)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Name</th>\n",
|
|
" <th>Reference</th>\n",
|
|
" <th>Turbo</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>ID</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>lr</th>\n",
|
|
" <td>Logistic Regression</td>\n",
|
|
" <td>sklearn.linear_model._logistic.LogisticRegression</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>knn</th>\n",
|
|
" <td>K Neighbors Classifier</td>\n",
|
|
" <td>sklearn.neighbors._classification.KNeighborsCl...</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>nb</th>\n",
|
|
" <td>Naive Bayes</td>\n",
|
|
" <td>sklearn.naive_bayes.GaussianNB</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>dt</th>\n",
|
|
" <td>Decision Tree Classifier</td>\n",
|
|
" <td>sklearn.tree._classes.DecisionTreeClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>svm</th>\n",
|
|
" <td>SVM - Linear Kernel</td>\n",
|
|
" <td>sklearn.linear_model._stochastic_gradient.SGDC...</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>rbfsvm</th>\n",
|
|
" <td>SVM - Radial Kernel</td>\n",
|
|
" <td>sklearn.svm._classes.SVC</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>gpc</th>\n",
|
|
" <td>Gaussian Process Classifier</td>\n",
|
|
" <td>sklearn.gaussian_process._gpc.GaussianProcessC...</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mlp</th>\n",
|
|
" <td>MLP Classifier</td>\n",
|
|
" <td>sklearn.neural_network._multilayer_perceptron....</td>\n",
|
|
" <td>False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>ridge</th>\n",
|
|
" <td>Ridge Classifier</td>\n",
|
|
" <td>sklearn.linear_model._ridge.RidgeClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>rf</th>\n",
|
|
" <td>Random Forest Classifier</td>\n",
|
|
" <td>sklearn.ensemble._forest.RandomForestClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>qda</th>\n",
|
|
" <td>Quadratic Discriminant Analysis</td>\n",
|
|
" <td>sklearn.discriminant_analysis.QuadraticDiscrim...</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>ada</th>\n",
|
|
" <td>Ada Boost Classifier</td>\n",
|
|
" <td>sklearn.ensemble._weight_boosting.AdaBoostClas...</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>gbc</th>\n",
|
|
" <td>Gradient Boosting Classifier</td>\n",
|
|
" <td>sklearn.ensemble._gb.GradientBoostingClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>lda</th>\n",
|
|
" <td>Linear Discriminant Analysis</td>\n",
|
|
" <td>sklearn.discriminant_analysis.LinearDiscrimina...</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>et</th>\n",
|
|
" <td>Extra Trees Classifier</td>\n",
|
|
" <td>sklearn.ensemble._forest.ExtraTreesClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>lightgbm</th>\n",
|
|
" <td>Light Gradient Boosting Machine</td>\n",
|
|
" <td>lightgbm.sklearn.LGBMClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>catboost</th>\n",
|
|
" <td>CatBoost Classifier</td>\n",
|
|
" <td>catboost.core.CatBoostClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>dummy</th>\n",
|
|
" <td>Dummy Classifier</td>\n",
|
|
" <td>sklearn.dummy.DummyClassifier</td>\n",
|
|
" <td>True</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Name \\\n",
|
|
"ID \n",
|
|
"lr Logistic Regression \n",
|
|
"knn K Neighbors Classifier \n",
|
|
"nb Naive Bayes \n",
|
|
"dt Decision Tree Classifier \n",
|
|
"svm SVM - Linear Kernel \n",
|
|
"rbfsvm SVM - Radial Kernel \n",
|
|
"gpc Gaussian Process Classifier \n",
|
|
"mlp MLP Classifier \n",
|
|
"ridge Ridge Classifier \n",
|
|
"rf Random Forest Classifier \n",
|
|
"qda Quadratic Discriminant Analysis \n",
|
|
"ada Ada Boost Classifier \n",
|
|
"gbc Gradient Boosting Classifier \n",
|
|
"lda Linear Discriminant Analysis \n",
|
|
"et Extra Trees Classifier \n",
|
|
"lightgbm Light Gradient Boosting Machine \n",
|
|
"catboost CatBoost Classifier \n",
|
|
"dummy Dummy Classifier \n",
|
|
"\n",
|
|
" Reference Turbo \n",
|
|
"ID \n",
|
|
"lr sklearn.linear_model._logistic.LogisticRegression True \n",
|
|
"knn sklearn.neighbors._classification.KNeighborsCl... True \n",
|
|
"nb sklearn.naive_bayes.GaussianNB True \n",
|
|
"dt sklearn.tree._classes.DecisionTreeClassifier True \n",
|
|
"svm sklearn.linear_model._stochastic_gradient.SGDC... True \n",
|
|
"rbfsvm sklearn.svm._classes.SVC False \n",
|
|
"gpc sklearn.gaussian_process._gpc.GaussianProcessC... False \n",
|
|
"mlp sklearn.neural_network._multilayer_perceptron.... False \n",
|
|
"ridge sklearn.linear_model._ridge.RidgeClassifier True \n",
|
|
"rf sklearn.ensemble._forest.RandomForestClassifier True \n",
|
|
"qda sklearn.discriminant_analysis.QuadraticDiscrim... True \n",
|
|
"ada sklearn.ensemble._weight_boosting.AdaBoostClas... True \n",
|
|
"gbc sklearn.ensemble._gb.GradientBoostingClassifier True \n",
|
|
"lda sklearn.discriminant_analysis.LinearDiscrimina... True \n",
|
|
"et sklearn.ensemble._forest.ExtraTreesClassifier True \n",
|
|
"lightgbm lightgbm.sklearn.LGBMClassifier True \n",
|
|
"catboost catboost.core.CatBoostClassifier True \n",
|
|
"dummy sklearn.dummy.DummyClassifier True "
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"models()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Model Training and Selection"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<style type=\"text/css\">\n",
|
|
"#T_c3bc4 th {\n",
|
|
" text-align: left;\n",
|
|
"}\n",
|
|
"#T_c3bc4_row0_col0, #T_c3bc4_row1_col0, #T_c3bc4_row1_col1, #T_c3bc4_row1_col2, #T_c3bc4_row1_col3, #T_c3bc4_row1_col4, #T_c3bc4_row1_col5, #T_c3bc4_row1_col6, #T_c3bc4_row1_col7, #T_c3bc4_row2_col0, #T_c3bc4_row2_col1, #T_c3bc4_row2_col2, #T_c3bc4_row2_col3, #T_c3bc4_row2_col4, #T_c3bc4_row2_col5, #T_c3bc4_row2_col6, #T_c3bc4_row2_col7, #T_c3bc4_row3_col0, #T_c3bc4_row3_col1, #T_c3bc4_row3_col2, #T_c3bc4_row3_col3, #T_c3bc4_row3_col4, #T_c3bc4_row3_col5, #T_c3bc4_row3_col6, #T_c3bc4_row3_col7, #T_c3bc4_row4_col0, #T_c3bc4_row4_col1, #T_c3bc4_row4_col2, #T_c3bc4_row4_col3, #T_c3bc4_row4_col4, #T_c3bc4_row4_col5, #T_c3bc4_row4_col6, #T_c3bc4_row4_col7, #T_c3bc4_row5_col0, #T_c3bc4_row5_col1, #T_c3bc4_row5_col2, #T_c3bc4_row5_col3, #T_c3bc4_row5_col4, #T_c3bc4_row5_col5, #T_c3bc4_row5_col6, #T_c3bc4_row5_col7, #T_c3bc4_row6_col0, #T_c3bc4_row6_col1, #T_c3bc4_row6_col2, #T_c3bc4_row6_col3, #T_c3bc4_row6_col4, #T_c3bc4_row6_col5, #T_c3bc4_row6_col6, #T_c3bc4_row6_col7, #T_c3bc4_row7_col0, #T_c3bc4_row7_col1, #T_c3bc4_row7_col2, #T_c3bc4_row7_col3, #T_c3bc4_row7_col4, #T_c3bc4_row7_col5, #T_c3bc4_row7_col6, #T_c3bc4_row7_col7, #T_c3bc4_row8_col0, #T_c3bc4_row8_col1, #T_c3bc4_row8_col2, #T_c3bc4_row8_col3, #T_c3bc4_row8_col4, #T_c3bc4_row8_col5, #T_c3bc4_row8_col6, #T_c3bc4_row8_col7, #T_c3bc4_row9_col0, #T_c3bc4_row9_col1, #T_c3bc4_row9_col2, #T_c3bc4_row9_col3, #T_c3bc4_row9_col4, #T_c3bc4_row9_col5, #T_c3bc4_row9_col6, #T_c3bc4_row9_col7, #T_c3bc4_row10_col0, #T_c3bc4_row10_col1, #T_c3bc4_row10_col2, #T_c3bc4_row10_col3, #T_c3bc4_row10_col4, #T_c3bc4_row10_col5, #T_c3bc4_row10_col6, #T_c3bc4_row10_col7, #T_c3bc4_row11_col0, #T_c3bc4_row11_col1, #T_c3bc4_row11_col2, #T_c3bc4_row11_col3, #T_c3bc4_row11_col4, #T_c3bc4_row11_col5, #T_c3bc4_row11_col6, #T_c3bc4_row11_col7, #T_c3bc4_row12_col0, #T_c3bc4_row12_col1, #T_c3bc4_row12_col2, #T_c3bc4_row12_col3, #T_c3bc4_row12_col4, #T_c3bc4_row12_col5, #T_c3bc4_row12_col6, #T_c3bc4_row12_col7, #T_c3bc4_row13_col0, 
#T_c3bc4_row13_col1, #T_c3bc4_row13_col2, #T_c3bc4_row13_col3, #T_c3bc4_row13_col4, #T_c3bc4_row13_col5, #T_c3bc4_row13_col6, #T_c3bc4_row13_col7, #T_c3bc4_row14_col0, #T_c3bc4_row14_col1, #T_c3bc4_row14_col2, #T_c3bc4_row14_col3, #T_c3bc4_row14_col4, #T_c3bc4_row14_col5, #T_c3bc4_row14_col6, #T_c3bc4_row14_col7 {\n",
|
|
" text-align: left;\n",
|
|
"}\n",
|
|
"#T_c3bc4_row0_col1, #T_c3bc4_row0_col2, #T_c3bc4_row0_col3, #T_c3bc4_row0_col4, #T_c3bc4_row0_col5, #T_c3bc4_row0_col6, #T_c3bc4_row0_col7 {\n",
|
|
" text-align: left;\n",
|
|
" background-color: yellow;\n",
|
|
"}\n",
|
|
"#T_c3bc4_row0_col8, #T_c3bc4_row1_col8, #T_c3bc4_row2_col8, #T_c3bc4_row3_col8, #T_c3bc4_row4_col8, #T_c3bc4_row5_col8, #T_c3bc4_row6_col8, #T_c3bc4_row7_col8, #T_c3bc4_row8_col8, #T_c3bc4_row9_col8, #T_c3bc4_row10_col8, #T_c3bc4_row11_col8, #T_c3bc4_row12_col8, #T_c3bc4_row14_col8 {\n",
|
|
" text-align: left;\n",
|
|
" background-color: lightgrey;\n",
|
|
"}\n",
|
|
"#T_c3bc4_row13_col8 {\n",
|
|
" text-align: left;\n",
|
|
" background-color: yellow;\n",
|
|
" background-color: lightgrey;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<table id=\"T_c3bc4\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th class=\"blank level0\" > </th>\n",
|
|
" <th id=\"T_c3bc4_level0_col0\" class=\"col_heading level0 col0\" >Model</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col1\" class=\"col_heading level0 col1\" >Accuracy</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col2\" class=\"col_heading level0 col2\" >AUC</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col3\" class=\"col_heading level0 col3\" >Recall</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col4\" class=\"col_heading level0 col4\" >Prec.</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col5\" class=\"col_heading level0 col5\" >F1</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col6\" class=\"col_heading level0 col6\" >Kappa</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col7\" class=\"col_heading level0 col7\" >MCC</th>\n",
|
|
" <th id=\"T_c3bc4_level0_col8\" class=\"col_heading level0 col8\" >TT (Sec)</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row0\" class=\"row_heading level0 row0\" >gbc</th>\n",
|
|
" <td id=\"T_c3bc4_row0_col0\" class=\"data row0 col0\" >Gradient Boosting Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col1\" class=\"data row0 col1\" >0.7617</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col2\" class=\"data row0 col2\" >0.8020</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col3\" class=\"data row0 col3\" >0.7617</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col4\" class=\"data row0 col4\" >0.7730</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col5\" class=\"data row0 col5\" >0.7567</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col6\" class=\"data row0 col6\" >0.5144</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col7\" class=\"data row0 col7\" >0.5290</td>\n",
|
|
" <td id=\"T_c3bc4_row0_col8\" class=\"data row0 col8\" >0.4640</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row1\" class=\"row_heading level0 row1\" >catboost</th>\n",
|
|
" <td id=\"T_c3bc4_row1_col0\" class=\"data row1 col0\" >CatBoost Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col1\" class=\"data row1 col1\" >0.7582</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col2\" class=\"data row1 col2\" >0.7996</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col3\" class=\"data row1 col3\" >0.7582</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col4\" class=\"data row1 col4\" >0.7681</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col5\" class=\"data row1 col5\" >0.7534</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col6\" class=\"data row1 col6\" >0.5074</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col7\" class=\"data row1 col7\" >0.5207</td>\n",
|
|
" <td id=\"T_c3bc4_row1_col8\" class=\"data row1 col8\" >3.4800</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row2\" class=\"row_heading level0 row2\" >lightgbm</th>\n",
|
|
" <td id=\"T_c3bc4_row2_col0\" class=\"data row2 col0\" >Light Gradient Boosting Machine</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col1\" class=\"data row2 col1\" >0.7543</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col2\" class=\"data row2 col2\" >0.7954</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col3\" class=\"data row2 col3\" >0.7543</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col4\" class=\"data row2 col4\" >0.7632</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col5\" class=\"data row2 col5\" >0.7498</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col6\" class=\"data row2 col6\" >0.4998</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col7\" class=\"data row2 col7\" >0.5120</td>\n",
|
|
" <td id=\"T_c3bc4_row2_col8\" class=\"data row2 col8\" >0.3200</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row3\" class=\"row_heading level0 row3\" >ada</th>\n",
|
|
" <td id=\"T_c3bc4_row3_col0\" class=\"data row3 col0\" >Ada Boost Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col1\" class=\"data row3 col1\" >0.7492</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col2\" class=\"data row3 col2\" >0.7957</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col3\" class=\"data row3 col3\" >0.7492</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col4\" class=\"data row3 col4\" >0.7616</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col5\" class=\"data row3 col5\" >0.7432</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col6\" class=\"data row3 col6\" >0.4882</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col7\" class=\"data row3 col7\" >0.5044</td>\n",
|
|
" <td id=\"T_c3bc4_row3_col8\" class=\"data row3 col8\" >0.2180</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row4\" class=\"row_heading level0 row4\" >rf</th>\n",
|
|
" <td id=\"T_c3bc4_row4_col0\" class=\"data row4 col0\" >Random Forest Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col1\" class=\"data row4 col1\" >0.7421</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col2\" class=\"data row4 col2\" >0.7902</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col3\" class=\"data row4 col3\" >0.7421</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col4\" class=\"data row4 col4\" >0.7451</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col5\" class=\"data row4 col5\" >0.7394</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col6\" class=\"data row4 col6\" >0.4771</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col7\" class=\"data row4 col7\" >0.4826</td>\n",
|
|
" <td id=\"T_c3bc4_row4_col8\" class=\"data row4 col8\" >0.4160</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row5\" class=\"row_heading level0 row5\" >lr</th>\n",
|
|
" <td id=\"T_c3bc4_row5_col0\" class=\"data row5 col0\" >Logistic Regression</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col1\" class=\"data row5 col1\" >0.7393</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col2\" class=\"data row5 col2\" >0.7852</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col3\" class=\"data row5 col3\" >0.7393</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col4\" class=\"data row5 col4\" >0.7435</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col5\" class=\"data row5 col5\" >0.7361</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col6\" class=\"data row5 col6\" >0.4709</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col7\" class=\"data row5 col7\" >0.4778</td>\n",
|
|
" <td id=\"T_c3bc4_row5_col8\" class=\"data row5 col8\" >2.8060</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row6\" class=\"row_heading level0 row6\" >ridge</th>\n",
|
|
" <td id=\"T_c3bc4_row6_col0\" class=\"data row6 col0\" >Ridge Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col1\" class=\"data row6 col1\" >0.7326</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col2\" class=\"data row6 col2\" >0.7851</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col3\" class=\"data row6 col3\" >0.7326</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col4\" class=\"data row6 col4\" >0.7353</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col5\" class=\"data row6 col5\" >0.7298</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col6\" class=\"data row6 col6\" >0.4578</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col7\" class=\"data row6 col7\" >0.4630</td>\n",
|
|
" <td id=\"T_c3bc4_row6_col8\" class=\"data row6 col8\" >0.0740</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row7\" class=\"row_heading level0 row7\" >lda</th>\n",
|
|
" <td id=\"T_c3bc4_row7_col0\" class=\"data row7 col0\" >Linear Discriminant Analysis</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col1\" class=\"data row7 col1\" >0.7324</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col2\" class=\"data row7 col2\" >0.7851</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col3\" class=\"data row7 col3\" >0.7324</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col4\" class=\"data row7 col4\" >0.7351</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col5\" class=\"data row7 col5\" >0.7296</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col6\" class=\"data row7 col6\" >0.4575</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col7\" class=\"data row7 col7\" >0.4627</td>\n",
|
|
" <td id=\"T_c3bc4_row7_col8\" class=\"data row7 col8\" >0.0780</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row8\" class=\"row_heading level0 row8\" >et</th>\n",
|
|
" <td id=\"T_c3bc4_row8_col0\" class=\"data row8 col0\" >Extra Trees Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col1\" class=\"data row8 col1\" >0.7224</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col2\" class=\"data row8 col2\" >0.7732</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col3\" class=\"data row8 col3\" >0.7224</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col4\" class=\"data row8 col4\" >0.7232</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col5\" class=\"data row8 col5\" >0.7206</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col6\" class=\"data row8 col6\" >0.4387</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col7\" class=\"data row8 col7\" >0.4414</td>\n",
|
|
" <td id=\"T_c3bc4_row8_col8\" class=\"data row8 col8\" >0.4140</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row9\" class=\"row_heading level0 row9\" >qda</th>\n",
|
|
" <td id=\"T_c3bc4_row9_col0\" class=\"data row9 col0\" >Quadratic Discriminant Analysis</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col1\" class=\"data row9 col1\" >0.7221</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col2\" class=\"data row9 col2\" >0.7713</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col3\" class=\"data row9 col3\" >0.7221</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col4\" class=\"data row9 col4\" >0.7379</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col5\" class=\"data row9 col5\" >0.7132</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col6\" class=\"data row9 col6\" >0.4313</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col7\" class=\"data row9 col7\" >0.4520</td>\n",
|
|
" <td id=\"T_c3bc4_row9_col8\" class=\"data row9 col8\" >0.0900</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row10\" class=\"row_heading level0 row10\" >nb</th>\n",
|
|
" <td id=\"T_c3bc4_row10_col0\" class=\"data row10 col0\" >Naive Bayes</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col1\" class=\"data row10 col1\" >0.7150</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col2\" class=\"data row10 col2\" >0.7637</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col3\" class=\"data row10 col3\" >0.7150</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col4\" class=\"data row10 col4\" >0.7169</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col5\" class=\"data row10 col5\" >0.7122</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col6\" class=\"data row10 col6\" >0.4224</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col7\" class=\"data row10 col7\" >0.4269</td>\n",
|
|
" <td id=\"T_c3bc4_row10_col8\" class=\"data row10 col8\" >0.0640</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row11\" class=\"row_heading level0 row11\" >knn</th>\n",
|
|
" <td id=\"T_c3bc4_row11_col0\" class=\"data row11 col0\" >K Neighbors Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col1\" class=\"data row11 col1\" >0.7142</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col2\" class=\"data row11 col2\" >0.7454</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col3\" class=\"data row11 col3\" >0.7142</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col4\" class=\"data row11 col4\" >0.7144</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col5\" class=\"data row11 col5\" >0.7129</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col6\" class=\"data row11 col6\" >0.4230</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col7\" class=\"data row11 col7\" >0.4247</td>\n",
|
|
" <td id=\"T_c3bc4_row11_col8\" class=\"data row11 col8\" >2.5480</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row12\" class=\"row_heading level0 row12\" >dt</th>\n",
|
|
" <td id=\"T_c3bc4_row12_col0\" class=\"data row12 col0\" >Decision Tree Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col1\" class=\"data row12 col1\" >0.6614</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col2\" class=\"data row12 col2\" >0.6608</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col3\" class=\"data row12 col3\" >0.6614</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col4\" class=\"data row12 col4\" >0.6618</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col5\" class=\"data row12 col5\" >0.6615</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col6\" class=\"data row12 col6\" >0.3206</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col7\" class=\"data row12 col7\" >0.3207</td>\n",
|
|
" <td id=\"T_c3bc4_row12_col8\" class=\"data row12 col8\" >0.0840</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row13\" class=\"row_heading level0 row13\" >dummy</th>\n",
|
|
" <td id=\"T_c3bc4_row13_col0\" class=\"data row13 col0\" >Dummy Classifier</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col1\" class=\"data row13 col1\" >0.5320</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col2\" class=\"data row13 col2\" >0.5000</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col3\" class=\"data row13 col3\" >0.5320</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col4\" class=\"data row13 col4\" >0.2830</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col5\" class=\"data row13 col5\" >0.3694</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col6\" class=\"data row13 col6\" >0.0000</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col7\" class=\"data row13 col7\" >0.0000</td>\n",
|
|
" <td id=\"T_c3bc4_row13_col8\" class=\"data row13 col8\" >0.0620</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_c3bc4_level0_row14\" class=\"row_heading level0 row14\" >svm</th>\n",
|
|
" <td id=\"T_c3bc4_row14_col0\" class=\"data row14 col0\" >SVM - Linear Kernel</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col1\" class=\"data row14 col1\" >0.5118</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col2\" class=\"data row14 col2\" >0.7084</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col3\" class=\"data row14 col3\" >0.5118</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col4\" class=\"data row14 col4\" >0.4463</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col5\" class=\"data row14 col5\" >0.3527</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col6\" class=\"data row14 col6\" >0.0124</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col7\" class=\"data row14 col7\" >0.0413</td>\n",
|
|
" <td id=\"T_c3bc4_row14_col8\" class=\"data row14 col8\" >0.1960</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n"
|
|
],
|
|
"text/plain": [
|
|
"<pandas.io.formats.style.Styler at 0x289f24e2990>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Processing: 0%| | 0/65 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"best_model = compare_models()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"MCC (Matthews correlation coefficient), see: https://en.wikipedia.org/wiki/Phi_coefficient"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,\n",
|
|
" learning_rate=0.1, loss='log_loss', max_depth=3,\n",
|
|
" max_features=None, max_leaf_nodes=None,\n",
|
|
" min_impurity_decrease=0.0, min_samples_leaf=1,\n",
|
|
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
|
|
" n_estimators=100, n_iter_no_change=None,\n",
|
|
" random_state=1234, subsample=1.0, tol=0.0001,\n",
|
|
" validation_fraction=0.1, verbose=0,\n",
|
|
" warm_start=False)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(best_model)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Analyze best Model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "0a3a89d79b974d2f9db0d40bb6869fdb",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"evaluate_model(best_model)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Predict on unseen Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<style type=\"text/css\">\n",
|
|
"</style>\n",
|
|
"<table id=\"T_8e5a7\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th class=\"blank level0\" > </th>\n",
|
|
" <th id=\"T_8e5a7_level0_col0\" class=\"col_heading level0 col0\" >Model</th>\n",
|
|
" <th id=\"T_8e5a7_level0_col1\" class=\"col_heading level0 col1\" >Accuracy</th>\n",
|
|
" <th id=\"T_8e5a7_level0_col2\" class=\"col_heading level0 col2\" >AUC</th>\n",
|
|
" <th id=\"T_8e5a7_level0_col3\" class=\"col_heading level0 col3\" >Recall</th>\n",
|
|
" <th id=\"T_8e5a7_level0_col4\" class=\"col_heading level0 col4\" >Prec.</th>\n",
|
|
" <th id=\"T_8e5a7_level0_col5\" class=\"col_heading level0 col5\" >F1</th>\n",
|
|
" <th id=\"T_8e5a7_level0_col6\" class=\"col_heading level0 col6\" >Kappa</th>\n",
|
|
" <th id=\"T_8e5a7_level0_col7\" class=\"col_heading level0 col7\" >MCC</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_8e5a7_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
|
|
" <td id=\"T_8e5a7_row0_col0\" class=\"data row0 col0\" >Gradient Boosting Classifier</td>\n",
|
|
" <td id=\"T_8e5a7_row0_col1\" class=\"data row0 col1\" >0.7495</td>\n",
|
|
" <td id=\"T_8e5a7_row0_col2\" class=\"data row0 col2\" >0.7990</td>\n",
|
|
" <td id=\"T_8e5a7_row0_col3\" class=\"data row0 col3\" >0.7495</td>\n",
|
|
" <td id=\"T_8e5a7_row0_col4\" class=\"data row0 col4\" >0.7626</td>\n",
|
|
" <td id=\"T_8e5a7_row0_col5\" class=\"data row0 col5\" >0.7457</td>\n",
|
|
" <td id=\"T_8e5a7_row0_col6\" class=\"data row0 col6\" >0.4972</td>\n",
|
|
" <td id=\"T_8e5a7_row0_col7\" class=\"data row0 col7\" >0.5109</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n"
|
|
],
|
|
"text/plain": [
|
|
"<pandas.io.formats.style.Styler at 0x289f24d4050>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" age education housing contact_cellular month day_of_week \\\n",
|
|
"2809 36.0 6 0 1 5 2 \n",
|
|
"4052 34.0 6 1 1 8 3 \n",
|
|
"658 36.0 6 1 0 5 2 \n",
|
|
"786 40.0 5 0 0 5 3 \n",
|
|
"6675 36.0 6 0 1 11 3 \n",
|
|
"\n",
|
|
" campaign pdays previous emp_var_rate ... job_student \\\n",
|
|
"2809 0.477121 0 0 -1.8 ... False \n",
|
|
"4052 0.698970 0 0 1.4 ... False \n",
|
|
"658 0.903090 0 0 -1.8 ... False \n",
|
|
"786 0.698970 0 0 1.1 ... False \n",
|
|
"6675 0.477121 0 0 -0.1 ... False \n",
|
|
"\n",
|
|
" job_technician job_unemployed marital_married marital_single \\\n",
|
|
"2809 False False False True \n",
|
|
"4052 True False False True \n",
|
|
"658 False False False True \n",
|
|
"786 False False True False \n",
|
|
"6675 False False False True \n",
|
|
"\n",
|
|
" loan_unknown loan_yes y prediction_label prediction_score \n",
|
|
"2809 False False yes no 0.5681 \n",
|
|
"4052 False True yes no 0.6703 \n",
|
|
"658 False False yes no 0.8444 \n",
|
|
"786 False False no no 0.7768 \n",
|
|
"6675 False False no no 0.6379 \n",
|
|
"\n",
|
|
"[5 rows x 31 columns]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"predictions = predict_model(best_model, data=data_unseen)\n",
|
|
"print(predictions.head())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Save best Model Pipeline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "raw",
|
|
"metadata": {},
|
|
"source": [
|
|
"save_model(best_model, 'best_model_pipeline')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Tune a specific Model\n",
|
|
"ref: https://pycaret.gitbook.io/docs/get-started/functions/optimize"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### rf: Random Forest Classifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<style type=\"text/css\">\n",
|
|
"#T_7f597_row5_col0, #T_7f597_row5_col1, #T_7f597_row5_col2, #T_7f597_row5_col3, #T_7f597_row5_col4, #T_7f597_row5_col5, #T_7f597_row5_col6 {\n",
|
|
" background: yellow;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<table id=\"T_7f597\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th class=\"blank level0\" > </th>\n",
|
|
" <th id=\"T_7f597_level0_col0\" class=\"col_heading level0 col0\" >Accuracy</th>\n",
|
|
" <th id=\"T_7f597_level0_col1\" class=\"col_heading level0 col1\" >AUC</th>\n",
|
|
" <th id=\"T_7f597_level0_col2\" class=\"col_heading level0 col2\" >Recall</th>\n",
|
|
" <th id=\"T_7f597_level0_col3\" class=\"col_heading level0 col3\" >Prec.</th>\n",
|
|
" <th id=\"T_7f597_level0_col4\" class=\"col_heading level0 col4\" >F1</th>\n",
|
|
" <th id=\"T_7f597_level0_col5\" class=\"col_heading level0 col5\" >Kappa</th>\n",
|
|
" <th id=\"T_7f597_level0_col6\" class=\"col_heading level0 col6\" >MCC</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th class=\"index_name level0\" >Fold</th>\n",
|
|
" <th class=\"blank col0\" > </th>\n",
|
|
" <th class=\"blank col1\" > </th>\n",
|
|
" <th class=\"blank col2\" > </th>\n",
|
|
" <th class=\"blank col3\" > </th>\n",
|
|
" <th class=\"blank col4\" > </th>\n",
|
|
" <th class=\"blank col5\" > </th>\n",
|
|
" <th class=\"blank col6\" > </th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_7f597_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
|
|
" <td id=\"T_7f597_row0_col0\" class=\"data row0 col0\" >0.7409</td>\n",
|
|
" <td id=\"T_7f597_row0_col1\" class=\"data row0 col1\" >0.7939</td>\n",
|
|
" <td id=\"T_7f597_row0_col2\" class=\"data row0 col2\" >0.7409</td>\n",
|
|
" <td id=\"T_7f597_row0_col3\" class=\"data row0 col3\" >0.7447</td>\n",
|
|
" <td id=\"T_7f597_row0_col4\" class=\"data row0 col4\" >0.7379</td>\n",
|
|
" <td id=\"T_7f597_row0_col5\" class=\"data row0 col5\" >0.4744</td>\n",
|
|
" <td id=\"T_7f597_row0_col6\" class=\"data row0 col6\" >0.4808</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_7f597_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
|
|
" <td id=\"T_7f597_row1_col0\" class=\"data row1 col0\" >0.7536</td>\n",
|
|
" <td id=\"T_7f597_row1_col1\" class=\"data row1 col1\" >0.7961</td>\n",
|
|
" <td id=\"T_7f597_row1_col2\" class=\"data row1 col2\" >0.7536</td>\n",
|
|
" <td id=\"T_7f597_row1_col3\" class=\"data row1 col3\" >0.7583</td>\n",
|
|
" <td id=\"T_7f597_row1_col4\" class=\"data row1 col4\" >0.7505</td>\n",
|
|
" <td id=\"T_7f597_row1_col5\" class=\"data row1 col5\" >0.4998</td>\n",
|
|
" <td id=\"T_7f597_row1_col6\" class=\"data row1 col6\" >0.5072</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_7f597_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
|
|
" <td id=\"T_7f597_row2_col0\" class=\"data row2 col0\" >0.7351</td>\n",
|
|
" <td id=\"T_7f597_row2_col1\" class=\"data row2 col1\" >0.7710</td>\n",
|
|
" <td id=\"T_7f597_row2_col2\" class=\"data row2 col2\" >0.7351</td>\n",
|
|
" <td id=\"T_7f597_row2_col3\" class=\"data row2 col3\" >0.7392</td>\n",
|
|
" <td id=\"T_7f597_row2_col4\" class=\"data row2 col4\" >0.7317</td>\n",
|
|
" <td id=\"T_7f597_row2_col5\" class=\"data row2 col5\" >0.4621</td>\n",
|
|
" <td id=\"T_7f597_row2_col6\" class=\"data row2 col6\" >0.4691</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_7f597_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
|
|
" <td id=\"T_7f597_row3_col0\" class=\"data row3 col0\" >0.7311</td>\n",
|
|
" <td id=\"T_7f597_row3_col1\" class=\"data row3 col1\" >0.7967</td>\n",
|
|
" <td id=\"T_7f597_row3_col2\" class=\"data row3 col2\" >0.7311</td>\n",
|
|
" <td id=\"T_7f597_row3_col3\" class=\"data row3 col3\" >0.7329</td>\n",
|
|
" <td id=\"T_7f597_row3_col4\" class=\"data row3 col4\" >0.7287</td>\n",
|
|
" <td id=\"T_7f597_row3_col5\" class=\"data row3 col5\" >0.4553</td>\n",
|
|
" <td id=\"T_7f597_row3_col6\" class=\"data row3 col6\" >0.4594</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_7f597_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
|
|
" <td id=\"T_7f597_row4_col0\" class=\"data row4 col0\" >0.7496</td>\n",
|
|
" <td id=\"T_7f597_row4_col1\" class=\"data row4 col1\" >0.7931</td>\n",
|
|
" <td id=\"T_7f597_row4_col2\" class=\"data row4 col2\" >0.7496</td>\n",
|
|
" <td id=\"T_7f597_row4_col3\" class=\"data row4 col3\" >0.7506</td>\n",
|
|
" <td id=\"T_7f597_row4_col4\" class=\"data row4 col4\" >0.7481</td>\n",
|
|
" <td id=\"T_7f597_row4_col5\" class=\"data row4 col5\" >0.4940</td>\n",
|
|
" <td id=\"T_7f597_row4_col6\" class=\"data row4 col6\" >0.4966</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_7f597_level0_row5\" class=\"row_heading level0 row5\" >Mean</th>\n",
|
|
" <td id=\"T_7f597_row5_col0\" class=\"data row5 col0\" >0.7421</td>\n",
|
|
" <td id=\"T_7f597_row5_col1\" class=\"data row5 col1\" >0.7902</td>\n",
|
|
" <td id=\"T_7f597_row5_col2\" class=\"data row5 col2\" >0.7421</td>\n",
|
|
" <td id=\"T_7f597_row5_col3\" class=\"data row5 col3\" >0.7451</td>\n",
|
|
" <td id=\"T_7f597_row5_col4\" class=\"data row5 col4\" >0.7394</td>\n",
|
|
" <td id=\"T_7f597_row5_col5\" class=\"data row5 col5\" >0.4771</td>\n",
|
|
" <td id=\"T_7f597_row5_col6\" class=\"data row5 col6\" >0.4826</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_7f597_level0_row6\" class=\"row_heading level0 row6\" >Std</th>\n",
|
|
" <td id=\"T_7f597_row6_col0\" class=\"data row6 col0\" >0.0085</td>\n",
|
|
" <td id=\"T_7f597_row6_col1\" class=\"data row6 col1\" >0.0097</td>\n",
|
|
" <td id=\"T_7f597_row6_col2\" class=\"data row6 col2\" >0.0085</td>\n",
|
|
" <td id=\"T_7f597_row6_col3\" class=\"data row6 col3\" >0.0088</td>\n",
|
|
" <td id=\"T_7f597_row6_col4\" class=\"data row6 col4\" >0.0087</td>\n",
|
|
" <td id=\"T_7f597_row6_col5\" class=\"data row6 col5\" >0.0174</td>\n",
|
|
" <td id=\"T_7f597_row6_col6\" class=\"data row6 col6\" >0.0175</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n"
|
|
],
|
|
"text/plain": [
|
|
"<pandas.io.formats.style.Styler at 0x289f25ec950>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Processing: 0%| | 0/4 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"## create model\n",
|
|
"model_rf = create_model('rf')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<style type=\"text/css\">\n",
|
|
"#T_f7a8e_row5_col0, #T_f7a8e_row5_col1, #T_f7a8e_row5_col2, #T_f7a8e_row5_col3, #T_f7a8e_row5_col4, #T_f7a8e_row5_col5, #T_f7a8e_row5_col6 {\n",
|
|
" background: yellow;\n",
|
|
"}\n",
|
|
"</style>\n",
|
|
"<table id=\"T_f7a8e\">\n",
|
|
" <thead>\n",
|
|
" <tr>\n",
|
|
" <th class=\"blank level0\" > </th>\n",
|
|
" <th id=\"T_f7a8e_level0_col0\" class=\"col_heading level0 col0\" >Accuracy</th>\n",
|
|
" <th id=\"T_f7a8e_level0_col1\" class=\"col_heading level0 col1\" >AUC</th>\n",
|
|
" <th id=\"T_f7a8e_level0_col2\" class=\"col_heading level0 col2\" >Recall</th>\n",
|
|
" <th id=\"T_f7a8e_level0_col3\" class=\"col_heading level0 col3\" >Prec.</th>\n",
|
|
" <th id=\"T_f7a8e_level0_col4\" class=\"col_heading level0 col4\" >F1</th>\n",
|
|
" <th id=\"T_f7a8e_level0_col5\" class=\"col_heading level0 col5\" >Kappa</th>\n",
|
|
" <th id=\"T_f7a8e_level0_col6\" class=\"col_heading level0 col6\" >MCC</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th class=\"index_name level0\" >Fold</th>\n",
|
|
" <th class=\"blank col0\" > </th>\n",
|
|
" <th class=\"blank col1\" > </th>\n",
|
|
" <th class=\"blank col2\" > </th>\n",
|
|
" <th class=\"blank col3\" > </th>\n",
|
|
" <th class=\"blank col4\" > </th>\n",
|
|
" <th class=\"blank col5\" > </th>\n",
|
|
" <th class=\"blank col6\" > </th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_f7a8e_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
|
|
" <td id=\"T_f7a8e_row0_col0\" class=\"data row0 col0\" >0.7562</td>\n",
|
|
" <td id=\"T_f7a8e_row0_col1\" class=\"data row0 col1\" >0.8053</td>\n",
|
|
" <td id=\"T_f7a8e_row0_col2\" class=\"data row0 col2\" >0.7562</td>\n",
|
|
" <td id=\"T_f7a8e_row0_col3\" class=\"data row0 col3\" >0.7654</td>\n",
|
|
" <td id=\"T_f7a8e_row0_col4\" class=\"data row0 col4\" >0.7517</td>\n",
|
|
" <td id=\"T_f7a8e_row0_col5\" class=\"data row0 col5\" >0.5037</td>\n",
|
|
" <td id=\"T_f7a8e_row0_col6\" class=\"data row0 col6\" >0.5161</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_f7a8e_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
|
|
" <td id=\"T_f7a8e_row1_col0\" class=\"data row1 col0\" >0.7689</td>\n",
|
|
" <td id=\"T_f7a8e_row1_col1\" class=\"data row1 col1\" >0.8134</td>\n",
|
|
" <td id=\"T_f7a8e_row1_col2\" class=\"data row1 col2\" >0.7689</td>\n",
|
|
" <td id=\"T_f7a8e_row1_col3\" class=\"data row1 col3\" >0.7825</td>\n",
|
|
" <td id=\"T_f7a8e_row1_col4\" class=\"data row1 col4\" >0.7635</td>\n",
|
|
" <td id=\"T_f7a8e_row1_col5\" class=\"data row1 col5\" >0.5285</td>\n",
|
|
" <td id=\"T_f7a8e_row1_col6\" class=\"data row1 col6\" >0.5456</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_f7a8e_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
|
|
" <td id=\"T_f7a8e_row2_col0\" class=\"data row2 col0\" >0.7544</td>\n",
|
|
" <td id=\"T_f7a8e_row2_col1\" class=\"data row2 col1\" >0.7897</td>\n",
|
|
" <td id=\"T_f7a8e_row2_col2\" class=\"data row2 col2\" >0.7544</td>\n",
|
|
" <td id=\"T_f7a8e_row2_col3\" class=\"data row2 col3\" >0.7672</td>\n",
|
|
" <td id=\"T_f7a8e_row2_col4\" class=\"data row2 col4\" >0.7485</td>\n",
|
|
" <td id=\"T_f7a8e_row2_col5\" class=\"data row2 col5\" >0.4988</td>\n",
|
|
" <td id=\"T_f7a8e_row2_col6\" class=\"data row2 col6\" >0.5154</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_f7a8e_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
|
|
" <td id=\"T_f7a8e_row3_col0\" class=\"data row3 col0\" >0.7617</td>\n",
|
|
" <td id=\"T_f7a8e_row3_col1\" class=\"data row3 col1\" >0.8021</td>\n",
|
|
" <td id=\"T_f7a8e_row3_col2\" class=\"data row3 col2\" >0.7617</td>\n",
|
|
" <td id=\"T_f7a8e_row3_col3\" class=\"data row3 col3\" >0.7709</td>\n",
|
|
" <td id=\"T_f7a8e_row3_col4\" class=\"data row3 col4\" >0.7572</td>\n",
|
|
" <td id=\"T_f7a8e_row3_col5\" class=\"data row3 col5\" >0.5147</td>\n",
|
|
" <td id=\"T_f7a8e_row3_col6\" class=\"data row3 col6\" >0.5272</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_f7a8e_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
|
|
" <td id=\"T_f7a8e_row4_col0\" class=\"data row4 col0\" >0.7689</td>\n",
|
|
" <td id=\"T_f7a8e_row4_col1\" class=\"data row4 col1\" >0.8071</td>\n",
|
|
" <td id=\"T_f7a8e_row4_col2\" class=\"data row4 col2\" >0.7689</td>\n",
|
|
" <td id=\"T_f7a8e_row4_col3\" class=\"data row4 col3\" >0.7759</td>\n",
|
|
" <td id=\"T_f7a8e_row4_col4\" class=\"data row4 col4\" >0.7655</td>\n",
|
|
" <td id=\"T_f7a8e_row4_col5\" class=\"data row4 col5\" >0.5305</td>\n",
|
|
" <td id=\"T_f7a8e_row4_col6\" class=\"data row4 col6\" >0.5402</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_f7a8e_level0_row5\" class=\"row_heading level0 row5\" >Mean</th>\n",
|
|
" <td id=\"T_f7a8e_row5_col0\" class=\"data row5 col0\" >0.7620</td>\n",
|
|
" <td id=\"T_f7a8e_row5_col1\" class=\"data row5 col1\" >0.8035</td>\n",
|
|
" <td id=\"T_f7a8e_row5_col2\" class=\"data row5 col2\" >0.7620</td>\n",
|
|
" <td id=\"T_f7a8e_row5_col3\" class=\"data row5 col3\" >0.7724</td>\n",
|
|
" <td id=\"T_f7a8e_row5_col4\" class=\"data row5 col4\" >0.7573</td>\n",
|
|
" <td id=\"T_f7a8e_row5_col5\" class=\"data row5 col5\" >0.5153</td>\n",
|
|
" <td id=\"T_f7a8e_row5_col6\" class=\"data row5 col6\" >0.5289</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th id=\"T_f7a8e_level0_row6\" class=\"row_heading level0 row6\" >Std</th>\n",
|
|
" <td id=\"T_f7a8e_row6_col0\" class=\"data row6 col0\" >0.0061</td>\n",
|
|
" <td id=\"T_f7a8e_row6_col1\" class=\"data row6 col1\" >0.0079</td>\n",
|
|
" <td id=\"T_f7a8e_row6_col2\" class=\"data row6 col2\" >0.0061</td>\n",
|
|
" <td id=\"T_f7a8e_row6_col3\" class=\"data row6 col3\" >0.0062</td>\n",
|
|
" <td id=\"T_f7a8e_row6_col4\" class=\"data row6 col4\" >0.0066</td>\n",
|
|
" <td id=\"T_f7a8e_row6_col5\" class=\"data row6 col5\" >0.0127</td>\n",
|
|
" <td id=\"T_f7a8e_row6_col6\" class=\"data row6 col6\" >0.0123</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n"
|
|
],
|
|
"text/plain": [
|
|
"<pandas.io.formats.style.Styler at 0x289e8bfeb90>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Processing: 0%| | 0/7 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Fitting 5 folds for each of 10 candidates, totalling 50 fits\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"## tune model\n",
|
|
"model_rf_tuned = tune_model(model_rf)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
|
|
" criterion='gini', max_depth=None, max_features='sqrt',\n",
|
|
" max_leaf_nodes=None, max_samples=None,\n",
|
|
" min_impurity_decrease=0.0, min_samples_leaf=1,\n",
|
|
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
|
|
" monotonic_cst=None, n_estimators=100, n_jobs=-1,\n",
|
|
" oob_score=False, random_state=1234, verbose=0,\n",
|
|
" warm_start=False)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"## parameters of default model\n",
|
|
"print(model_rf)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,\n",
|
|
" class_weight='balanced_subsample', criterion='gini',\n",
|
|
" max_depth=10, max_features='sqrt', max_leaf_nodes=None,\n",
|
|
" max_samples=None, min_impurity_decrease=0,\n",
|
|
" min_samples_leaf=5, min_samples_split=7,\n",
|
|
" min_weight_fraction_leaf=0.0, monotonic_cst=None,\n",
|
|
" n_estimators=160, n_jobs=-1, oob_score=False,\n",
|
|
" random_state=1234, verbose=0, warm_start=False)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"## parameters of tuned model\n",
|
|
"print(model_rf_tuned)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Comparison of the default parameters and the tuned parameters:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"| parameter | default | tuned |\n",
|
|
"| :--- | :--- | :--- |\n",
|
|
"| bootstrap | True | True |\n",
"| ccp_alpha | 0 | 0 |\n",
|
|
"| class_weight | None | 'balanced_subsample' |\n",
|
|
"| criterion | 'gini' | 'gini' |\n",
|
|
"| max_depth | None | 10 |\n",
|
|
"| max_features | 'sqrt' | 'sqrt' |\n",
|
|
"| max_leaf_nodes | None | None |\n",
|
|
"| max_samples | None | None |\n",
|
|
"| min_impurity_decrease | 0 | 0 |\n",
|
|
"| min_samples_leaf | 1 | 5 |\n",
|
|
"| min_samples_split | 2 | 7 |\n",
|
|
"| min_weight_fraction_leaf | 0 | 0 |\n",
|
|
"| monotonic_cst | None | None |\n",
|
|
"| n_estimators | 100 | 160 |\n",
|
|
"| n_jobs | -1 | -1 |\n",
|
|
"| oob_score | False | False |\n",
|
|
"| random_state | 1234 | 1234 |\n",
|
|
"| verbose | 0 | 0 |\n",
|
|
"| warm_start | False | False |"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.7"
|
|
},
|
|
"toc": {
|
|
"base_numbering": "5.5",
|
|
"nav_menu": {},
|
|
"number_sections": true,
|
|
"sideBar": true,
|
|
"skip_h1_title": false,
|
|
"title_cell": "5.5 Deployment und Abschluss - pycaret",
|
|
"title_sidebar": "Contents",
|
|
"toc_cell": true,
|
|
"toc_position": {
|
|
"height": "370.667px",
|
|
"left": "25px",
|
|
"top": "110.233px",
|
|
"width": "187.667px"
|
|
},
|
|
"toc_section_display": true,
|
|
"toc_window_display": false
|
|
},
|
|
"toc-autonumbering": true,
|
|
"varInspector": {
|
|
"cols": {
|
|
"lenName": 16,
|
|
"lenType": 16,
|
|
"lenVar": 40
|
|
},
|
|
"kernels_config": {
|
|
"python": {
|
|
"delete_cmd_postfix": "",
|
|
"delete_cmd_prefix": "del ",
|
|
"library": "var_list.py",
|
|
"varRefreshCmd": "print(var_dic_list())"
|
|
},
|
|
"r": {
|
|
"delete_cmd_postfix": ") ",
|
|
"delete_cmd_prefix": "rm(",
|
|
"library": "var_list.r",
|
|
"varRefreshCmd": "cat(var_dic_list()) "
|
|
}
|
|
},
|
|
"position": {
|
|
"height": "234.85px",
|
|
"left": "911px",
|
|
"right": "20px",
|
|
"top": "120px",
|
|
"width": "350px"
|
|
},
|
|
"types_to_exclude": [
|
|
"module",
|
|
"function",
|
|
"builtin_function_or_method",
|
|
"instance",
|
|
"_Feature"
|
|
],
|
|
"window_display": false
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|