{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deployment und Abschluss - pycaret"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"## Install\n",
"\n",
"Attention: PyCaret does not run natively on Apple Silicon; on such machines it is recommended to run it in Docker instead.\n",
"\n",
"ref: https://pycaret.gitbook.io/docs/get-started/installation\n",
"* You can install PyCaret with Python's pip package manager:"
]
},
{
"cell_type": "raw",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"%pip install pycaret"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load and prep Data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2021-12-25T16:56:08.445356Z",
"start_time": "2021-12-25T16:56:06.863957Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(9860, 30)\n"
]
}
],
"source": [
"## load the prepared bank data set\n",
"import numpy as np\n",
"import pandas as pd\n",
"from os import chdir\n",
"\n",
"## make the data folder the working directory, so the file is read by its bare name\n",
"## (every relative path used further down resolves against this folder as well)\n",
"datapath = '../3_data'\n",
"chdir(datapath)\n",
"\n",
"dataset = pd.read_csv('bank_data_prep.csv')\n",
"print(dataset.shape)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"## remove duration\n",
"## NOTE(review): presumably dropped because the call duration is only known once the\n",
"## contact has taken place and would leak the outcome into the features - confirm\n",
"## errors='ignore' keeps the cell re-runnable: without it a second execution raises a\n",
"## KeyError, because the column is already gone from the reassigned `dataset`\n",
"dataset = dataset.drop(columns=\"duration\", errors=\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data for Modeling: (8874, 29)\n",
"Unseen Data For Predictions: (986, 29)\n"
]
}
],
"source": [
"## train - test - split\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"## keep 90% of the rows for modeling and hold the remaining 10% back as unseen data;\n",
"## the fixed random_state makes the split reproducible\n",
"data, data_unseen = train_test_split(\n",
"    dataset,\n",
"    train_size=0.9,\n",
"    random_state=1234,\n",
")\n",
"\n",
"print(f'Data for Modeling: {data.shape}')\n",
"print(f'Unseen Data For Predictions: {data_unseen.shape}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run a Classification Experiment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Init setup"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | | \n",
" Description | \n",
" Value | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Session id | \n",
" 1234 | \n",
"
\n",
" \n",
" | 1 | \n",
" Target | \n",
" y | \n",
"
\n",
" \n",
" | 2 | \n",
" Target type | \n",
" Binary | \n",
"
\n",
" \n",
" | 3 | \n",
" Target mapping | \n",
" no: 0, yes: 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" Original data shape | \n",
" (8874, 29) | \n",
"
\n",
" \n",
" | 5 | \n",
" Transformed data shape | \n",
" (8874, 29) | \n",
"
\n",
" \n",
" | 6 | \n",
" Transformed train set shape | \n",
" (6211, 29) | \n",
"
\n",
" \n",
" | 7 | \n",
" Transformed test set shape | \n",
" (2663, 29) | \n",
"
\n",
" \n",
" | 8 | \n",
" Numeric features | \n",
" 14 | \n",
"
\n",
" \n",
" | 9 | \n",
" Preprocess | \n",
" True | \n",
"
\n",
" \n",
" | 10 | \n",
" Imputation type | \n",
" simple | \n",
"
\n",
" \n",
" | 11 | \n",
" Numeric imputation | \n",
" mean | \n",
"
\n",
" \n",
" | 12 | \n",
" Categorical imputation | \n",
" mode | \n",
"
\n",
" \n",
" | 13 | \n",
" Fold Generator | \n",
" StratifiedKFold | \n",
"
\n",
" \n",
" | 14 | \n",
" Fold Number | \n",
" 5 | \n",
"
\n",
" \n",
" | 15 | \n",
" CPU Jobs | \n",
" -1 | \n",
"
\n",
" \n",
" | 16 | \n",
" Use GPU | \n",
" False | \n",
"
\n",
" \n",
" | 17 | \n",
" Log Experiment | \n",
" False | \n",
"
\n",
" \n",
" | 18 | \n",
" Experiment Name | \n",
" clf-default-name | \n",
"
\n",
" \n",
" | 19 | \n",
" USI | \n",
" 9ce1 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## import only the PyCaret functions this notebook uses instead of `import *`,\n",
"## so it stays visible where each name comes from\n",
"from pycaret.classification import (\n",
"    compare_models,\n",
"    create_model,\n",
"    evaluate_model,\n",
"    models,\n",
"    predict_model,\n",
"    save_model,\n",
"    setup,\n",
"    tune_model,\n",
")\n",
"\n",
"## initialise the experiment on the modeling data\n",
"s = setup(\n",
"    data = data,\n",
"    target = 'y',\n",
"    fold = 5,          ## default = 10\n",
"    session_id = 1234, ## random seed\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Show available Models (for Classification)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Name | \n",
" Reference | \n",
" Turbo | \n",
"
\n",
" \n",
" | ID | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | lr | \n",
" Logistic Regression | \n",
" sklearn.linear_model._logistic.LogisticRegression | \n",
" True | \n",
"
\n",
" \n",
" | knn | \n",
" K Neighbors Classifier | \n",
" sklearn.neighbors._classification.KNeighborsCl... | \n",
" True | \n",
"
\n",
" \n",
" | nb | \n",
" Naive Bayes | \n",
" sklearn.naive_bayes.GaussianNB | \n",
" True | \n",
"
\n",
" \n",
" | dt | \n",
" Decision Tree Classifier | \n",
" sklearn.tree._classes.DecisionTreeClassifier | \n",
" True | \n",
"
\n",
" \n",
" | svm | \n",
" SVM - Linear Kernel | \n",
" sklearn.linear_model._stochastic_gradient.SGDC... | \n",
" True | \n",
"
\n",
" \n",
" | rbfsvm | \n",
" SVM - Radial Kernel | \n",
" sklearn.svm._classes.SVC | \n",
" False | \n",
"
\n",
" \n",
" | gpc | \n",
" Gaussian Process Classifier | \n",
" sklearn.gaussian_process._gpc.GaussianProcessC... | \n",
" False | \n",
"
\n",
" \n",
" | mlp | \n",
" MLP Classifier | \n",
" sklearn.neural_network._multilayer_perceptron.... | \n",
" False | \n",
"
\n",
" \n",
" | ridge | \n",
" Ridge Classifier | \n",
" sklearn.linear_model._ridge.RidgeClassifier | \n",
" True | \n",
"
\n",
" \n",
" | rf | \n",
" Random Forest Classifier | \n",
" sklearn.ensemble._forest.RandomForestClassifier | \n",
" True | \n",
"
\n",
" \n",
" | qda | \n",
" Quadratic Discriminant Analysis | \n",
" sklearn.discriminant_analysis.QuadraticDiscrim... | \n",
" True | \n",
"
\n",
" \n",
" | ada | \n",
" Ada Boost Classifier | \n",
" sklearn.ensemble._weight_boosting.AdaBoostClas... | \n",
" True | \n",
"
\n",
" \n",
" | gbc | \n",
" Gradient Boosting Classifier | \n",
" sklearn.ensemble._gb.GradientBoostingClassifier | \n",
" True | \n",
"
\n",
" \n",
" | lda | \n",
" Linear Discriminant Analysis | \n",
" sklearn.discriminant_analysis.LinearDiscrimina... | \n",
" True | \n",
"
\n",
" \n",
" | et | \n",
" Extra Trees Classifier | \n",
" sklearn.ensemble._forest.ExtraTreesClassifier | \n",
" True | \n",
"
\n",
" \n",
" | lightgbm | \n",
" Light Gradient Boosting Machine | \n",
" lightgbm.sklearn.LGBMClassifier | \n",
" True | \n",
"
\n",
" \n",
" | catboost | \n",
" CatBoost Classifier | \n",
" catboost.core.CatBoostClassifier | \n",
" True | \n",
"
\n",
" \n",
" | dummy | \n",
" Dummy Classifier | \n",
" sklearn.dummy.DummyClassifier | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Name \\\n",
"ID \n",
"lr Logistic Regression \n",
"knn K Neighbors Classifier \n",
"nb Naive Bayes \n",
"dt Decision Tree Classifier \n",
"svm SVM - Linear Kernel \n",
"rbfsvm SVM - Radial Kernel \n",
"gpc Gaussian Process Classifier \n",
"mlp MLP Classifier \n",
"ridge Ridge Classifier \n",
"rf Random Forest Classifier \n",
"qda Quadratic Discriminant Analysis \n",
"ada Ada Boost Classifier \n",
"gbc Gradient Boosting Classifier \n",
"lda Linear Discriminant Analysis \n",
"et Extra Trees Classifier \n",
"lightgbm Light Gradient Boosting Machine \n",
"catboost CatBoost Classifier \n",
"dummy Dummy Classifier \n",
"\n",
" Reference Turbo \n",
"ID \n",
"lr sklearn.linear_model._logistic.LogisticRegression True \n",
"knn sklearn.neighbors._classification.KNeighborsCl... True \n",
"nb sklearn.naive_bayes.GaussianNB True \n",
"dt sklearn.tree._classes.DecisionTreeClassifier True \n",
"svm sklearn.linear_model._stochastic_gradient.SGDC... True \n",
"rbfsvm sklearn.svm._classes.SVC False \n",
"gpc sklearn.gaussian_process._gpc.GaussianProcessC... False \n",
"mlp sklearn.neural_network._multilayer_perceptron.... False \n",
"ridge sklearn.linear_model._ridge.RidgeClassifier True \n",
"rf sklearn.ensemble._forest.RandomForestClassifier True \n",
"qda sklearn.discriminant_analysis.QuadraticDiscrim... True \n",
"ada sklearn.ensemble._weight_boosting.AdaBoostClas... True \n",
"gbc sklearn.ensemble._gb.GradientBoostingClassifier True \n",
"lda sklearn.discriminant_analysis.LinearDiscrimina... True \n",
"et sklearn.ensemble._forest.ExtraTreesClassifier True \n",
"lightgbm lightgbm.sklearn.LGBMClassifier True \n",
"catboost catboost.core.CatBoostClassifier True \n",
"dummy sklearn.dummy.DummyClassifier True "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"models()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model Training and Selection"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | | \n",
" Model | \n",
" Accuracy | \n",
" AUC | \n",
" Recall | \n",
" Prec. | \n",
" F1 | \n",
" Kappa | \n",
" MCC | \n",
" TT (Sec) | \n",
"
\n",
" \n",
" \n",
" \n",
" | gbc | \n",
" Gradient Boosting Classifier | \n",
" 0.7617 | \n",
" 0.8020 | \n",
" 0.7617 | \n",
" 0.7730 | \n",
" 0.7567 | \n",
" 0.5144 | \n",
" 0.5290 | \n",
" 0.4640 | \n",
"
\n",
" \n",
" | catboost | \n",
" CatBoost Classifier | \n",
" 0.7582 | \n",
" 0.7996 | \n",
" 0.7582 | \n",
" 0.7681 | \n",
" 0.7534 | \n",
" 0.5074 | \n",
" 0.5207 | \n",
" 3.4800 | \n",
"
\n",
" \n",
" | lightgbm | \n",
" Light Gradient Boosting Machine | \n",
" 0.7543 | \n",
" 0.7954 | \n",
" 0.7543 | \n",
" 0.7632 | \n",
" 0.7498 | \n",
" 0.4998 | \n",
" 0.5120 | \n",
" 0.3200 | \n",
"
\n",
" \n",
" | ada | \n",
" Ada Boost Classifier | \n",
" 0.7492 | \n",
" 0.7957 | \n",
" 0.7492 | \n",
" 0.7616 | \n",
" 0.7432 | \n",
" 0.4882 | \n",
" 0.5044 | \n",
" 0.2180 | \n",
"
\n",
" \n",
" | rf | \n",
" Random Forest Classifier | \n",
" 0.7421 | \n",
" 0.7902 | \n",
" 0.7421 | \n",
" 0.7451 | \n",
" 0.7394 | \n",
" 0.4771 | \n",
" 0.4826 | \n",
" 0.4160 | \n",
"
\n",
" \n",
" | lr | \n",
" Logistic Regression | \n",
" 0.7393 | \n",
" 0.7852 | \n",
" 0.7393 | \n",
" 0.7435 | \n",
" 0.7361 | \n",
" 0.4709 | \n",
" 0.4778 | \n",
" 2.8060 | \n",
"
\n",
" \n",
" | ridge | \n",
" Ridge Classifier | \n",
" 0.7326 | \n",
" 0.7851 | \n",
" 0.7326 | \n",
" 0.7353 | \n",
" 0.7298 | \n",
" 0.4578 | \n",
" 0.4630 | \n",
" 0.0740 | \n",
"
\n",
" \n",
" | lda | \n",
" Linear Discriminant Analysis | \n",
" 0.7324 | \n",
" 0.7851 | \n",
" 0.7324 | \n",
" 0.7351 | \n",
" 0.7296 | \n",
" 0.4575 | \n",
" 0.4627 | \n",
" 0.0780 | \n",
"
\n",
" \n",
" | et | \n",
" Extra Trees Classifier | \n",
" 0.7224 | \n",
" 0.7732 | \n",
" 0.7224 | \n",
" 0.7232 | \n",
" 0.7206 | \n",
" 0.4387 | \n",
" 0.4414 | \n",
" 0.4140 | \n",
"
\n",
" \n",
" | qda | \n",
" Quadratic Discriminant Analysis | \n",
" 0.7221 | \n",
" 0.7713 | \n",
" 0.7221 | \n",
" 0.7379 | \n",
" 0.7132 | \n",
" 0.4313 | \n",
" 0.4520 | \n",
" 0.0900 | \n",
"
\n",
" \n",
" | nb | \n",
" Naive Bayes | \n",
" 0.7150 | \n",
" 0.7637 | \n",
" 0.7150 | \n",
" 0.7169 | \n",
" 0.7122 | \n",
" 0.4224 | \n",
" 0.4269 | \n",
" 0.0640 | \n",
"
\n",
" \n",
" | knn | \n",
" K Neighbors Classifier | \n",
" 0.7142 | \n",
" 0.7454 | \n",
" 0.7142 | \n",
" 0.7144 | \n",
" 0.7129 | \n",
" 0.4230 | \n",
" 0.4247 | \n",
" 2.5480 | \n",
"
\n",
" \n",
" | dt | \n",
" Decision Tree Classifier | \n",
" 0.6614 | \n",
" 0.6608 | \n",
" 0.6614 | \n",
" 0.6618 | \n",
" 0.6615 | \n",
" 0.3206 | \n",
" 0.3207 | \n",
" 0.0840 | \n",
"
\n",
" \n",
" | dummy | \n",
" Dummy Classifier | \n",
" 0.5320 | \n",
" 0.5000 | \n",
" 0.5320 | \n",
" 0.2830 | \n",
" 0.3694 | \n",
" 0.0000 | \n",
" 0.0000 | \n",
" 0.0620 | \n",
"
\n",
" \n",
" | svm | \n",
" SVM - Linear Kernel | \n",
" 0.5118 | \n",
" 0.7084 | \n",
" 0.5118 | \n",
" 0.4463 | \n",
" 0.3527 | \n",
" 0.0124 | \n",
" 0.0413 | \n",
" 0.1960 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing: 0%| | 0/65 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best_model = compare_models()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"MCC = Matthews correlation coefficient, see: https://en.wikipedia.org/wiki/Phi_coefficient"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,\n",
" learning_rate=0.1, loss='log_loss', max_depth=3,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" n_estimators=100, n_iter_no_change=None,\n",
" random_state=1234, subsample=1.0, tol=0.0001,\n",
" validation_fraction=0.1, verbose=0,\n",
" warm_start=False)\n"
]
}
],
"source": [
"print(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analyze best Model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0a3a89d79b974d2f9db0d40bb6869fdb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"evaluate_model(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Predict on unseen Data"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | | \n",
" Model | \n",
" Accuracy | \n",
" AUC | \n",
" Recall | \n",
" Prec. | \n",
" F1 | \n",
" Kappa | \n",
" MCC | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Gradient Boosting Classifier | \n",
" 0.7495 | \n",
" 0.7990 | \n",
" 0.7495 | \n",
" 0.7626 | \n",
" 0.7457 | \n",
" 0.4972 | \n",
" 0.5109 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" age education housing contact_cellular month day_of_week \\\n",
"2809 36.0 6 0 1 5 2 \n",
"4052 34.0 6 1 1 8 3 \n",
"658 36.0 6 1 0 5 2 \n",
"786 40.0 5 0 0 5 3 \n",
"6675 36.0 6 0 1 11 3 \n",
"\n",
" campaign pdays previous emp_var_rate ... job_student \\\n",
"2809 0.477121 0 0 -1.8 ... False \n",
"4052 0.698970 0 0 1.4 ... False \n",
"658 0.903090 0 0 -1.8 ... False \n",
"786 0.698970 0 0 1.1 ... False \n",
"6675 0.477121 0 0 -0.1 ... False \n",
"\n",
" job_technician job_unemployed marital_married marital_single \\\n",
"2809 False False False True \n",
"4052 True False False True \n",
"658 False False False True \n",
"786 False False True False \n",
"6675 False False False True \n",
"\n",
" loan_unknown loan_yes y prediction_label prediction_score \n",
"2809 False False yes no 0.5681 \n",
"4052 False True yes no 0.6703 \n",
"658 False False yes no 0.8444 \n",
"786 False False no no 0.7768 \n",
"6675 False False no no 0.6379 \n",
"\n",
"[5 rows x 31 columns]\n"
]
}
],
"source": [
"predictions = predict_model(best_model, data=data_unseen)\n",
"print(predictions.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save best Model Pipeline"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"save_model(best_model, 'best_model_pipeline')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tune a specific Model\n",
"ref: https://pycaret.gitbook.io/docs/get-started/functions/optimize"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### rf: Random Forest Classifier"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | | \n",
" Accuracy | \n",
" AUC | \n",
" Recall | \n",
" Prec. | \n",
" F1 | \n",
" Kappa | \n",
" MCC | \n",
"
\n",
" \n",
" | Fold | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.7409 | \n",
" 0.7939 | \n",
" 0.7409 | \n",
" 0.7447 | \n",
" 0.7379 | \n",
" 0.4744 | \n",
" 0.4808 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.7536 | \n",
" 0.7961 | \n",
" 0.7536 | \n",
" 0.7583 | \n",
" 0.7505 | \n",
" 0.4998 | \n",
" 0.5072 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0.7351 | \n",
" 0.7710 | \n",
" 0.7351 | \n",
" 0.7392 | \n",
" 0.7317 | \n",
" 0.4621 | \n",
" 0.4691 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.7311 | \n",
" 0.7967 | \n",
" 0.7311 | \n",
" 0.7329 | \n",
" 0.7287 | \n",
" 0.4553 | \n",
" 0.4594 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.7496 | \n",
" 0.7931 | \n",
" 0.7496 | \n",
" 0.7506 | \n",
" 0.7481 | \n",
" 0.4940 | \n",
" 0.4966 | \n",
"
\n",
" \n",
" | Mean | \n",
" 0.7421 | \n",
" 0.7902 | \n",
" 0.7421 | \n",
" 0.7451 | \n",
" 0.7394 | \n",
" 0.4771 | \n",
" 0.4826 | \n",
"
\n",
" \n",
" | Std | \n",
" 0.0085 | \n",
" 0.0097 | \n",
" 0.0085 | \n",
" 0.0088 | \n",
" 0.0087 | \n",
" 0.0174 | \n",
" 0.0175 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing: 0%| | 0/4 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## create model\n",
"model_rf = create_model('rf')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | | \n",
" Accuracy | \n",
" AUC | \n",
" Recall | \n",
" Prec. | \n",
" F1 | \n",
" Kappa | \n",
" MCC | \n",
"
\n",
" \n",
" | Fold | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0.7562 | \n",
" 0.8053 | \n",
" 0.7562 | \n",
" 0.7654 | \n",
" 0.7517 | \n",
" 0.5037 | \n",
" 0.5161 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.7689 | \n",
" 0.8134 | \n",
" 0.7689 | \n",
" 0.7825 | \n",
" 0.7635 | \n",
" 0.5285 | \n",
" 0.5456 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0.7544 | \n",
" 0.7897 | \n",
" 0.7544 | \n",
" 0.7672 | \n",
" 0.7485 | \n",
" 0.4988 | \n",
" 0.5154 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.7617 | \n",
" 0.8021 | \n",
" 0.7617 | \n",
" 0.7709 | \n",
" 0.7572 | \n",
" 0.5147 | \n",
" 0.5272 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.7689 | \n",
" 0.8071 | \n",
" 0.7689 | \n",
" 0.7759 | \n",
" 0.7655 | \n",
" 0.5305 | \n",
" 0.5402 | \n",
"
\n",
" \n",
" | Mean | \n",
" 0.7620 | \n",
" 0.8035 | \n",
" 0.7620 | \n",
" 0.7724 | \n",
" 0.7573 | \n",
" 0.5153 | \n",
" 0.5289 | \n",
"
\n",
" \n",
" | Std | \n",
" 0.0061 | \n",
" 0.0079 | \n",
" 0.0061 | \n",
" 0.0062 | \n",
" 0.0066 | \n",
" 0.0127 | \n",
" 0.0123 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing: 0%| | 0/7 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 5 folds for each of 10 candidates, totalling 50 fits\n"
]
}
],
"source": [
"## tune model\n",
"model_rf_tuned = tune_model(model_rf)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
" criterion='gini', max_depth=None, max_features='sqrt',\n",
" max_leaf_nodes=None, max_samples=None,\n",
" min_impurity_decrease=0.0, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" monotonic_cst=None, n_estimators=100, n_jobs=-1,\n",
" oob_score=False, random_state=1234, verbose=0,\n",
" warm_start=False)\n"
]
}
],
"source": [
"## parameters of default model\n",
"print(model_rf)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,\n",
" class_weight='balanced_subsample', criterion='gini',\n",
" max_depth=10, max_features='sqrt', max_leaf_nodes=None,\n",
" max_samples=None, min_impurity_decrease=0,\n",
" min_samples_leaf=5, min_samples_split=7,\n",
" min_weight_fraction_leaf=0.0, monotonic_cst=None,\n",
" n_estimators=160, n_jobs=-1, oob_score=False,\n",
" random_state=1234, verbose=0, warm_start=False)\n"
]
}
],
"source": [
"## parameters of tuned model\n",
"print(model_rf_tuned)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Comparison of the default parameters and the tuned parameters:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"| parameter | default | tuned |\n",
"| :--- | :--- | :--- |\n",
"| bootstrap | True | True |\n",
"| ccp_alpha | 0 | 0 |\n",
"| class_weight | None | 'balanced_subsample' |\n",
"| criterion | 'gini' | 'gini' |\n",
"| max_depth | None | 10 |\n",
"| max_features | 'sqrt' | 'sqrt' |\n",
"| max_leaf_nodes | None | None |\n",
"| max_samples | None | None |\n",
"| min_impurity_decrease | 0 | 0 |\n",
"| min_samples_leaf | 1 | 5 |\n",
"| min_samples_split | 2 | 7 |\n",
"| min_weight_fraction_leaf | 0 | 0 |\n",
"| monotonic_cst | None | None |\n",
"| n_estimators | 100 | 160 |\n",
"| n_jobs | -1 | -1 |\n",
"| oob_score | False | False |\n",
"| random_state | 1234 | 1234 |\n",
"| verbose | 0 | 0 |\n",
"| warm_start | False | False |"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
},
"toc": {
"base_numbering": "5.5",
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "5.5 Deployment und Abschluss - pycaret",
"title_sidebar": "Contents",
"toc_cell": true,
"toc_position": {
"height": "370.667px",
"left": "25px",
"top": "110.233px",
"width": "187.667px"
},
"toc_section_display": true,
"toc_window_display": false
},
"toc-autonumbering": true,
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"position": {
"height": "234.85px",
"left": "911px",
"right": "20px",
"top": "120px",
"width": "350px"
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}