{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": true
   },
   "source": [
    "# WS 14 Random Search CV"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* untersuchen Sie Kombinationen von Parameterwerten bei RandomForestClassifier\n",
    "* Vorschlag:\n",
    "    * n_estimators in [50, 100, 150, 200]\n",
    "    * max_features in [3, 5, 7, 9]\n",
    "    * criterion in ['gini', 'entropy']\n",
    "    * min_samples_leaf in [1, 2, 3, 4]\n",
    "* wenden Sie 5-fach Kreuzvalidierung an\n",
    "* setzen Sie die Anzahl der zu untersuchenden Kombinationen auf 12\n",
    "* arbeiten Sie ohne Setzen von random_state, damit anschliessend die Ergebnisse verglichen werden können"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "## import libraries (all imports of the notebook live in this cell)\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "\n",
    "## load data\n",
    "## The file path is built explicitly instead of calling os.chdir(): a relative\n",
    "## chdir() is not idempotent, because re-running the cell would resolve the\n",
    "## path from the already changed working directory.\n",
    "datapath = Path('../../3_data')\n",
    "bank_df = pd.read_csv(datapath / 'bank_data_prep.csv')\n",
    "\n",
    "## features - target\n",
    "X = bank_df.drop('y', axis=1)\n",
    "y = bank_df['y']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_params_ : {'n_estimators': 50, 'min_samples_leaf': 4, 'max_features': 9, 'criterion': 'entropy'}\n",
      "best_score_ : 0.8884381338742393\n",
      "CPU times: total: 3.09 s\n",
      "Wall time: 43.6 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "## define parameter grid (candidate values from the exercise text)\n",
    "parameter_grid = {'n_estimators': [50, 100, 150, 200],\n",
    "                  'max_features': [3, 5, 7, 9],\n",
    "                  'criterion': ['gini', 'entropy'],\n",
    "                  'min_samples_leaf': [1, 2, 3, 4]}\n",
    "\n",
    "## define RandomizedSearchCV: 12 sampled combinations, 5-fold cross-validation\n",
    "## NOTE: random_state is fixed for the forest and for the search, although the\n",
    "## exercise text asks to work without it - presumably so that the recorded\n",
    "## output and the conclusion below stay repeatable.\n",
    "rscv = RandomizedSearchCV(\n",
    "    estimator=RandomForestClassifier(random_state=1234),\n",
    "    param_distributions=parameter_grid,\n",
    "    cv=5,\n",
    "    n_iter=12,\n",
    "    random_state=1234,\n",
    "    n_jobs=-1)\n",
    "\n",
    "## run RandomizedSearchCV\n",
    "rscv.fit(X, y)\n",
    "\n",
    "## evaluate RandomizedSearchCV\n",
    "print('best_params_ :', rscv.best_params_)\n",
    "print('best_score_ :', rscv.best_score_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#rscv.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(9860, 29)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Fazit:**\n",
    "* n_estimators: [50, 100, 150, 200] -> 50\n",
    "    * hier müsste der Suchbereich nach unten erweitert werden\n",
    "* max_features: [3, 5, 7, 9] -> 9\n",
    "    * hier müsste der Suchbereich nach oben erweitert werden\n",
    "* criterion: ['gini', 'entropy'] -> 'entropy'\n",
    "    * Suchbereich ok\n",
    "* min_samples_leaf: [1, 2, 3, 4] -> 4\n",
    "    * hier müsste der Suchbereich nach oben erweitert werden"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  },
  "toc": {
   "base_numbering": "",
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "WS 17 Validierung - Random Search CV",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "195.867px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
"delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "position": { "height": "306.85px", "left": "862px", "right": "20px", "top": "137px", "width": "350px" }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }