mirror of
https://github.com/vale981/ray
synced 2025-03-09 12:56:46 -04:00
475 lines
No EOL
28 KiB
Text
475 lines
No EOL
28 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c3192ac4",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Training a model with Sklearn\n",
|
|
"In this example we will train a scikit-learn `RandomForestClassifier` with Ray AIR's `SklearnTrainer` (optionally swapping in cuML's GPU implementation when it is installed)."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "5a4823bf",
|
|
"metadata": {},
|
|
"source": [
|
|
"Let's start by installing our dependencies:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "88f4bb39",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip install -qU \"ray[tune]\" scikit-learn"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c049c692",
|
|
"metadata": {},
|
|
"source": [
|
|
"Then we need some imports:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "c02eb5cd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import argparse\n",
|
|
"import math\n",
|
|
"from typing import Tuple\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"import ray\n",
|
|
"from ray.data.dataset import Dataset\n",
|
|
"from ray.air.batch_predictor import BatchPredictor\n",
|
|
"from ray.air.predictors.integrations.sklearn import SklearnPredictor\n",
|
|
"from ray.air.preprocessors import Chain, OrdinalEncoder, StandardScaler\n",
|
|
"from ray.air.result import Result\n",
|
|
"from ray.train.sklearn import SklearnTrainer\n",
|
|
"\n",
|
|
"\n",
|
|
"from sklearn.datasets import load_breast_cancer\n",
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
|
|
"try:\n",
|
|
" from cuml.ensemble import RandomForestClassifier as cuMLRandomForestClassifier\n",
|
|
"except ImportError:\n",
|
|
" cuMLRandomForestClassifier = None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "52e017f1",
|
|
"metadata": {},
|
|
"source": [
|
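|
"Next we define a function to load our train, validation, and test datasets. Note that the validation and test datasets are built from the same held-out 30% split; the test dataset simply has the `target` column dropped."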
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "3631ed1e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def prepare_data() -> Tuple[Dataset, Dataset, Dataset]:\n",
|
|
" data_raw = load_breast_cancer()\n",
|
|
" dataset_df = pd.DataFrame(data_raw[\"data\"], columns=data_raw[\"feature_names\"])\n",
|
|
" dataset_df[\"target\"] = data_raw[\"target\"]\n",
|
|
" # add a random categorical column\n",
|
|
" num_samples = len(dataset_df)\n",
|
|
" dataset_df[\"categorical_column\"] = pd.Series(\n",
|
|
" ([\"A\", \"B\"] * math.ceil(num_samples / 2))[:num_samples]\n",
|
|
" )\n",
|
|
" train_df, test_df = train_test_split(dataset_df, test_size=0.3)\n",
|
|
" train_dataset = ray.data.from_pandas(train_df)\n",
|
|
" valid_dataset = ray.data.from_pandas(test_df)\n",
|
|
" test_dataset = ray.data.from_pandas(test_df.drop(\"target\", axis=1))\n",
|
|
" return train_dataset, valid_dataset, test_dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8d6c6d17",
|
|
"metadata": {},
|
|
"source": [
|
|
"The following function will create a Sklearn trainer, train it, and return the result."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "0fd39e42",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def train_sklearn(num_cpus: int, use_gpu: bool = False) -> Result:\n",
|
|
" if use_gpu and not cuMLRandomForestClassifier:\n",
|
|
" raise RuntimeError(\"cuML must be installed for GPU enabled sklearn estimators.\")\n",
|
|
"\n",
|
|
" train_dataset, valid_dataset, _ = prepare_data()\n",
|
|
"\n",
|
|
" # Scale some random columns\n",
|
|
" columns_to_scale = [\"mean radius\", \"mean texture\"]\n",
|
|
" preprocessor = Chain(\n",
|
|
" OrdinalEncoder([\"categorical_column\"]), StandardScaler(columns=columns_to_scale)\n",
|
|
" )\n",
|
|
"\n",
|
|
" if use_gpu:\n",
|
|
" trainer_resources = {\"CPU\": 1, \"GPU\": 1}\n",
|
|
" estimator = cuMLRandomForestClassifier()\n",
|
|
" else:\n",
|
|
" trainer_resources = {\"CPU\": num_cpus}\n",
|
|
" estimator = RandomForestClassifier()\n",
|
|
"\n",
|
|
" trainer = SklearnTrainer(\n",
|
|
" estimator=estimator,\n",
|
|
" label_column=\"target\",\n",
|
|
" datasets={\"train\": train_dataset, \"valid\": valid_dataset},\n",
|
|
" preprocessor=preprocessor,\n",
|
|
" cv=5,\n",
|
|
" scaling_config={\n",
|
|
" \"trainer_resources\": trainer_resources,\n",
|
|
" },\n",
|
|
" )\n",
|
|
" result = trainer.fit()\n",
|
|
" print(result.metrics)\n",
|
|
"\n",
|
|
" return result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7a2efb9d",
|
|
"metadata": {},
|
|
"source": [
|
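|
"Once we have the result, we can do batch inference on the obtained model. Let's define a utility function for this. Note that it calls `prepare_data()` again, and because the train/test split is not seeded, the rows it predicts on come from a fresh random split that may overlap with the rows used for training."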
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "59eeadd8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def predict_sklearn(result: Result, use_gpu: bool = False):\n",
|
|
" _, _, test_dataset = prepare_data()\n",
|
|
"\n",
|
|
" batch_predictor = BatchPredictor.from_checkpoint(\n",
|
|
" result.checkpoint, SklearnPredictor\n",
|
|
" )\n",
|
|
"\n",
|
|
" predicted_labels = (\n",
|
|
" batch_predictor.predict(\n",
|
|
" test_dataset,\n",
|
|
" num_gpus_per_worker=int(use_gpu),\n",
|
|
" )\n",
|
|
" .map_batches(lambda df: (df > 0.5).astype(int), batch_format=\"pandas\")\n",
|
|
" .to_pandas(limit=float(\"inf\"))\n",
|
|
" )\n",
|
|
" print(f\"PREDICTED LABELS\\n{predicted_labels}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7d073994",
|
|
"metadata": {},
|
|
"source": [
|
|
"Now we can run the training:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "43f9170a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"2022-05-19 11:56:26,664\tINFO services.py:1483 -- View the Ray dashboard at \u001B[1m\u001B[32mhttp://127.0.0.1:8266\u001B[39m\u001B[22m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"== Status ==<br>Current time: 2022-05-19 11:56:51 (running for 00:00:20.56)<br>Memory usage on this node: 10.1/16.0 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/4.64 GiB heap, 0.0/2.0 GiB objects<br>Result logdir: /Users/kai/ray_results/SklearnTrainer_2022-05-19_11-56-29<br>Number of trials: 1/1 (1 TERMINATED)<br><table>\n",
|
|
"<thead>\n",
|
|
"<tr><th>Trial name </th><th>status </th><th>loc </th><th style=\"text-align: right;\"> iter</th><th style=\"text-align: right;\"> total time (s)</th><th style=\"text-align: right;\"> fit_time</th></tr>\n",
|
|
"</thead>\n",
|
|
"<tbody>\n",
|
|
"<tr><td>SklearnTrainer_564d9_00000</td><td>TERMINATED</td><td>127.0.0.1:12221</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 17.1905</td><td style=\"text-align: right;\"> 2.48662</td></tr>\n",
|
|
"</tbody>\n",
|
|
"</table><br><br>"
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:31,837\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=16 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:34,848\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=17 --runtime-env-hash=-2010331069\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m 2022-05-19 11:56:36,385\tWARNING pool.py:591 -- The 'context' argument is not supported using ray. Please refer to the documentation for how to control ray initialization.\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:37,344\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=19 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:37,344\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=18 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:39,843\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=21 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:39,845\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=20 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:42,324\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=23 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:42,324\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=22 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:44,748\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=24 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:44,749\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=25 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:47,193\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=27 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:47,193\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=26 --runtime-env-hash=-2010331134\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:49,612\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=28 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:49,612\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=29 --runtime-env-hash=-2010331134\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Result for SklearnTrainer_564d9_00000:\n",
|
|
" cv:\n",
|
|
" fit_time:\n",
|
|
" - 2.402121067047119\n",
|
|
" - 2.312839984893799\n",
|
|
" - 2.3265390396118164\n",
|
|
" - 2.325679063796997\n",
|
|
" - 2.3602960109710693\n",
|
|
" fit_time_mean: 2.34549503326416\n",
|
|
" fit_time_std: 0.032384969255539235\n",
|
|
" score_time:\n",
|
|
" - 0.10820889472961426\n",
|
|
" - 0.10829401016235352\n",
|
|
" - 0.10703587532043457\n",
|
|
" - 0.10512709617614746\n",
|
|
" - 0.10840892791748047\n",
|
|
" score_time_mean: 0.10741496086120605\n",
|
|
" score_time_std: 0.0012465199424455708\n",
|
|
" test_score:\n",
|
|
" - 0.9625\n",
|
|
" - 0.8875\n",
|
|
" - 1.0\n",
|
|
" - 0.9493670886075949\n",
|
|
" - 0.9240506329113924\n",
|
|
" test_score_mean: 0.9446835443037976\n",
|
|
" test_score_std: 0.03766947497186954\n",
|
|
" date: 2022-05-19_11-56-51\n",
|
|
" done: false\n",
|
|
" experiment_id: 200cbc1e2b84434882732d2053ec45c2\n",
|
|
" fit_time: 2.4866180419921875\n",
|
|
" hostname: Kais-MacBook-Pro.local\n",
|
|
" iterations_since_restore: 1\n",
|
|
" node_ip: 127.0.0.1\n",
|
|
" pid: 12221\n",
|
|
" should_checkpoint: true\n",
|
|
" time_since_restore: 17.19045615196228\n",
|
|
" time_this_iter_s: 17.19045615196228\n",
|
|
" time_total_s: 17.19045615196228\n",
|
|
" timestamp: 1652957811\n",
|
|
" timesteps_since_restore: 0\n",
|
|
" training_iteration: 1\n",
|
|
" trial_id: 564d9_00000\n",
|
|
" valid:\n",
|
|
" score_time: 0.10993409156799316\n",
|
|
" test_score: 0.9473684210526315\n",
|
|
" warmup_time: 0.0039539337158203125\n",
|
|
" \n",
|
|
"Result for SklearnTrainer_564d9_00000:\n",
|
|
" cv:\n",
|
|
" fit_time:\n",
|
|
" - 2.402121067047119\n",
|
|
" - 2.312839984893799\n",
|
|
" - 2.3265390396118164\n",
|
|
" - 2.325679063796997\n",
|
|
" - 2.3602960109710693\n",
|
|
" fit_time_mean: 2.34549503326416\n",
|
|
" fit_time_std: 0.032384969255539235\n",
|
|
" score_time:\n",
|
|
" - 0.10820889472961426\n",
|
|
" - 0.10829401016235352\n",
|
|
" - 0.10703587532043457\n",
|
|
" - 0.10512709617614746\n",
|
|
" - 0.10840892791748047\n",
|
|
" score_time_mean: 0.10741496086120605\n",
|
|
" score_time_std: 0.0012465199424455708\n",
|
|
" test_score:\n",
|
|
" - 0.9625\n",
|
|
" - 0.8875\n",
|
|
" - 1.0\n",
|
|
" - 0.9493670886075949\n",
|
|
" - 0.9240506329113924\n",
|
|
" test_score_mean: 0.9446835443037976\n",
|
|
" test_score_std: 0.03766947497186954\n",
|
|
" date: 2022-05-19_11-56-51\n",
|
|
" done: true\n",
|
|
" experiment_id: 200cbc1e2b84434882732d2053ec45c2\n",
|
|
" experiment_tag: '0'\n",
|
|
" fit_time: 2.4866180419921875\n",
|
|
" hostname: Kais-MacBook-Pro.local\n",
|
|
" iterations_since_restore: 1\n",
|
|
" node_ip: 127.0.0.1\n",
|
|
" pid: 12221\n",
|
|
" should_checkpoint: true\n",
|
|
" time_since_restore: 17.19045615196228\n",
|
|
" time_this_iter_s: 17.19045615196228\n",
|
|
" time_total_s: 17.19045615196228\n",
|
|
" timestamp: 1652957811\n",
|
|
" timesteps_since_restore: 0\n",
|
|
" training_iteration: 1\n",
|
|
" trial_id: 564d9_00000\n",
|
|
" valid:\n",
|
|
" score_time: 0.10993409156799316\n",
|
|
" test_score: 0.9473684210526315\n",
|
|
" warmup_time: 0.0039539337158203125\n",
|
|
" \n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:320: UserWarning: resource_tracker: There appear to be 6 leaked folder objects to clean up at shutdown\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m (len(rtype_registry), rtype))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_5f6216ae1e6a46ba9d419e794af5d6af_23c04cd6260143c0ac6f5dbe654ee805: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_a9bc9a60f53a487e91b551aaace31955_1d562711c03e42ff9f97698134ab33f7: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_4130f87b8a7a41d4bb44d3ff87c47d73_d3df48add59547d89737f42c03172fa5: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_28d4366efda3422c93d8ad3a8d66986e_9d1ab8d6a92146829caf48550752190d: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_4dc9b4c717294776b8162f30cc5eb4fe_068611691a404ca18d46ab1be089bc5a: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_0b60850fd8704b0e83f6c2758d9c1f2a_6ae1cfa0a68741b8b71f28a262bd7f7a: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"2022-05-19 11:56:51,305\tINFO tune.py:753 -- Total run time: 21.67 seconds (20.55 seconds for the tuning loop).\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{'valid': {'score_time': 0.10993409156799316, 'test_score': 0.9473684210526315}, 'cv': {'fit_time': array([2.40212107, 2.31283998, 2.32653904, 2.32567906, 2.36029601]), 'score_time': array([0.10820889, 0.10829401, 0.10703588, 0.1051271 , 0.10840893]), 'test_score': array([0.9625 , 0.8875 , 1. , 0.94936709, 0.92405063]), 'fit_time_mean': 2.34549503326416, 'fit_time_std': 0.032384969255539235, 'score_time_mean': 0.10741496086120605, 'score_time_std': 0.0012465199424455708, 'test_score_mean': 0.9446835443037976, 'test_score_std': 0.03766947497186954}, 'fit_time': 2.4866180419921875, 'time_this_iter_s': 17.19045615196228, 'should_checkpoint': True, 'done': True, 'timesteps_total': None, 'episodes_total': None, 'training_iteration': 1, 'trial_id': '564d9_00000', 'experiment_id': '200cbc1e2b84434882732d2053ec45c2', 'date': '2022-05-19_11-56-51', 'timestamp': 1652957811, 'time_total_s': 17.19045615196228, 'pid': 12221, 'hostname': 'Kais-MacBook-Pro.local', 'node_ip': '127.0.0.1', 'config': {}, 'time_since_restore': 17.19045615196228, 'timesteps_since_restore': 0, 'iterations_since_restore': 1, 'warmup_time': 0.0039539337158203125, 'experiment_tag': '0'}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"result = train_sklearn(num_cpus=2, use_gpu=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0daba603",
|
|
"metadata": {},
|
|
"source": [
|
|
"And perform inference on the obtained model:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "24b16ede",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Map Progress (1 actors 1 pending): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00, 1.59s/it]\n",
|
|
"Map_Batches: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 95.33it/s]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"PREDICTED LABELS\n",
|
|
" predictions\n",
|
|
"0 1\n",
|
|
"1 1\n",
|
|
"2 1\n",
|
|
"3 1\n",
|
|
"4 1\n",
|
|
".. ...\n",
|
|
"166 1\n",
|
|
"167 1\n",
|
|
"168 0\n",
|
|
"169 0\n",
|
|
"170 1\n",
|
|
"\n",
|
|
"[171 rows x 1 columns]\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"predict_sklearn(result, use_gpu=False)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"jupytext": {
|
|
"cell_metadata_filter": "-all",
|
|
"main_language": "python",
|
|
"notebook_metadata_filter": "-all"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
} |