mirror of
https://github.com/vale981/ray
synced 2025-03-09 04:46:38 -04:00

The package "ml" should be renamed to "air". Main question: Keep a `ml.py` with `from ray.air import *` for some level of backwards compatibility? I'd go for no to force people to use the new structure.
475 lines
No EOL
28 KiB
Text
475 lines
No EOL
28 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c3192ac4",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Training a model with Sklearn\n",
|
|
"In this example we will train a model in Ray Air using a Sklearn classifier."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "5a4823bf",
|
|
"metadata": {},
|
|
"source": [
|
|
"Let's start with installing our dependencies:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "88f4bb39",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip install -qU \"ray[tune]\" sklearn"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c049c692",
|
|
"metadata": {},
|
|
"source": [
|
|
"Then we need some imports:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "c02eb5cd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import argparse\n",
|
|
"import math\n",
|
|
"from typing import Tuple\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"import ray\n",
|
|
"from ray.data.dataset import Dataset\n",
|
|
"from ray.air.batch_predictor import BatchPredictor\n",
|
|
"from ray.air.predictors.integrations.sklearn import SklearnPredictor\n",
|
|
"from ray.air.preprocessors import Chain, OrdinalEncoder, StandardScaler\n",
|
|
"from ray.air.result import Result\n",
|
|
"from ray.air.train.integrations.sklearn import SklearnTrainer\n",
|
|
"\n",
|
|
"\n",
|
|
"from sklearn.datasets import load_breast_cancer\n",
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
|
|
"try:\n",
|
|
" from cuml.ensemble import RandomForestClassifier as cuMLRandomForestClassifier\n",
|
|
"except ImportError:\n",
|
|
" cuMLRandomForestClassifier = None"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "52e017f1",
|
|
"metadata": {},
|
|
"source": [
|
|
"Next we define a function to load our train, validation, and test datasets."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "3631ed1e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def prepare_data() -> Tuple[Dataset, Dataset, Dataset]:\n",
|
|
" data_raw = load_breast_cancer()\n",
|
|
" dataset_df = pd.DataFrame(data_raw[\"data\"], columns=data_raw[\"feature_names\"])\n",
|
|
" dataset_df[\"target\"] = data_raw[\"target\"]\n",
|
|
" # add a random categorical column\n",
|
|
" num_samples = len(dataset_df)\n",
|
|
" dataset_df[\"categorical_column\"] = pd.Series(\n",
|
|
" ([\"A\", \"B\"] * math.ceil(num_samples / 2))[:num_samples]\n",
|
|
" )\n",
|
|
" train_df, test_df = train_test_split(dataset_df, test_size=0.3)\n",
|
|
" train_dataset = ray.data.from_pandas(train_df)\n",
|
|
" valid_dataset = ray.data.from_pandas(test_df)\n",
|
|
" test_dataset = ray.data.from_pandas(test_df.drop(\"target\", axis=1))\n",
|
|
" return train_dataset, valid_dataset, test_dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8d6c6d17",
|
|
"metadata": {},
|
|
"source": [
|
|
"The following function will create a Sklearn trainer, train it, and return the result."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "0fd39e42",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def train_sklearn(num_cpus: int, use_gpu: bool = False) -> Result:\n",
|
|
" if use_gpu and not cuMLRandomForestClassifier:\n",
|
|
" raise RuntimeError(\"cuML must be installed for GPU enabled sklearn estimators.\")\n",
|
|
"\n",
|
|
" train_dataset, valid_dataset, _ = prepare_data()\n",
|
|
"\n",
|
|
" # Scale some random columns\n",
|
|
" columns_to_scale = [\"mean radius\", \"mean texture\"]\n",
|
|
" preprocessor = Chain(\n",
|
|
" OrdinalEncoder([\"categorical_column\"]), StandardScaler(columns=columns_to_scale)\n",
|
|
" )\n",
|
|
"\n",
|
|
" if use_gpu:\n",
|
|
" trainer_resources = {\"CPU\": 1, \"GPU\": 1}\n",
|
|
" estimator = cuMLRandomForestClassifier()\n",
|
|
" else:\n",
|
|
" trainer_resources = {\"CPU\": num_cpus}\n",
|
|
" estimator = RandomForestClassifier()\n",
|
|
"\n",
|
|
" trainer = SklearnTrainer(\n",
|
|
" estimator=estimator,\n",
|
|
" label_column=\"target\",\n",
|
|
" datasets={\"train\": train_dataset, \"valid\": valid_dataset},\n",
|
|
" preprocessor=preprocessor,\n",
|
|
" cv=5,\n",
|
|
" scaling_config={\n",
|
|
" \"trainer_resources\": trainer_resources,\n",
|
|
" },\n",
|
|
" )\n",
|
|
" result = trainer.fit()\n",
|
|
" print(result.metrics)\n",
|
|
"\n",
|
|
" return result"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7a2efb9d",
|
|
"metadata": {},
|
|
"source": [
|
|
"Once we have the result, we can do batch inference on the obtained model. Let's define a utility function for this."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "59eeadd8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def predict_sklearn(result: Result, use_gpu: bool = False):\n",
|
|
" _, _, test_dataset = prepare_data()\n",
|
|
"\n",
|
|
" batch_predictor = BatchPredictor.from_checkpoint(\n",
|
|
" result.checkpoint, SklearnPredictor\n",
|
|
" )\n",
|
|
"\n",
|
|
" predicted_labels = (\n",
|
|
" batch_predictor.predict(\n",
|
|
" test_dataset,\n",
|
|
" num_gpus_per_worker=int(use_gpu),\n",
|
|
" )\n",
|
|
" .map_batches(lambda df: (df > 0.5).astype(int), batch_format=\"pandas\")\n",
|
|
" .to_pandas(limit=float(\"inf\"))\n",
|
|
" )\n",
|
|
" print(f\"PREDICTED LABELS\\n{predicted_labels}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7d073994",
|
|
"metadata": {},
|
|
"source": [
|
|
"Now we can run the training:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "43f9170a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"2022-05-19 11:56:26,664\tINFO services.py:1483 -- View the Ray dashboard at \u001B[1m\u001B[32mhttp://127.0.0.1:8266\u001B[39m\u001B[22m\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"== Status ==<br>Current time: 2022-05-19 11:56:51 (running for 00:00:20.56)<br>Memory usage on this node: 10.1/16.0 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/4.64 GiB heap, 0.0/2.0 GiB objects<br>Result logdir: /Users/kai/ray_results/SklearnTrainer_2022-05-19_11-56-29<br>Number of trials: 1/1 (1 TERMINATED)<br><table>\n",
|
|
"<thead>\n",
|
|
"<tr><th>Trial name </th><th>status </th><th>loc </th><th style=\"text-align: right;\"> iter</th><th style=\"text-align: right;\"> total time (s)</th><th style=\"text-align: right;\"> fit_time</th></tr>\n",
|
|
"</thead>\n",
|
|
"<tbody>\n",
|
|
"<tr><td>SklearnTrainer_564d9_00000</td><td>TERMINATED</td><td>127.0.0.1:12221</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 17.1905</td><td style=\"text-align: right;\"> 2.48662</td></tr>\n",
|
|
"</tbody>\n",
|
|
"</table><br><br>"
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:31,837\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=16 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:34,848\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=17 --runtime-env-hash=-2010331069\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m 2022-05-19 11:56:36,385\tWARNING pool.py:591 -- The 'context' argument is not supported using ray. Please refer to the documentation for how to control ray initialization.\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:37,344\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=19 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:37,344\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=18 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:39,843\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=21 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:39,845\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=20 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:42,324\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=23 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:42,324\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=22 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:44,748\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=24 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:44,749\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=25 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:47,193\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=27 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:47,193\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=26 --runtime-env-hash=-2010331134\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:49,612\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=28 --runtime-env-hash=-2010331134\n",
|
|
"\u001B[2m\u001B[33m(raylet)\u001B[0m 2022-05-19 11:56:49,612\tINFO context.py:70 -- Exec'ing worker with command: exec /Users/kai/.pyenv/versions/3.7.7/bin/python3.7 /Users/kai/coding/ray/python/ray/workers/default_worker.py --node-ip-address=127.0.0.1 --node-manager-port=55845 --object-store-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/plasma_store --raylet-name=/tmp/ray/session_2022-05-19_11-56-23_998044_12148/sockets/raylet --redis-address=None --storage=None --temp-dir=/tmp/ray --metrics-agent-port=59341 --logging-rotate-bytes=536870912 --logging-rotate-backup-count=5 --gcs-address=127.0.0.1:58305 --redis-password=5241590000000000 --startup-token=29 --runtime-env-hash=-2010331134\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Result for SklearnTrainer_564d9_00000:\n",
|
|
" cv:\n",
|
|
" fit_time:\n",
|
|
" - 2.402121067047119\n",
|
|
" - 2.312839984893799\n",
|
|
" - 2.3265390396118164\n",
|
|
" - 2.325679063796997\n",
|
|
" - 2.3602960109710693\n",
|
|
" fit_time_mean: 2.34549503326416\n",
|
|
" fit_time_std: 0.032384969255539235\n",
|
|
" score_time:\n",
|
|
" - 0.10820889472961426\n",
|
|
" - 0.10829401016235352\n",
|
|
" - 0.10703587532043457\n",
|
|
" - 0.10512709617614746\n",
|
|
" - 0.10840892791748047\n",
|
|
" score_time_mean: 0.10741496086120605\n",
|
|
" score_time_std: 0.0012465199424455708\n",
|
|
" test_score:\n",
|
|
" - 0.9625\n",
|
|
" - 0.8875\n",
|
|
" - 1.0\n",
|
|
" - 0.9493670886075949\n",
|
|
" - 0.9240506329113924\n",
|
|
" test_score_mean: 0.9446835443037976\n",
|
|
" test_score_std: 0.03766947497186954\n",
|
|
" date: 2022-05-19_11-56-51\n",
|
|
" done: false\n",
|
|
" experiment_id: 200cbc1e2b84434882732d2053ec45c2\n",
|
|
" fit_time: 2.4866180419921875\n",
|
|
" hostname: Kais-MacBook-Pro.local\n",
|
|
" iterations_since_restore: 1\n",
|
|
" node_ip: 127.0.0.1\n",
|
|
" pid: 12221\n",
|
|
" should_checkpoint: true\n",
|
|
" time_since_restore: 17.19045615196228\n",
|
|
" time_this_iter_s: 17.19045615196228\n",
|
|
" time_total_s: 17.19045615196228\n",
|
|
" timestamp: 1652957811\n",
|
|
" timesteps_since_restore: 0\n",
|
|
" training_iteration: 1\n",
|
|
" trial_id: 564d9_00000\n",
|
|
" valid:\n",
|
|
" score_time: 0.10993409156799316\n",
|
|
" test_score: 0.9473684210526315\n",
|
|
" warmup_time: 0.0039539337158203125\n",
|
|
" \n",
|
|
"Result for SklearnTrainer_564d9_00000:\n",
|
|
" cv:\n",
|
|
" fit_time:\n",
|
|
" - 2.402121067047119\n",
|
|
" - 2.312839984893799\n",
|
|
" - 2.3265390396118164\n",
|
|
" - 2.325679063796997\n",
|
|
" - 2.3602960109710693\n",
|
|
" fit_time_mean: 2.34549503326416\n",
|
|
" fit_time_std: 0.032384969255539235\n",
|
|
" score_time:\n",
|
|
" - 0.10820889472961426\n",
|
|
" - 0.10829401016235352\n",
|
|
" - 0.10703587532043457\n",
|
|
" - 0.10512709617614746\n",
|
|
" - 0.10840892791748047\n",
|
|
" score_time_mean: 0.10741496086120605\n",
|
|
" score_time_std: 0.0012465199424455708\n",
|
|
" test_score:\n",
|
|
" - 0.9625\n",
|
|
" - 0.8875\n",
|
|
" - 1.0\n",
|
|
" - 0.9493670886075949\n",
|
|
" - 0.9240506329113924\n",
|
|
" test_score_mean: 0.9446835443037976\n",
|
|
" test_score_std: 0.03766947497186954\n",
|
|
" date: 2022-05-19_11-56-51\n",
|
|
" done: true\n",
|
|
" experiment_id: 200cbc1e2b84434882732d2053ec45c2\n",
|
|
" experiment_tag: '0'\n",
|
|
" fit_time: 2.4866180419921875\n",
|
|
" hostname: Kais-MacBook-Pro.local\n",
|
|
" iterations_since_restore: 1\n",
|
|
" node_ip: 127.0.0.1\n",
|
|
" pid: 12221\n",
|
|
" should_checkpoint: true\n",
|
|
" time_since_restore: 17.19045615196228\n",
|
|
" time_this_iter_s: 17.19045615196228\n",
|
|
" time_total_s: 17.19045615196228\n",
|
|
" timestamp: 1652957811\n",
|
|
" timesteps_since_restore: 0\n",
|
|
" training_iteration: 1\n",
|
|
" trial_id: 564d9_00000\n",
|
|
" valid:\n",
|
|
" score_time: 0.10993409156799316\n",
|
|
" test_score: 0.9473684210526315\n",
|
|
" warmup_time: 0.0039539337158203125\n",
|
|
" \n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:320: UserWarning: resource_tracker: There appear to be 6 leaked folder objects to clean up at shutdown\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m (len(rtype_registry), rtype))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_5f6216ae1e6a46ba9d419e794af5d6af_23c04cd6260143c0ac6f5dbe654ee805: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_a9bc9a60f53a487e91b551aaace31955_1d562711c03e42ff9f97698134ab33f7: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_4130f87b8a7a41d4bb44d3ff87c47d73_d3df48add59547d89737f42c03172fa5: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_28d4366efda3422c93d8ad3a8d66986e_9d1ab8d6a92146829caf48550752190d: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_4dc9b4c717294776b8162f30cc5eb4fe_068611691a404ca18d46ab1be089bc5a: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m /Users/kai/.pyenv/versions/3.7.7/lib/python3.7/site-packages/joblib/externals/loky/backend/resource_tracker.py:333: UserWarning: resource_tracker: /var/folders/b2/0_91bd757rz02lrmr920v0gw0000gn/T/joblib_memmapping_folder_12221_0b60850fd8704b0e83f6c2758d9c1f2a_6ae1cfa0a68741b8b71f28a262bd7f7a: FileNotFoundError(2, 'No such file or directory')\n",
|
|
"\u001B[2m\u001B[36m(TrainTrainable pid=12221)\u001B[0m warnings.warn('resource_tracker: %s: %r' % (name, e))\n",
|
|
"2022-05-19 11:56:51,305\tINFO tune.py:753 -- Total run time: 21.67 seconds (20.55 seconds for the tuning loop).\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{'valid': {'score_time': 0.10993409156799316, 'test_score': 0.9473684210526315}, 'cv': {'fit_time': array([2.40212107, 2.31283998, 2.32653904, 2.32567906, 2.36029601]), 'score_time': array([0.10820889, 0.10829401, 0.10703588, 0.1051271 , 0.10840893]), 'test_score': array([0.9625 , 0.8875 , 1. , 0.94936709, 0.92405063]), 'fit_time_mean': 2.34549503326416, 'fit_time_std': 0.032384969255539235, 'score_time_mean': 0.10741496086120605, 'score_time_std': 0.0012465199424455708, 'test_score_mean': 0.9446835443037976, 'test_score_std': 0.03766947497186954}, 'fit_time': 2.4866180419921875, 'time_this_iter_s': 17.19045615196228, 'should_checkpoint': True, 'done': True, 'timesteps_total': None, 'episodes_total': None, 'training_iteration': 1, 'trial_id': '564d9_00000', 'experiment_id': '200cbc1e2b84434882732d2053ec45c2', 'date': '2022-05-19_11-56-51', 'timestamp': 1652957811, 'time_total_s': 17.19045615196228, 'pid': 12221, 'hostname': 'Kais-MacBook-Pro.local', 'node_ip': '127.0.0.1', 'config': {}, 'time_since_restore': 17.19045615196228, 'timesteps_since_restore': 0, 'iterations_since_restore': 1, 'warmup_time': 0.0039539337158203125, 'experiment_tag': '0'}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"result = train_sklearn(num_cpus=2, use_gpu=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "0daba603",
|
|
"metadata": {},
|
|
"source": [
|
|
"And perform inference on the obtained model:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "24b16ede",
|
|
"metadata": {
|
|
"pycharm": {
|
|
"name": "#%%\n"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Map Progress (1 actors 1 pending): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00, 1.59s/it]\n",
|
|
"Map_Batches: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 95.33it/s]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"PREDICTED LABELS\n",
|
|
" predictions\n",
|
|
"0 1\n",
|
|
"1 1\n",
|
|
"2 1\n",
|
|
"3 1\n",
|
|
"4 1\n",
|
|
".. ...\n",
|
|
"166 1\n",
|
|
"167 1\n",
|
|
"168 0\n",
|
|
"169 0\n",
|
|
"170 1\n",
|
|
"\n",
|
|
"[171 rows x 1 columns]\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"predict_sklearn(result, use_gpu=False)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"jupytext": {
|
|
"cell_metadata_filter": "-all",
|
|
"main_language": "python",
|
|
"notebook_metadata_filter": "-all"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
} |