Mirror of https://github.com/vale981/ray, synced 2025-03-06 10:31:39 -05:00
[tune] Remove TF MNIST example + add TrialRunner hook to execut… (#5868)
* remove test
* add trial runner
* remove restore
* Remove other mnist examples
* tunetest
* revert
* v1
* Revert "v1". This reverts commit c8bddaf2db7a8270c43c02021cac0e75df15ed20.
* Revert "revert". This reverts commit b58f56884a0c288d3a6f997d149ab4d496ddd7a3.
* errors
* format
This commit is contained in:
parent 52e5c9b22d
commit 1650f7b174
13 changed files with 26 additions and 788 deletions
@@ -46,10 +46,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/tests/tutorial.py

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_ray.py \
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/pbt_example.py \
    --smoke-test

@@ -68,14 +64,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    bash -c 'pip install tensorflow==1.15.0rc1 && python /ray/python/ray/tune/examples/async_hyperband_example.py --smoke-test'

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_async_hyperband.py \
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/lightgbm_example.py

@@ -126,7 +114,7 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/memnn_example.py \
    python /ray/python/ray/tune/examples/pbt_memnn_example.py \
    --smoke-test

# uncomment once statsmodels is updated.
@@ -16,7 +16,6 @@ General Examples
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__: Example of using a Trainable class with PopulationBasedTraining scheduler.
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__: Example of custom loggers and custom trial directory naming.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.

Search Algorithm Examples
-------------------------

@@ -26,10 +25,11 @@ Search Algorithm Examples
- `Nevergrad example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/nevergrad_example.py>`__: Optimize a simple toy function with the gradient-free optimization package `Nevergrad <https://github.com/facebookresearch/nevergrad>`_ with 4 parallel workers.
- `Bayesian Optimization example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/bayesopt_example.py>`__: Optimize a simple toy function using `Bayesian Optimization <https://github.com/fmfn/BayesianOptimization>`_ with 4 parallel workers.

Keras Examples
--------------
Tensorflow/Keras Examples
-------------------------

- `tune_mnist_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_keras.py>`__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.


PyTorch Examples

@@ -39,14 +39,6 @@ PyTorch Examples
- `mnist_pytorch_trainable <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch_trainable.py>`__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end.


TensorFlow Examples
-------------------

- `tune_mnist_ray <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class.
- `tune_mnist_ray_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray_hyperband.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler.
- `tune_mnist_async_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_async_hyperband.py>`__: Example of tuning a TensorFlow model on MNIST using AsyncHyperBand.


XGBoost Example
---------------
@@ -16,7 +16,6 @@ General Examples
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__: Example of using a Trainable class with PopulationBasedTraining scheduler.
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__: Example of custom loggers and custom trial directory naming.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.

Search Algorithm Examples
-------------------------

@@ -26,10 +25,11 @@ Search Algorithm Examples
- `Nevergrad example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/nevergrad_example.py>`__: Optimize a simple toy function with the gradient-free optimization package `Nevergrad <https://github.com/facebookresearch/nevergrad>`_ with 4 parallel workers.
- `Bayesian Optimization example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/bayesopt_example.py>`__: Optimize a simple toy function using `Bayesian Optimization <https://github.com/fmfn/BayesianOptimization>`_ with 4 parallel workers.

Keras Examples
--------------
Tensorflow/Keras Examples
-------------------------

- `tune_mnist_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_keras.py>`__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.


PyTorch Examples

@@ -39,14 +39,6 @@ PyTorch Examples
- `mnist_pytorch_trainable <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch_trainable.py>`__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end.


TensorFlow Examples
-------------------

- `tune_mnist_ray <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class.
- `tune_mnist_ray_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray_hyperband.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler.
- `tune_mnist_async_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_async_hyperband.py>`__: Example of tuning a TensorFlow model on MNIST using AsyncHyperBand.


XGBoost Example
---------------
@@ -5,15 +5,15 @@ References Keras and is based off of https://keras.io/examples/babi_memnn/.

from __future__ import print_function

from tensorflow.python.keras.models import Sequential, Model, load_model
from tensorflow.python.keras.layers.embeddings import Embedding
from tensorflow.python.keras.layers import (Input, Activation, Dense, Permute,
                                            Dropout)
from tensorflow.python.keras.layers import add, dot, concatenate
from tensorflow.python.keras.layers import LSTM
from tensorflow.python.keras.optimizers import RMSprop
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import (Input, Activation, Dense, Permute,
                                     Dropout)
from tensorflow.keras.layers import add, dot, concatenate
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
from tensorflow.keras.preprocessing.sequence import pad_sequences
from ray.tune import Trainable
import argparse
import tarfile
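The import hunk above moves the example off TensorFlow's private tensorflow.python.keras namespace onto the public tf.keras API. As a rough sanity check of that style (a minimal sketch assuming TensorFlow 1.14+ or 2.x is installed; the toy model below is illustrative and not part of this change):

# Illustrative only: verify the public tf.keras imports used above resolve
# and can build and compile a trivial model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop

model = Sequential([Dense(4, activation="relu", input_shape=(8,)), Dense(1)])
model.compile(optimizer=RMSprop(), loss="mse")
model.summary()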
@@ -1,247 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.

See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import tempfile
import time

import ray
from ray.tune import grid_search, run

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

FLAGS = None
status_reporter = None  # used to report training status back to Ray
activation_fn = None  # e.g. tf.nn.relu


def deepnn(x):
    """deepnn builds the graph for a deep net for classifying digits.

    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.

    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with
      values equal to the logits of classifying the digit into one of 10
      classes (the digits 0-9). keep_prob is a scalar placeholder for the
      probability of dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope("reshape"):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope("conv1"):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope("pool1"):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope("conv2"):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope("pool2"):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
    # is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope("fc1"):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope("dropout"):
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit
    with tf.name_scope("fc2"):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob


def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def main(_):
    # Import data
    for _ in range(10):
        try:
            mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
            break
        except Exception:
            time.sleep(5)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope("adam_optimizer"):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print("Saving graph to: %s" % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 10 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 1.0
                })

                # !!! Report status to ray.tune !!!
                if status_reporter:
                    status_reporter(
                        timesteps_total=i, mean_accuracy=train_accuracy)

                print("step %d, training accuracy %g" % (i, train_accuracy))
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5
            })

        print("test accuracy %g" % accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            keep_prob: 1.0
        }))


# !!! Entrypoint for ray.tune !!!
def train(config={"activation": "relu"}, reporter=None):
    global FLAGS, status_reporter, activation_fn
    status_reporter = reporter
    activation_fn = getattr(tf.nn, config["activation"])
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        type=str,
        default="/tmp/tensorflow/mnist/input_data",
        help="Directory for storing input data")
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()

    mnist_spec = {
        "num_samples": 10,
        "stop": {
            "mean_accuracy": 0.99,
            "timesteps_total": 600,
        },
        "config": {
            "activation": grid_search(["relu", "elu", "tanh"]),
        },
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 2
        mnist_spec["num_samples"] = 1

    ray.init()

    from ray.tune.schedulers import AsyncHyperBandScheduler
    run(train,
        name="tune_mnist_test",
        scheduler=AsyncHyperBandScheduler(
            time_attr="timesteps_total",
            metric="mean_accuracy",
            mode="max",
            max_t=600,
        ),
        **mnist_spec)
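The fully deleted file above (the @@ -1,247 +0,0 @@ hunk) wired the reporter-style function API into an AsyncHyperBandScheduler run. For orientation, a minimal TensorFlow-free sketch of that same pattern, assuming a Tune release from this era (~0.7.x); the objective and names are illustrative, not part of the commit:

import random

import ray
from ray import tune


def toy_objective(config, reporter):
    # Stand-in for the MNIST training loop: report a fake accuracy per step.
    for step in range(10):
        acc = min(0.99, 0.1 * step + random.random() * 0.05)
        reporter(timesteps_total=step, mean_accuracy=acc)


if __name__ == "__main__":
    ray.init()
    tune.run(toy_objective, name="toy_reporter_example", num_samples=2)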
@@ -1,241 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.

See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import tempfile
import time

import ray
from ray import tune
from ray.tune import grid_search, register_trainable

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

import tensorflow as tf

FLAGS = None
status_reporter = None  # used to report training status back to Ray
activation_fn = tf.nn.relu  # e.g. tf.nn.relu


def deepnn(x):
    """deepnn builds the graph for a deep net for classifying digits.

    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.

    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with
      values equal to the logits of classifying the digit into one of 10
      classes (the digits 0-9). keep_prob is a scalar placeholder for the
      probability of dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope("reshape"):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope("conv1"):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope("pool1"):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope("conv2"):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope("pool2"):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
    # is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope("fc1"):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope("dropout"):
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit
    with tf.name_scope("fc2"):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob


def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def main(_):
    # Import data
    for _ in range(10):
        try:
            mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
            break
        except Exception:
            time.sleep(5)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope("adam_optimizer"):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print("Saving graph to: %s" % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 10 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 1.0
                })

                # !!! Report status to ray.tune !!!
                if status_reporter:
                    status_reporter(
                        timesteps_total=i, mean_accuracy=train_accuracy)

                print("step %d, training accuracy %g" % (i, train_accuracy))
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5
            })

        print("test accuracy %g" % accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            keep_prob: 1.0
        }))


# !!! Entrypoint for ray.tune !!!
def train(config={"activation": "relu"}, reporter=None):
    global FLAGS, status_reporter, activation_fn
    status_reporter = reporter
    activation_fn = getattr(tf.nn, config["activation"])
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        type=str,
        default="/tmp/tensorflow/mnist/input_data",
        help="Directory for storing input data")
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()

    register_trainable("train_mnist", train)
    mnist_spec = {
        "stop": {
            "mean_accuracy": 0.99,
            "time_total_s": 600,
        },
        "config": {
            "activation": grid_search(["relu", "elu", "tanh"]),
            # You can pass any serializable object as well
            "foo": grid_search([np.array([1, 2]),
                                np.array([2, 3])]),
        },
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 2

    ray.init()
    tune.run("train_mnist", name="tune_mnist_test", **mnist_spec)
@@ -1,241 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import time

import ray
from ray import tune
from ray.tune import Trainable, sample_from
from ray.tune.schedulers import HyperBandScheduler
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
import numpy as np

activation_fn = None  # e.g. tf.nn.relu


def setupCNN(x):
    """setupCNN builds the graph for a deep net for classifying digits.
    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.
    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with
      values equal to the logits of classifying the digit into one of 10
      classes (the digits 0-9). keep_prob is a scalar placeholder for the
      probability of dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope("reshape"):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope("conv1"):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope("pool1"):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope("conv2"):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope("pool2"):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
    # is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope("fc1"):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope("dropout"):
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit
    with tf.name_scope("fc2"):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob


def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


class TrainMNIST(Trainable):
    """Example MNIST trainable."""

    def _setup(self, config):
        global activation_fn

        self.timestep = 0

        # Import data
        for _ in range(10):
            try:
                self.mnist = input_data.read_data_sets(
                    "/tmp/mnist_ray_demo", one_hot=True)
                break
            except Exception as e:
                print("Error loading data, retrying", e)
                time.sleep(5)

        assert self.mnist

        self.x = tf.placeholder(tf.float32, [None, 784])
        self.y_ = tf.placeholder(tf.float32, [None, 10])

        activation_fn = getattr(tf.nn, config.get("activation", "relu"))

        # Build the graph for the deep net
        y_conv, self.keep_prob = setupCNN(self.x)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.y_, logits=y_conv)
            cross_entropy = tf.reduce_mean(cross_entropy)

        with tf.name_scope("adam_optimizer"):
            train_step = tf.train.AdamOptimizer(
                config.get("learning_rate", 1e-4)).minimize(cross_entropy)

        self.train_step = train_step

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self.y_, 1))
            correct_prediction = tf.cast(correct_prediction, tf.float32)
            self.accuracy = tf.reduce_mean(correct_prediction)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(save_relative_paths=True)

    def _train(self):
        for i in range(10):
            batch = self.mnist.train.next_batch(50)
            self.sess.run(
                self.train_step,
                feed_dict={
                    self.x: batch[0],
                    self.y_: batch[1],
                    self.keep_prob: 0.5
                })

        batch = self.mnist.train.next_batch(50)
        train_accuracy = self.sess.run(
            self.accuracy,
            feed_dict={
                self.x: batch[0],
                self.y_: batch[1],
                self.keep_prob: 1.0
            })
        return {"mean_accuracy": train_accuracy}

    def _save(self, checkpoint_dir):
        path = self.saver.save(self.sess, os.path.join(checkpoint_dir, "save"))
        return path

    def _restore(self, checkpoint_path):
        self.saver.restore(self.sess, checkpoint_path)


# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    mnist_spec = {
        "stop": {
            "mean_accuracy": 0.99,
            "time_total_s": 600,
        },
        "config": {
            "learning_rate": sample_from(
                lambda spec: 10**np.random.uniform(-5, -3)),
            "activation": "relu",
        },
        "num_samples": 10,
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 20
        mnist_spec["num_samples"] = 1

    ray.init()
    hyperband = HyperBandScheduler(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        max_t=10)

    tune.run(
        TrainMNIST,
        name="mnist_hyperband_test",
        scheduler=hyperband,
        **mnist_spec)
@@ -523,7 +523,7 @@ class RayTrialExecutor(TrialExecutor):
        else:
            return "? CPUs, ? GPUs"

    def on_step_begin(self):
    def on_step_begin(self, trial_runner):
        """Before step() called, update the available resources."""
        self._update_avail_resources()
@@ -39,20 +39,20 @@ MEAN_ACCURACY = "mean_accuracy"
# Number of episodes in this iteration.
EPISODES_THIS_ITER = "episodes_this_iter"

# (Optional/Auto-filled) Accumulated number of episodes for this experiment.
# (Optional/Auto-filled) Accumulated number of episodes for this trial.
EPISODES_TOTAL = "episodes_total"

# Number of timesteps in this iteration.
TIMESTEPS_THIS_ITER = "timesteps_this_iter"

# (Auto-filled) Accumulated number of timesteps for this entire experiment.
# (Auto-filled) Accumulated number of timesteps for this entire trial.
TIMESTEPS_TOTAL = "timesteps_total"

# (Auto-filled) Time in seconds this iteration took to run.
# This may be overriden to override the system-computed time difference.
TIME_THIS_ITER_S = "time_this_iter_s"

# (Auto-filled) Accumulated time in seconds for this entire experiment.
# (Auto-filled) Accumulated time in seconds for this entire trial.
TIME_TOTAL_S = "time_total_s"

# (Auto-filled) The index of this training iteration.
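The comment changes above narrow the auto-filled accumulators from per-experiment to per-trial scope. A small illustrative sketch of that bookkeeping (the numbers and names here are hypothetical, not taken from this diff):

# Per-iteration results reported by ONE trial (hypothetical values).
iteration_results = [
    {"timesteps_this_iter": 100, "time_this_iter_s": 2.0},
    {"timesteps_this_iter": 100, "time_this_iter_s": 2.5},
]

# Tune accumulates these per trial, so this trial would show
# timesteps_total=200 and time_total_s=4.5; other trials in the same
# experiment keep their own independent totals.
timesteps_total = sum(r["timesteps_this_iter"] for r in iteration_results)
time_total_s = sum(r["time_this_iter_s"] for r in iteration_results)
print(timesteps_total, time_total_s)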
@@ -2081,12 +2081,12 @@ class TrialRunnerTest(unittest.TestCase):
        ray.init(num_cpus=4, num_gpus=2)
        runner = TrialRunner()

        def on_step_begin(self):
        def on_step_begin(self, trialrunner):
            self._update_avail_resources()
            cnt = self.pre_step if hasattr(self, "pre_step") else 0
            setattr(self, "pre_step", cnt + 1)

        def on_step_end(self):
        def on_step_end(self, trialrunner):
            cnt = self.pre_step if hasattr(self, "post_step") else 0
            setattr(self, "post_step", 1 + cnt)
@@ -71,11 +71,6 @@ class TuneExampleTest(unittest.TestCase):
        ray.shutdown()
        _register_all()

    def testTensorFlowMNIST(self):
        from ray.tune.examples.tune_mnist_ray_hyperband import TrainMNIST
        validate_save_restore(TrainMNIST)
        validate_save_restore(TrainMNIST, use_object_store=True)

    def testPBTKeras(self):
        from ray.tune.examples.pbt_tune_cifar10_with_keras import Cifar10Model
        from tensorflow.python.keras.datasets import cifar10
@@ -142,11 +142,11 @@ class TrialExecutor(object):
        raise NotImplementedError("Subclasses of TrialExecutor must provide "
                                  "get_running_trials() method")

    def on_step_begin(self):
    def on_step_begin(self, trial_runner):
        """A hook called before running one step of the trial event loop."""
        pass

    def on_step_end(self):
    def on_step_end(self, trial_runner):
        """A hook called after running one step of the trial event loop."""
        pass
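With the signature change above, the executor hooks now receive the TrialRunner that drives the event loop. A minimal sketch of a custom executor making use of that argument (the subclass name and its logging are illustrative, not part of this commit):

from ray.tune.ray_trial_executor import RayTrialExecutor


class VerboseExecutor(RayTrialExecutor):
    """Illustrative subclass: peeks at the runner passed into the new hook."""

    def on_step_begin(self, trial_runner):
        # The runner is now visible to the hook, e.g. to inspect its trials.
        print("Stepping with %d trials" % len(trial_runner.get_trials()))
        super(VerboseExecutor, self).on_step_begin(trial_runner)

Assuming this era's API, such an executor could be handed to tune.run through its trial_executor argument.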
@@ -326,7 +326,7 @@ class TrialRunner(object):
        if self.is_finished():
            raise TuneError("Called step when all trials finished?")
        with warn_if_slow("on_step_begin"):
            self.trial_executor.on_step_begin()
            self.trial_executor.on_step_begin(self)
        next_trial = self._get_next_trial()  # blocking
        if next_trial is not None:
            with warn_if_slow("start_trial"):

@@ -367,7 +367,7 @@ class TrialRunner(object):
        if self.is_finished():
            self._server.shutdown()
        with warn_if_slow("on_step_end"):
            self.trial_executor.on_step_end()
            self.trial_executor.on_step_end(self)

    def get_trial(self, tid):
        trial = [t for t in self._trials if t.trial_id == tid]