Mirror of https://github.com/vale981/ray, synced 2025-03-06 10:31:39 -05:00
[tune] Remove TF MNIST example + add TrialRunner hook to execut… (#5868)
* remove test
* add trial runner
* remove restore
* Remove other mnist examples
* tunetest
* revert
* v1
* Revert "v1". This reverts commit c8bddaf2db7a8270c43c02021cac0e75df15ed20.
* Revert "revert". This reverts commit b58f56884a0c288d3a6f997d149ab4d496ddd7a3.
* errors
* format
This commit is contained in:
parent 52e5c9b22d
commit 1650f7b174
13 changed files with 26 additions and 788 deletions
@@ -46,10 +46,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/tests/tutorial.py

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_ray.py \
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/pbt_example.py \
    --smoke-test

@@ -68,14 +64,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    bash -c 'pip install tensorflow==1.15.0rc1 && python /ray/python/ray/tune/examples/async_hyperband_example.py --smoke-test'

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/tune_mnist_async_hyperband.py \
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/lightgbm_example.py

@@ -126,7 +114,7 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
    --smoke-test

$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    python /ray/python/ray/tune/examples/memnn_example.py \
    python /ray/python/ray/tune/examples/pbt_memnn_example.py \
    --smoke-test

# uncomment once statsmodels is updated.
@@ -16,7 +16,6 @@ General Examples
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__: Example of using a Trainable class with PopulationBasedTraining scheduler.
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__: Example of custom loggers and custom trial directory naming.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.

Search Algorithm Examples
-------------------------

@@ -26,10 +25,11 @@ Search Algorithm Examples
- `Nevergrad example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/nevergrad_example.py>`__: Optimize a simple toy function with the gradient-free optimization package `Nevergrad <https://github.com/facebookresearch/nevergrad>`_ with 4 parallel workers.
- `Bayesian Optimization example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/bayesopt_example.py>`__: Optimize a simple toy function using `Bayesian Optimization <https://github.com/fmfn/BayesianOptimization>`_ with 4 parallel workers.

Keras Examples
--------------
Tensorflow/Keras Examples
-------------------------

- `tune_mnist_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_keras.py>`__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.


PyTorch Examples

@@ -39,14 +39,6 @@ PyTorch Examples
- `mnist_pytorch_trainable <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch_trainable.py>`__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end.


TensorFlow Examples
-------------------

- `tune_mnist_ray <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class.
- `tune_mnist_ray_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray_hyperband.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler.
- `tune_mnist_async_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_async_hyperband.py>`__: Example of tuning a TensorFlow model on MNIST using AsyncHyperBand.


XGBoost Example
---------------
@@ -16,7 +16,6 @@ General Examples
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__: Example of using a Trainable class with PopulationBasedTraining scheduler.
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__: Example of custom loggers and custom trial directory naming.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.

Search Algorithm Examples
-------------------------

@@ -26,10 +25,11 @@ Search Algorithm Examples
- `Nevergrad example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/nevergrad_example.py>`__: Optimize a simple toy function with the gradient-free optimization package `Nevergrad <https://github.com/facebookresearch/nevergrad>`_ with 4 parallel workers.
- `Bayesian Optimization example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/bayesopt_example.py>`__: Optimize a simple toy function using `Bayesian Optimization <https://github.com/fmfn/BayesianOptimization>`_ with 4 parallel workers.

Keras Examples
--------------
Tensorflow/Keras Examples
-------------------------

- `tune_mnist_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_keras.py>`__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune.
- `pbt_memnn_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_memnn_example.py>`__: Example of training a Memory NN on bAbI with Keras using PBT.


PyTorch Examples

@@ -39,14 +39,6 @@ PyTorch Examples
- `mnist_pytorch_trainable <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch_trainable.py>`__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end.


TensorFlow Examples
-------------------

- `tune_mnist_ray <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class.
- `tune_mnist_ray_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray_hyperband.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler.
- `tune_mnist_async_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_async_hyperband.py>`__: Example of tuning a TensorFlow model on MNIST using AsyncHyperBand.


XGBoost Example
---------------
@@ -5,15 +5,15 @@ References Keras and is based off of https://keras.io/examples/babi_memnn/.

from __future__ import print_function

from tensorflow.python.keras.models import Sequential, Model, load_model
from tensorflow.python.keras.layers.embeddings import Embedding
from tensorflow.python.keras.layers import (Input, Activation, Dense, Permute,
                                            Dropout)
from tensorflow.python.keras.layers import add, dot, concatenate
from tensorflow.python.keras.layers import LSTM
from tensorflow.python.keras.optimizers import RMSprop
from tensorflow.python.keras.utils.data_utils import get_file
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import (Input, Activation, Dense, Permute,
                                     Dropout)
from tensorflow.keras.layers import add, dot, concatenate
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
from tensorflow.keras.preprocessing.sequence import pad_sequences
from ray.tune import Trainable
import argparse
import tarfile
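The import hunk above moves the example off TensorFlow's private tensorflow.python.keras namespace onto the public tf.keras API. As a rough sanity check of that style (a minimal sketch assuming TensorFlow 1.14+ or 2.x is installed; the toy model below is illustrative and not part of this change):

# Illustrative only: verify the public tf.keras imports used above resolve
# and can build and compile a trivial model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop

model = Sequential([Dense(4, activation="relu", input_shape=(8,)), Dense(1)])
model.compile(optimizer=RMSprop(), loss="mse")
model.summary()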
@@ -1,247 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.

See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import tempfile
import time

import ray
from ray.tune import grid_search, run

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

FLAGS = None
status_reporter = None  # used to report training status back to Ray
activation_fn = None  # e.g. tf.nn.relu


def deepnn(x):
    """deepnn builds the graph for a deep net for classifying digits.

    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.

    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with
      values equal to the logits of classifying the digit into one of 10
      classes (the digits 0-9). keep_prob is a scalar placeholder for the
      probability of dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope("reshape"):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope("conv1"):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope("pool1"):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope("conv2"):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope("pool2"):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
    # is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope("fc1"):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope("dropout"):
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit
    with tf.name_scope("fc2"):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob


def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def main(_):
    # Import data
    for _ in range(10):
        try:
            mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
            break
        except Exception:
            time.sleep(5)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope("adam_optimizer"):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print("Saving graph to: %s" % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 10 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 1.0
                })

                # !!! Report status to ray.tune !!!
                if status_reporter:
                    status_reporter(
                        timesteps_total=i, mean_accuracy=train_accuracy)

                print("step %d, training accuracy %g" % (i, train_accuracy))
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5
            })

        print("test accuracy %g" % accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            keep_prob: 1.0
        }))


# !!! Entrypoint for ray.tune !!!
def train(config={"activation": "relu"}, reporter=None):
    global FLAGS, status_reporter, activation_fn
    status_reporter = reporter
    activation_fn = getattr(tf.nn, config["activation"])
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        type=str,
        default="/tmp/tensorflow/mnist/input_data",
        help="Directory for storing input data")
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()

    mnist_spec = {
        "num_samples": 10,
        "stop": {
            "mean_accuracy": 0.99,
            "timesteps_total": 600,
        },
        "config": {
            "activation": grid_search(["relu", "elu", "tanh"]),
        },
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 2
        mnist_spec["num_samples"] = 1

    ray.init()

    from ray.tune.schedulers import AsyncHyperBandScheduler
    run(train,
        name="tune_mnist_test",
        scheduler=AsyncHyperBandScheduler(
            time_attr="timesteps_total",
            metric="mean_accuracy",
            mode="max",
            max_t=600,
        ),
        **mnist_spec)
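The fully deleted file above (the @@ -1,247 +0,0 @@ hunk) wired the reporter-style function API into an AsyncHyperBandScheduler run. For orientation, a minimal TensorFlow-free sketch of that same pattern, assuming a Tune release from this era (~0.7.x); the objective and names are illustrative, not part of the commit:

import random

import ray
from ray import tune


def toy_objective(config, reporter):
    # Stand-in for the MNIST training loop: report a fake accuracy per step.
    for step in range(10):
        acc = min(0.99, 0.1 * step + random.random() * 0.05)
        reporter(timesteps_total=step, mean_accuracy=acc)


if __name__ == "__main__":
    ray.init()
    tune.run(toy_objective, name="toy_reporter_example", num_samples=2)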
@@ -1,241 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.

See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys
import tempfile
import time

import ray
from ray import tune
from ray.tune import grid_search, register_trainable

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

import tensorflow as tf

FLAGS = None
status_reporter = None  # used to report training status back to Ray
activation_fn = tf.nn.relu  # e.g. tf.nn.relu


def deepnn(x):
    """deepnn builds the graph for a deep net for classifying digits.

    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.

    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with
      values equal to the logits of classifying the digit into one of 10
      classes (the digits 0-9). keep_prob is a scalar placeholder for the
      probability of dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope("reshape"):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope("conv1"):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope("pool1"):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope("conv2"):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope("pool2"):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
    # is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope("fc1"):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope("dropout"):
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit
    with tf.name_scope("fc2"):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob


def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def main(_):
    # Import data
    for _ in range(10):
        try:
            mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
            break
        except Exception:
            time.sleep(5)

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(cross_entropy)

    with tf.name_scope("adam_optimizer"):
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

    graph_location = tempfile.mkdtemp()
    print("Saving graph to: %s" % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20000):
            batch = mnist.train.next_batch(50)
            if i % 10 == 0:
                train_accuracy = accuracy.eval(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 1.0
                })

                # !!! Report status to ray.tune !!!
                if status_reporter:
                    status_reporter(
                        timesteps_total=i, mean_accuracy=train_accuracy)

                print("step %d, training accuracy %g" % (i, train_accuracy))
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5
            })

        print("test accuracy %g" % accuracy.eval(feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            keep_prob: 1.0
        }))


# !!! Entrypoint for ray.tune !!!
def train(config={"activation": "relu"}, reporter=None):
    global FLAGS, status_reporter, activation_fn
    status_reporter = reporter
    activation_fn = getattr(tf.nn, config["activation"])
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        type=str,
        default="/tmp/tensorflow/mnist/input_data",
        help="Directory for storing input data")
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()

    register_trainable("train_mnist", train)
    mnist_spec = {
        "stop": {
            "mean_accuracy": 0.99,
            "time_total_s": 600,
        },
        "config": {
            "activation": grid_search(["relu", "elu", "tanh"]),
            # You can pass any serializable object as well
            "foo": grid_search([np.array([1, 2]),
                                np.array([2, 3])]),
        },
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 2

    ray.init()
    tune.run("train_mnist", name="tune_mnist_test", **mnist_spec)
@@ -1,241 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A deep MNIST classifier using convolutional layers.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import time

import ray
from ray import tune
from ray.tune import Trainable, sample_from
from ray.tune.schedulers import HyperBandScheduler
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
import numpy as np

activation_fn = None  # e.g. tf.nn.relu


def setupCNN(x):
    """setupCNN builds the graph for a deep net for classifying digits.
    Args:
      x: an input tensor with the dimensions (N_examples, 784), where 784 is
        the number of pixels in a standard MNIST image.
    Returns:
      A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with
      values equal to the logits of classifying the digit into one of 10
      classes (the digits 0-9). keep_prob is a scalar placeholder for the
      probability of dropout.
    """
    # Reshape to use within a convolutional neural net.
    # Last dimension is for "features" - there is only one here, since images
    # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
    with tf.name_scope("reshape"):
        x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer - maps one grayscale image to 32 feature maps.
    with tf.name_scope("conv1"):
        W_conv1 = weight_variable([5, 5, 1, 32])
        b_conv1 = bias_variable([32])
        h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)

    # Pooling layer - downsamples by 2X.
    with tf.name_scope("pool1"):
        h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer -- maps 32 feature maps to 64.
    with tf.name_scope("conv2"):
        W_conv2 = weight_variable([5, 5, 32, 64])
        b_conv2 = bias_variable([64])
        h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)

    # Second pooling layer.
    with tf.name_scope("pool2"):
        h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
    # is down to 7x7x64 feature maps -- maps this to 1024 features.
    with tf.name_scope("fc1"):
        W_fc1 = weight_variable([7 * 7 * 64, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout - controls the complexity of the model, prevents co-adaptation of
    # features.
    with tf.name_scope("dropout"):
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024 features to 10 classes, one for each digit
    with tf.name_scope("fc2"):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    return y_conv, keep_prob


def conv2d(x, W):
    """conv2d returns a 2d convolution layer with full stride."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2(x):
    """max_pool_2x2 downsamples a feature map by 2X."""
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def weight_variable(shape):
    """weight_variable generates a weight variable of a given shape."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """bias_variable generates a bias variable of a given shape."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


class TrainMNIST(Trainable):
    """Example MNIST trainable."""

    def _setup(self, config):
        global activation_fn

        self.timestep = 0

        # Import data
        for _ in range(10):
            try:
                self.mnist = input_data.read_data_sets(
                    "/tmp/mnist_ray_demo", one_hot=True)
                break
            except Exception as e:
                print("Error loading data, retrying", e)
                time.sleep(5)

        assert self.mnist

        self.x = tf.placeholder(tf.float32, [None, 784])
        self.y_ = tf.placeholder(tf.float32, [None, 10])

        activation_fn = getattr(tf.nn, config.get("activation", "relu"))

        # Build the graph for the deep net
        y_conv, self.keep_prob = setupCNN(self.x)

        with tf.name_scope("loss"):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.y_, logits=y_conv)
            cross_entropy = tf.reduce_mean(cross_entropy)

        with tf.name_scope("adam_optimizer"):
            train_step = tf.train.AdamOptimizer(
                config.get("learning_rate", 1e-4)).minimize(cross_entropy)

        self.train_step = train_step

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(
                tf.argmax(y_conv, 1), tf.argmax(self.y_, 1))
            correct_prediction = tf.cast(correct_prediction, tf.float32)
            self.accuracy = tf.reduce_mean(correct_prediction)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(save_relative_paths=True)

    def _train(self):
        for i in range(10):
            batch = self.mnist.train.next_batch(50)
            self.sess.run(
                self.train_step,
                feed_dict={
                    self.x: batch[0],
                    self.y_: batch[1],
                    self.keep_prob: 0.5
                })

        batch = self.mnist.train.next_batch(50)
        train_accuracy = self.sess.run(
            self.accuracy,
            feed_dict={
                self.x: batch[0],
                self.y_: batch[1],
                self.keep_prob: 1.0
            })
        return {"mean_accuracy": train_accuracy}

    def _save(self, checkpoint_dir):
        path = self.saver.save(self.sess, os.path.join(checkpoint_dir, "save"))
        return path

    def _restore(self, checkpoint_path):
        self.saver.restore(self.sess, checkpoint_path)


# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    mnist_spec = {
        "stop": {
            "mean_accuracy": 0.99,
            "time_total_s": 600,
        },
        "config": {
            "learning_rate": sample_from(
                lambda spec: 10**np.random.uniform(-5, -3)),
            "activation": "relu",
        },
        "num_samples": 10,
    }

    if args.smoke_test:
        mnist_spec["stop"]["training_iteration"] = 20
        mnist_spec["num_samples"] = 1

    ray.init()
    hyperband = HyperBandScheduler(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        max_t=10)

    tune.run(
        TrainMNIST,
        name="mnist_hyperband_test",
        scheduler=hyperband,
        **mnist_spec)
@@ -523,7 +523,7 @@ class RayTrialExecutor(TrialExecutor):
        else:
            return "? CPUs, ? GPUs"

    def on_step_begin(self):
    def on_step_begin(self, trial_runner):
        """Before step() called, update the available resources."""
        self._update_avail_resources()
@@ -39,20 +39,20 @@ MEAN_ACCURACY = "mean_accuracy"
# Number of episodes in this iteration.
EPISODES_THIS_ITER = "episodes_this_iter"

# (Optional/Auto-filled) Accumulated number of episodes for this experiment.
# (Optional/Auto-filled) Accumulated number of episodes for this trial.
EPISODES_TOTAL = "episodes_total"

# Number of timesteps in this iteration.
TIMESTEPS_THIS_ITER = "timesteps_this_iter"

# (Auto-filled) Accumulated number of timesteps for this entire experiment.
# (Auto-filled) Accumulated number of timesteps for this entire trial.
TIMESTEPS_TOTAL = "timesteps_total"

# (Auto-filled) Time in seconds this iteration took to run.
# This may be overriden to override the system-computed time difference.
TIME_THIS_ITER_S = "time_this_iter_s"

# (Auto-filled) Accumulated time in seconds for this entire experiment.
# (Auto-filled) Accumulated time in seconds for this entire trial.
TIME_TOTAL_S = "time_total_s"

# (Auto-filled) The index of this training iteration.
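The comment changes above narrow the auto-filled accumulators from per-experiment to per-trial scope. A small illustrative sketch of that bookkeeping (the numbers and names here are hypothetical, not taken from this diff):

# Per-iteration results reported by ONE trial (hypothetical values).
iteration_results = [
    {"timesteps_this_iter": 100, "time_this_iter_s": 2.0},
    {"timesteps_this_iter": 100, "time_this_iter_s": 2.5},
]

# Tune accumulates these per trial, so this trial would show
# timesteps_total=200 and time_total_s=4.5; other trials in the same
# experiment keep their own independent totals.
timesteps_total = sum(r["timesteps_this_iter"] for r in iteration_results)
time_total_s = sum(r["time_this_iter_s"] for r in iteration_results)
print(timesteps_total, time_total_s)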
@@ -2081,12 +2081,12 @@ class TrialRunnerTest(unittest.TestCase):
        ray.init(num_cpus=4, num_gpus=2)
        runner = TrialRunner()

        def on_step_begin(self):
        def on_step_begin(self, trialrunner):
            self._update_avail_resources()
            cnt = self.pre_step if hasattr(self, "pre_step") else 0
            setattr(self, "pre_step", cnt + 1)

        def on_step_end(self):
        def on_step_end(self, trialrunner):
            cnt = self.pre_step if hasattr(self, "post_step") else 0
            setattr(self, "post_step", 1 + cnt)
@@ -71,11 +71,6 @@ class TuneExampleTest(unittest.TestCase):
        ray.shutdown()
        _register_all()

    def testTensorFlowMNIST(self):
        from ray.tune.examples.tune_mnist_ray_hyperband import TrainMNIST
        validate_save_restore(TrainMNIST)
        validate_save_restore(TrainMNIST, use_object_store=True)

    def testPBTKeras(self):
        from ray.tune.examples.pbt_tune_cifar10_with_keras import Cifar10Model
        from tensorflow.python.keras.datasets import cifar10
@@ -142,11 +142,11 @@ class TrialExecutor(object):
        raise NotImplementedError("Subclasses of TrialExecutor must provide "
                                  "get_running_trials() method")

    def on_step_begin(self):
    def on_step_begin(self, trial_runner):
        """A hook called before running one step of the trial event loop."""
        pass

    def on_step_end(self):
    def on_step_end(self, trial_runner):
        """A hook called after running one step of the trial event loop."""
        pass
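With the signature change above, the executor hooks now receive the TrialRunner that drives the event loop. A minimal sketch of a custom executor making use of that argument (the subclass name and its logging are illustrative, not part of this commit):

from ray.tune.ray_trial_executor import RayTrialExecutor


class VerboseExecutor(RayTrialExecutor):
    """Illustrative subclass: peeks at the runner passed into the new hook."""

    def on_step_begin(self, trial_runner):
        # The runner is now visible to the hook, e.g. to inspect its trials.
        print("Stepping with %d trials" % len(trial_runner.get_trials()))
        super(VerboseExecutor, self).on_step_begin(trial_runner)

Assuming this era's API, such an executor could be handed to tune.run through its trial_executor argument.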
@@ -326,7 +326,7 @@ class TrialRunner(object):
        if self.is_finished():
            raise TuneError("Called step when all trials finished?")
        with warn_if_slow("on_step_begin"):
            self.trial_executor.on_step_begin()
            self.trial_executor.on_step_begin(self)
        next_trial = self._get_next_trial()  # blocking
        if next_trial is not None:
            with warn_if_slow("start_trial"):

@@ -367,7 +367,7 @@ class TrialRunner(object):
        if self.is_finished():
            self._server.shutdown()
        with warn_if_slow("on_step_end"):
            self.trial_executor.on_step_end()
            self.trial_executor.on_step_end(self)

    def get_trial(self, tid):
        trial = [t for t in self._trials if t.trial_id == tid]