mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
Update TF documentation (#5918)
This commit is contained in:
parent
9f23620412
commit
d52a4983af
4 changed files with 59 additions and 12 deletions
|
@ -26,6 +26,12 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
|
|||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/doc/examples/plot_hyperparameter.py
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/doc/examples/doc_code/torch_example.py
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/doc/examples/doc_code/tf_example.py
|
||||
|
||||
######################## RLLIB TESTS #################################
|
||||
|
||||
source $ROOT_DIR/run_rllib_tests.sh
|
||||
|
@ -56,12 +62,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
|
|||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/python/ray/experimental/sgd/examples/tune_example.py --num-replicas=2
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/doc/examples/doc_code/torch_example.py
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/doc/examples/doc_code/tf_example.py
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/python/ray/experimental/sgd/examples/tensorflow_train_example.py
|
||||
|
||||
|
|
|
@ -9,11 +9,11 @@ in the documentation.
|
|||
|
||||
# yapf: disable
|
||||
# __tf_model_start__
|
||||
import tensorflow as tf
|
||||
from tensorflow.keras import layers
|
||||
|
||||
|
||||
def create_keras_model():
|
||||
import tensorflow as tf
|
||||
model = tf.keras.Sequential()
|
||||
# Adds a densely-connected layer with 64 units to the model:
|
||||
model.add(layers.Dense(64, activation="relu", input_shape=(32, )))
|
||||
|
@ -23,7 +23,7 @@ def create_keras_model():
|
|||
model.add(layers.Dense(10, activation="softmax"))
|
||||
|
||||
model.compile(
|
||||
optimizer=tf.train.RMSPropOptimizer(0.01),
|
||||
optimizer=tf.keras.optimizers.RMSprop(0.01),
|
||||
loss=tf.keras.losses.categorical_crossentropy,
|
||||
metrics=[tf.keras.metrics.categorical_accuracy])
|
||||
return model
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
TensorFlow Distributed Training API (Experimental)
|
||||
==================================================
|
||||
TF Distributed Training
|
||||
=======================
|
||||
|
||||
Ray's ``TFTrainer`` simplifies distributed model training for TensorFlow. The ``TFTrainer`` is a wrapper around ``MultiWorkerMirroredStrategy`` with a Python API to easily incorporate distributed training into a larger Python application, as opposed to writing custom logic for setting up environments and starting separate processes.
|
||||
|
||||
.. important:: This API has only been tested with TensorFlow2.0rc.
|
||||
.. important:: This API has only been tested with TensorFlow 2.0rc and is still highly experimental. Please file bug reports if you run into any issues — thanks!
|
||||
|
||||
----------
|
||||
|
||||
|
|
|
@ -1,7 +1,54 @@
|
|||
Best Practices: Ray with Tensorflow
|
||||
===================================
|
||||
|
||||
This document describes best practices for using Ray with TensorFlow. Feel free to contribute if you think this document is missing anything.
|
||||
This document describes best practices for using the Ray core APIs with TensorFlow. Ray also provides higher-level utilities for working with TensorFlow, such as distributed training APIs (`training tensorflow example`_), Tune for hyperparameter search (`Tune tensorflow example`_), and RLlib for reinforcement learning (`RLlib tensorflow example`_).
|
||||
|
||||
.. _`training tensorflow example`: tf_distributed_training.html
|
||||
.. _`Tune tensorflow example`: https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tf_mnist_example.py
|
||||
.. _`RLlib tensorflow example`: rllib-models.html#tensorflow-models
|
||||
|
||||
Feel free to contribute if you think this document is missing anything.
|
||||
|
||||
|
||||
Common Issues: Pickling
|
||||
-----------------------
|
||||
|
||||
One common issue with TensorFlow 2.0 is a pickling error like the following:
|
||||
|
||||
.. code-block::
|
||||
|
||||
File "/home/***/venv/lib/python3.6/site-packages/ray/actor.py", line 322, in remote
|
||||
return self._remote(args=args, kwargs=kwargs)
|
||||
File "/home/***/venv/lib/python3.6/site-packages/ray/actor.py", line 405, in _remote
|
||||
self._modified_class, self._actor_method_names)
|
||||
File "/home/***/venv/lib/python3.6/site-packages/ray/function_manager.py", line 578, in export_actor_class
|
||||
"class": pickle.dumps(Class),
|
||||
File "/home/***/venv/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 1123, in dumps
|
||||
cp.dump(obj)
|
||||
File "/home/***/lib/python3.6/site-packages/ray/cloudpickle/cloudpickle.py", line 482, in dump
|
||||
return Pickler.dump(self, obj)
|
||||
File "/usr/lib/python3.6/pickle.py", line 409, in dump
|
||||
self.save(obj)
|
||||
File "/usr/lib/python3.6/pickle.py", line 476, in save
|
||||
f(self, obj) # Call unbound method with explicit self
|
||||
File "/usr/lib/python3.6/pickle.py", line 751, in save_tuple
|
||||
save(element)
|
||||
File "/usr/lib/python3.6/pickle.py", line 808, in _batch_appends
|
||||
save(tmp[0])
|
||||
File "/usr/lib/python3.6/pickle.py", line 496, in save
|
||||
rv = reduce(self.proto)
|
||||
TypeError: can't pickle _LazyLoader objects
|
||||
|
||||
To resolve this, you should move all instances of ``import tensorflow`` into the Ray actor or function, as follows:
|
||||
|
||||
.. code-block::
|
||||
|
||||
def create_model():
|
||||
import tensorflow as tf
|
||||
...
|
||||
|
||||
This issue is caused by side-effects of importing TensorFlow and setting global state.
|
||||
|
||||
|
||||
Use Actors for Parallel Models
|
||||
------------------------------
|
||||
|
|
Loading…
Add table
Reference in a new issue