Added functionality for retrieving variables from control dependencies (#220)

* Added test for retriving variables from an optimizer * Added comments to test * Addressed comments * Fixed travis bug * Added fix to circular controls * Added set for explored operations and duplicate prefix stripping * Removed embeded ipython * Removed prefix, use seperate graph for each network * Removed redundant imports * Addressed comments and added separate graph to initializer * fix typos * get rid of prefix in documentation
2025-03-06 02:21:39 -05:00 · 2017-01-30 19:17:42 -08:00 · 2017-01-30 19:17:42 -08:00 · db7297865f
commit db7297865f
parent 6703f7be6f
4 changed files with 84 additions and 36 deletions
--- a/doc/using-ray-with-tensorflow.md
+++ b/doc/using-ray-with-tensorflow.md
@ -72,21 +72,18 @@ b.assign(np.zeros(1))  # This adds a node to the graph every time you call it.
 ## Complete Example
 Putting this all together, we would first create the graph on each worker using
-environment variables. Within the environment variables, we would use the 
+environment variables. Within the environment variables, we would use the
 `get_weights` and `set_weights` methods of the `TensorFlowVariables` class. We
-would then use those methods to ship the weights (as a dictionary of variable 
+would then use those methods to ship the weights (as a dictionary of variable
 names mapping to tensorflow tensors) between the processes without shipping the
 actual TensorFlow graphs, which are much more complex Python objects. Note that
-to avoid namespace collision with already created variables on the workers, we 
+to avoid namespace collision with already created variables on the workers, we
-use a variable_scope and a prefix in the environment variables and then pass 
+use a separate graph for each network.
 true to the prefix in `TensorFlowVariables` so it can properly decode the variable
 names.
 ```python
 import tensorflow as tf
 import numpy as np
 import ray
 import uuid
 ray.init(num_workers=5)
@ -95,11 +92,8 @@ NUM_BATCHES = 1
 NUM_ITERS = 201
 def net_vars_initializer():
-  # Prefix should be random so that there is no conflict with variable names in
+  # Use a separate graph for each network.
-  # the cluster setting.
+  with tf.Graph().as_default():
  prefix = str(uuid.uuid1().hex)
  # Use the tensorflow variable_scope to prefix all of the variables
  with tf.variable_scope(prefix):
    # Seed TensorFlow to make the script deterministic.
    tf.set_random_seed(0)
    # Define the inputs.
@ -116,9 +110,8 @@ def net_vars_initializer():
    # Define the weight initializer and session.
    init = tf.global_variables_initializer()
    sess = tf.Session()
-    # Additional code for setting and getting the weights, and use a prefix
+    # Additional code for setting and getting the weights
-    # so that the variable names can be converted between workers.
+    variables = ray.experimental.TensorFlowVariables(loss, sess)
    variables = ray.experimental.TensorFlowVariables(loss, sess, prefix=True)
    # Return all of the data needed to use the network.
  return variables, sess, train, loss, x_data, y_data, init
--- a/examples/lbfgs/driver.py
+++ b/examples/lbfgs/driver.py
@ -61,7 +61,8 @@ class LinearModel(object):
    return self.sess.run(self.cross_entropy_grads, feed_dict={self.x: xs, self.y_: ys})
 def net_initialization():
-  return LinearModel([784,10])
+  with tf.Graph().as_default():
    return LinearModel([784,10])
 # By default, when an environment variable is used by a remote function, the
 # initialization code will be rerun at the end of the remote task to ensure
--- a/python/ray/experimental/tfutils.py
+++ b/python/ray/experimental/tfutils.py
@ -28,28 +28,41 @@ class TensorFlowVariables(object):
    assignment_placeholders (List[tf.placeholders]): The nodes that weights get
      passed to.
    assignment_nodes (List[tf.Tensor]): The nodes that assign the weights.
    prefix (Bool): Boolean for if there is a prefix on the variable names.
  """
-  def __init__(self, loss, sess=None, prefix=False):
+  def __init__(self, loss, sess=None):
    """Creates a TensorFlowVariables instance."""
    import tensorflow as tf
    self.sess = sess
    self.loss = loss
    self.prefix = prefix
    queue = deque([loss])
    variable_names = []
    explored_inputs = set([loss])
    # We do a BFS on the dependency graph of the input function to find 
    # the variables.
    while len(queue) != 0:
-      op = queue.popleft().op
+      tf_obj = queue.popleft()
-      queue.extend(op.inputs)
+     
-      if op.node_def.op == "Variable":
+      # The object put into the queue is not necessarily an operation, so we
-        variable_names.append(op.node_def.name)
+      # want the op attribute to get the operation underlying the object.
      # Only operations contain the inputs that we can explore.
      if hasattr(tf_obj, "op"):
         tf_obj = tf_obj.op
      for input_op in tf_obj.inputs:
        if input_op not in explored_inputs:
          queue.append(input_op)
          explored_inputs.add(input_op)
      # Tensorflow control inputs can be circular, so we keep track of
      # explored operations.
      for control in tf_obj.control_inputs:
        if control not in explored_inputs:
          queue.append(control)
          explored_inputs.add(control)
      if tf_obj.node_def.op == "Variable":
        variable_names.append(tf_obj.node_def.name)
    self.variables = OrderedDict()
    for v in [v for v in tf.global_variables() if v.op.node_def.name in variable_names]:
-      name = v.op.node_def.name.split("/", 1 if prefix else 0)[-1]
+      self.variables[v.op.node_def.name] = v
      self.variables[name] = v
    self.assignment_placeholders = dict()
    self.assignment_nodes = []
--- a/test/tensorflow_test.py
+++ b/test/tensorflow_test.py
@ -17,32 +17,39 @@ def make_linear_network(w_name=None, b_name=None):
  b = tf.Variable(tf.zeros([1]), name=b_name)
  y = w * x_data + b
  # Return the loss and weight initializer.
-  return tf.reduce_mean(tf.square(y - y_data)), tf.global_variables_initializer()
+  return tf.reduce_mean(tf.square(y - y_data)), tf.global_variables_initializer(), x_data, y_data
 def net_vars_initializer():
-  # Random prefix so variable names do not clash if we use nets with
+  # Uses a separate graph for each network.
-  # the same name.
+  with tf.Graph().as_default():
  prefix = str(uuid.uuid1().hex)
  # Use the tensorflow variable_scope to prefix all of the variables
  with tf.variable_scope(prefix):
    # Create the network.
-    loss, init = make_linear_network()
+    loss, init, _, _ = make_linear_network()
    sess = tf.Session()
    # Additional code for setting and getting the weights.
-    variables = ray.experimental.TensorFlowVariables(loss, sess, prefix=True)
+    variables = ray.experimental.TensorFlowVariables(loss, sess)
  # Return all of the data needed to use the network.
  return variables, init, sess
 def net_vars_reinitializer(net_vars):
  return net_vars
 def train_vars_initializer():
  # Almost the same as above, but now returns the placeholders and gradient.
  with tf.Graph().as_default():
    loss, init, x_data, y_data = make_linear_network()
    sess = tf.Session()
    variables = ray.experimental.TensorFlowVariables(loss, sess)
    grad = tf.gradients(loss, list(variables.variables.values()))
  return variables, init, sess, grad, [x_data, y_data]
 class TensorFlowTest(unittest.TestCase):
  def testTensorFlowVariables(self):
    ray.init(num_workers=2)
    sess = tf.Session()
-    loss, init = make_linear_network()
+    loss, init, _, _ = make_linear_network()
    sess.run(init)
    variables = ray.experimental.TensorFlowVariables(loss, sess)
@ -54,7 +61,7 @@ class TensorFlowTest(unittest.TestCase):
    variables.set_weights(weights)
    self.assertEqual(weights, variables.get_weights())
-    loss2, init2 = make_linear_network("w", "b")
+    loss2, init2, _, _ = make_linear_network("w", "b")
    sess.run(init2)
    variables2 = ray.experimental.TensorFlowVariables(loss2, sess)
@ -148,7 +155,7 @@ class TensorFlowTest(unittest.TestCase):
    # Create a network on the driver locally.
    sess1 = tf.Session()
-    loss1, init1 = make_linear_network()
+    loss1, init1, _, _ = make_linear_network()
    net_vars1 = ray.experimental.TensorFlowVariables(loss1, sess1)
    sess1.run(init1)
@ -170,5 +177,39 @@ class TensorFlowTest(unittest.TestCase):
    ray.worker.cleanup()
  def testVariablesControlDependencies(self):
    ray.init(num_workers=1)
    # Creates a network and appends a momentum optimizer.
    sess = tf.Session()
    loss, init, _, _ = make_linear_network()
    minimizer = tf.train.MomentumOptimizer(0.9, 0.9).minimize(loss)
    net_vars = ray.experimental.TensorFlowVariables(minimizer, sess)
    sess.run(init)
    # Tests if all variables are properly retrieved, 2 variables and 2 momentum
    # variables.
    self.assertEqual(len(net_vars.variables.items()), 4)
    ray.worker.cleanup()
  def testRemoteTrainingStep(self):
    ray.init(num_workers=1)
    ray.env.net = ray.EnvironmentVariable(train_vars_initializer, net_vars_reinitializer)
    @ray.remote
    def training_step(weights):
      variables, _, sess, grad, placeholders = ray.env.net
      variables.set_weights(weights)
      return sess.run(grad, feed_dict=dict(zip(placeholders, [[1]*100]*2)))
    variables, init, sess, _, _ = ray.env.net
    sess.run(init)
    ray.get(training_step.remote(variables.get_weights()))
    ray.worker.cleanup()
 if __name__ == "__main__":
  unittest.main(verbosity=2)