Mirror of https://github.com/vale981/ray, synced 2025-03-06 10:31:39 -05:00
Added functionality for retrieving variables from control dependencies (#220)
* Added test for retrieving variables from an optimizer
* Added comments to test
* Addressed comments
* Fixed travis bug
* Added fix to circular controls
* Added set for explored operations and duplicate prefix stripping
* Removed embedded ipython
* Removed prefix, use separate graph for each network
* Removed redundant imports
* Addressed comments and added separate graph to initializer
* fix typos
* get rid of prefix in documentation
Parent: 6703f7be6f
Commit: db7297865f
4 changed files with 84 additions and 36 deletions
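
The core of the change is the new traversal in `TensorFlowVariables.__init__` (File 3 below): instead of only walking `op.inputs`, it now also walks `op.control_inputs`, and it keeps a set of explored objects so that circular control dependencies terminate. The following is a self-contained sketch of that traversal, assuming a TF 1.x-style API; the standalone function form and the name `find_variable_names` are illustrative, not part of the patch.

```python
# Sketch (not library code): BFS over a TF 1.x graph that follows both data
# inputs and control inputs, with an explored set so cycles in the control
# dependencies cannot cause an infinite loop.
from collections import deque

import tensorflow as tf  # assumes a TF 1.x-style API


def find_variable_names(root):
  """Collect names of all Variable ops that `root` transitively depends on."""
  queue = deque([root])
  explored = set([root])
  variable_names = []
  while queue:
    tf_obj = queue.popleft()
    # Tensors expose their producing operation via `.op`; only operations
    # carry the `inputs`/`control_inputs` we want to explore.
    if hasattr(tf_obj, "op"):
      tf_obj = tf_obj.op
    for tensor in tf_obj.inputs:            # data dependencies
      if tensor not in explored:
        explored.add(tensor)
        queue.append(tensor)
    for control in tf_obj.control_inputs:   # control dependencies (may be circular)
      if control not in explored:
        explored.add(control)
        queue.append(control)
    if tf_obj.node_def.op == "Variable":
      variable_names.append(tf_obj.node_def.name)
  return variable_names
```

Passing an optimizer step such as `tf.train.MomentumOptimizer(...).minimize(loss)` as the root reaches the optimizer's slot variables through its control dependencies, which is what the new `testVariablesControlDependencies` test in File 4 asserts.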
File 1 of 4:

@@ -78,15 +78,12 @@ would then use those methods to ship the weights (as a dictionary of variable
 names mapping to tensorflow tensors) between the processes without shipping the
 actual TensorFlow graphs, which are much more complex Python objects. Note that
 to avoid namespace collision with already created variables on the workers, we
-use a variable_scope and a prefix in the environment variables and then pass
-true to the prefix in `TensorFlowVariables` so it can properly decode the variable
-names.
+use a separate graph for each network.
 
 ```python
 import tensorflow as tf
 import numpy as np
 import ray
-import uuid
 
 ray.init(num_workers=5)
 
@@ -95,11 +92,8 @@ NUM_BATCHES = 1
 NUM_ITERS = 201
 
 def net_vars_initializer():
-  # Prefix should be random so that there is no conflict with variable names in
-  # the cluster setting.
-  prefix = str(uuid.uuid1().hex)
-  # Use the tensorflow variable_scope to prefix all of the variables
-  with tf.variable_scope(prefix):
+  # Use a separate graph for each network.
+  with tf.Graph().as_default():
     # Seed TensorFlow to make the script deterministic.
     tf.set_random_seed(0)
     # Define the inputs.
@@ -116,9 +110,8 @@ def net_vars_initializer():
     # Define the weight initializer and session.
     init = tf.global_variables_initializer()
     sess = tf.Session()
-    # Additional code for setting and getting the weights, and use a prefix
-    # so that the variable names can be converted between workers.
-    variables = ray.experimental.TensorFlowVariables(loss, sess, prefix=True)
+    # Additional code for setting and getting the weights
+    variables = ray.experimental.TensorFlowVariables(loss, sess)
     # Return all of the data needed to use the network.
     return variables, sess, train, loss, x_data, y_data, init
 
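For context on the documentation change above: the prefix and `variable_scope` machinery is gone because each network now gets its own `tf.Graph`, so identically named variables in different networks cannot collide, and weights can be shipped as a plain name-to-array dictionary. A rough sketch of that idea, assuming a TF 1.x-style API; `build_network`, the variable names, and the weight-shipping loop are illustrative, not taken from the patch.

```python
# Illustrative sketch (not part of the patch): two copies of the same network
# built in separate graphs keep the plain names "w" and "b" without clashing,
# so weights travel between them as a {name: numpy array} dictionary.
import tensorflow as tf  # assumes a TF 1.x-style API


def build_network():
  w = tf.Variable(tf.zeros([10]), name="w")
  b = tf.Variable(tf.zeros([10]), name="b")
  sess = tf.Session()
  sess.run(tf.global_variables_initializer())
  return {"w": w, "b": b}, sess


with tf.Graph().as_default():
  vars_a, sess_a = build_network()

with tf.Graph().as_default():
  # Same variable names as above; no collision because this is a fresh graph.
  vars_b, sess_b = build_network()

# Ship the weights by name and load them into the second copy.
weights = {name: sess_a.run(v) for name, v in vars_a.items()}
for name, value in weights.items():
  vars_b[name].load(value, sess_b)
```
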
File 2 of 4:

@@ -61,7 +61,8 @@ class LinearModel(object):
     return self.sess.run(self.cross_entropy_grads, feed_dict={self.x: xs, self.y_: ys})
 
 def net_initialization():
-  return LinearModel([784,10])
+  with tf.Graph().as_default():
+    return LinearModel([784,10])
 
 # By default, when an environment variable is used by a remote function, the
 # initialization code will be rerun at the end of the remote task to ensure
File 3 of 4:

@@ -28,28 +28,41 @@ class TensorFlowVariables(object):
     assignment_placeholders (List[tf.placeholders]): The nodes that weights get
       passed to.
     assignment_nodes (List[tf.Tensor]): The nodes that assign the weights.
-    prefix (Bool): Boolean for if there is a prefix on the variable names.
   """
-  def __init__(self, loss, sess=None, prefix=False):
+  def __init__(self, loss, sess=None):
     """Creates a TensorFlowVariables instance."""
     import tensorflow as tf
     self.sess = sess
     self.loss = loss
-    self.prefix = prefix
     queue = deque([loss])
     variable_names = []
+    explored_inputs = set([loss])
 
     # We do a BFS on the dependency graph of the input function to find
     # the variables.
     while len(queue) != 0:
-      op = queue.popleft().op
-      queue.extend(op.inputs)
-      if op.node_def.op == "Variable":
-        variable_names.append(op.node_def.name)
+      tf_obj = queue.popleft()
+
+      # The object put into the queue is not necessarily an operation, so we
+      # want the op attribute to get the operation underlying the object.
+      # Only operations contain the inputs that we can explore.
+      if hasattr(tf_obj, "op"):
+        tf_obj = tf_obj.op
+      for input_op in tf_obj.inputs:
+        if input_op not in explored_inputs:
+          queue.append(input_op)
+          explored_inputs.add(input_op)
+      # Tensorflow control inputs can be circular, so we keep track of
+      # explored operations.
+      for control in tf_obj.control_inputs:
+        if control not in explored_inputs:
+          queue.append(control)
+          explored_inputs.add(control)
+      if tf_obj.node_def.op == "Variable":
+        variable_names.append(tf_obj.node_def.name)
     self.variables = OrderedDict()
     for v in [v for v in tf.global_variables() if v.op.node_def.name in variable_names]:
-      name = v.op.node_def.name.split("/", 1 if prefix else 0)[-1]
-      self.variables[name] = v
+      self.variables[v.op.node_def.name] = v
     self.assignment_placeholders = dict()
     self.assignment_nodes = []
 
File 4 of 4:

@@ -17,32 +17,39 @@ def make_linear_network(w_name=None, b_name=None):
   b = tf.Variable(tf.zeros([1]), name=b_name)
   y = w * x_data + b
   # Return the loss and weight initializer.
-  return tf.reduce_mean(tf.square(y - y_data)), tf.global_variables_initializer()
+  return tf.reduce_mean(tf.square(y - y_data)), tf.global_variables_initializer(), x_data, y_data
 
 def net_vars_initializer():
-  # Random prefix so variable names do not clash if we use nets with
-  # the same name.
-  prefix = str(uuid.uuid1().hex)
-  # Use the tensorflow variable_scope to prefix all of the variables
-  with tf.variable_scope(prefix):
+  # Uses a separate graph for each network.
+  with tf.Graph().as_default():
     # Create the network.
-    loss, init = make_linear_network()
+    loss, init, _, _ = make_linear_network()
     sess = tf.Session()
     # Additional code for setting and getting the weights.
-    variables = ray.experimental.TensorFlowVariables(loss, sess, prefix=True)
+    variables = ray.experimental.TensorFlowVariables(loss, sess)
     # Return all of the data needed to use the network.
     return variables, init, sess
 
 def net_vars_reinitializer(net_vars):
   return net_vars
 
+def train_vars_initializer():
+  # Almost the same as above, but now returns the placeholders and gradient.
+  with tf.Graph().as_default():
+    loss, init, x_data, y_data = make_linear_network()
+    sess = tf.Session()
+    variables = ray.experimental.TensorFlowVariables(loss, sess)
+    grad = tf.gradients(loss, list(variables.variables.values()))
+    return variables, init, sess, grad, [x_data, y_data]
+
 class TensorFlowTest(unittest.TestCase):
 
   def testTensorFlowVariables(self):
     ray.init(num_workers=2)
 
     sess = tf.Session()
-    loss, init = make_linear_network()
+    loss, init, _, _ = make_linear_network()
     sess.run(init)
 
     variables = ray.experimental.TensorFlowVariables(loss, sess)
@@ -54,7 +61,7 @@ class TensorFlowTest(unittest.TestCase):
     variables.set_weights(weights)
     self.assertEqual(weights, variables.get_weights())
 
-    loss2, init2 = make_linear_network("w", "b")
+    loss2, init2, _, _ = make_linear_network("w", "b")
     sess.run(init2)
 
     variables2 = ray.experimental.TensorFlowVariables(loss2, sess)
@@ -148,7 +155,7 @@ class TensorFlowTest(unittest.TestCase):
 
     # Create a network on the driver locally.
     sess1 = tf.Session()
-    loss1, init1 = make_linear_network()
+    loss1, init1, _, _ = make_linear_network()
     net_vars1 = ray.experimental.TensorFlowVariables(loss1, sess1)
     sess1.run(init1)
 
@@ -170,5 +177,39 @@ class TensorFlowTest(unittest.TestCase):
 
     ray.worker.cleanup()
 
+  def testVariablesControlDependencies(self):
+    ray.init(num_workers=1)
+
+    # Creates a network and appends a momentum optimizer.
+    sess = tf.Session()
+    loss, init, _, _ = make_linear_network()
+    minimizer = tf.train.MomentumOptimizer(0.9, 0.9).minimize(loss)
+    net_vars = ray.experimental.TensorFlowVariables(minimizer, sess)
+    sess.run(init)
+
+    # Tests if all variables are properly retrieved, 2 variables and 2 momentum
+    # variables.
+    self.assertEqual(len(net_vars.variables.items()), 4)
+
+    ray.worker.cleanup()
+
+  def testRemoteTrainingStep(self):
+    ray.init(num_workers=1)
+
+    ray.env.net = ray.EnvironmentVariable(train_vars_initializer, net_vars_reinitializer)
+
+    @ray.remote
+    def training_step(weights):
+      variables, _, sess, grad, placeholders = ray.env.net
+      variables.set_weights(weights)
+      return sess.run(grad, feed_dict=dict(zip(placeholders, [[1]*100]*2)))
+
+    variables, init, sess, _, _ = ray.env.net
+
+    sess.run(init)
+    ray.get(training_step.remote(variables.get_weights()))
+
+    ray.worker.cleanup()
+
 if __name__ == "__main__":
   unittest.main(verbosity=2)
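A note on the `4` asserted in the new `testVariablesControlDependencies` test: `make_linear_network` defines two variables (`w` and `b`), and `MomentumOptimizer` creates one momentum slot per trainable variable, so the traversal should surface four variables in total. A quick, hypothetical way to see the slot variables outside the test, assuming a TF 1.x-style API:

```python
# Hypothetical check (not part of the patch): MomentumOptimizer adds one
# "Momentum" slot variable per trainable variable.
import tensorflow as tf  # assumes a TF 1.x-style API

with tf.Graph().as_default():
  x = tf.placeholder(tf.float32, [None, 1])
  w = tf.Variable(tf.zeros([1]), name="w")
  b = tf.Variable(tf.zeros([1]), name="b")
  loss = tf.reduce_mean(tf.square(w * x + b))
  tf.train.MomentumOptimizer(0.9, 0.9).minimize(loss)
  # Expect four global variables, e.g. ['b', 'b/Momentum', 'w', 'w/Momentum'].
  print(sorted(v.op.name for v in tf.global_variables()))
```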