ray/examples/alexnet/alexnet.py

# The code for AlexNet is copied and adapted from the TensorFlow repository
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/image/alexnet/alexnet_benchmark.py.
import ray
import numpy as np
import tarfile, io
import boto3
import PIL.Image as Image
import tensorflow as tf
import ray.array.remote as ra

STDDEV = 0.001  # The standard deviation of the network weight initialization.


def load_chunk(tarfile, size=None):
  """Load a number of images from a single imagenet .tar file.

  This function also converts the image from grayscale to RGB if necessary.

  Args:
    tarfile (tarfile.TarFile): The archive from which the files get loaded.
    size (Optional[Tuple[int, int]]): Resize the image to this size if provided.

  Returns:
    A numpy.ndarray containing the image data in the format [batch, w, h, c],
      along with a list of the corresponding filenames.
  """
  result = []
  filenames = []
  for member in tarfile.getmembers():
    filename = member.path
    content = tarfile.extractfile(member)
    img = Image.open(content)
    rgbimg = Image.new("RGB", img.size)
    rgbimg.paste(img)
    if size is not None:
      rgbimg = rgbimg.resize(size, Image.ANTIALIAS)
    result.append(np.array(rgbimg).reshape(1, rgbimg.size[0], rgbimg.size[1], 3))
    filenames.append(filename)
  return np.concatenate(result), filenames
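
# A minimal local-usage sketch (the archive name "n01440764.tar" is purely
# illustrative): open an imagenet .tar archive and load its images resized to
# 256x256.
#
#   with tarfile.open("n01440764.tar", "r") as archive:
#     images, names = load_chunk(archive, size=(256, 256))
#   # images.shape == (num_images, 256, 256, 3)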

@ray.remote(num_return_vals=2)
def load_tarfile_from_s3(bucket, s3_key, size=[]):
  """Load an imagenet .tar file from S3.

  Args:
    bucket (str): Bucket holding the imagenet .tar.
    s3_key (str): s3 key from which the .tar file is loaded.
    size (List[int]): Resize the images to this size if size != []; the length
      of size must be 2.

  Returns:
    The image data and the corresponding filenames (see load_chunk).
  """
  s3 = boto3.client("s3")
  response = s3.get_object(Bucket=bucket, Key=s3_key)
  output = io.BytesIO()
  chunk = response["Body"].read(1024 * 8)
  while chunk:
    output.write(chunk)
    chunk = response["Body"].read(1024 * 8)
  output.seek(0)  # Go to the beginning of the .tar file.
  tar = tarfile.open(mode="r", fileobj=output)
  return load_chunk(tar, size=size if size != [] else None)

def load_tarfiles_from_s3(bucket, s3_keys, size=[]):
  """Load a number of imagenet .tar files from S3.

  Args:
    bucket (str): Bucket holding the imagenet .tars.
    s3_keys (List[str]): List of s3 keys from which the .tar files are loaded.
    size (List[int]): Resize the images to this size if size != []; the length
      of size must be 2.

  Returns:
    A list with one pair of object IDs per .tar file, referring to the chunk of
      images and the corresponding filenames (see load_chunk).
  """
  return [load_tarfile_from_s3.remote(bucket, s3_key, size) for s3_key in s3_keys]
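
# A driver-side sketch (the bucket and key names are hypothetical): fetch two
# archives in parallel and block until the image arrays are available.
#
#   ids = load_tarfiles_from_s3("my-imagenet-bucket",
#                               ["train/chunk00.tar", "train/chunk01.tar"],
#                               size=[256, 256])
#   batches = [ray.get(images_id) for (images_id, filenames_id) in ids]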

def setup_variables(params, placeholders, kernelshape, biasshape):
  """Create the variables for each layer.

  Args:
    params (List): Network parameters used for creating feed_dicts; the new
      kernel and bias variables are appended to this list.
    placeholders (List): Placeholders used for feeding weights into the
      network; the new placeholders are appended to this list.
    kernelshape (List): Shape of the kernel used for the conv layer.
    biasshape (List): Shape of the bias used.

  Returns:
    None
  """
  kernel = tf.Variable(tf.truncated_normal(kernelshape, stddev=STDDEV))
  biases = tf.Variable(tf.constant(0.0, shape=biasshape, dtype=tf.float32),
                       trainable=True, name='biases')
  kernel_new = tf.placeholder(tf.float32, shape=kernel.get_shape())
  biases_new = tf.placeholder(tf.float32, shape=biases.get_shape())
  update_kernel = kernel.assign(kernel_new)
  update_biases = biases.assign(biases_new)
  params += [kernel, biases]
  placeholders += [kernel_new, biases_new]

def conv_layer(parameters, prev_layer, shape, scope):
  """Construct a convolutional layer of the network.

  Args:
    parameters (List): Network parameters; the last two entries are the kernel
      and bias used for this layer.
    prev_layer (Tensor): The previous layer, used to connect the network
      together.
    shape (List): The strides used for the convolution.
    scope (Scope): The current TensorFlow name scope.

  Returns:
    Tensor: The activation of the layer.
  """
  kernel = parameters[-2]
  bias = parameters[-1]
  conv = tf.nn.conv2d(prev_layer, kernel, shape, padding='SAME')
  add_bias = tf.nn.bias_add(conv, bias)
  return tf.nn.relu(add_bias, name=scope)

def net_initialization():
  images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
  y_true = tf.placeholder(tf.float32, shape=[None, 1000])
  parameters = []
  placeholders = []

  # conv1
  with tf.name_scope('conv1') as scope:
    setup_variables(parameters, placeholders, [11, 11, 3, 96], [96])
    conv1 = conv_layer(parameters, images, [1, 4, 4, 1], scope)
  # pool1
  pool1 = tf.nn.max_pool(conv1,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool1')
  # lrn1
  pool1_lrn = tf.nn.lrn(pool1, depth_radius=5, bias=1.0,
                        alpha=0.0001, beta=0.75,
                        name="LocalResponseNormalization")
  # conv2
  with tf.name_scope('conv2') as scope:
    setup_variables(parameters, placeholders, [5, 5, 96, 256], [256])
    conv2 = conv_layer(parameters, pool1_lrn, [1, 1, 1, 1], scope)
  pool2 = tf.nn.max_pool(conv2,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool2')
  # lrn2
  pool2_lrn = tf.nn.lrn(pool2, depth_radius=5, bias=1.0,
                        alpha=0.0001, beta=0.75,
                        name="LocalResponseNormalization")
  # conv3
  with tf.name_scope('conv3') as scope:
    setup_variables(parameters, placeholders, [3, 3, 256, 384], [384])
    conv3 = conv_layer(parameters, pool2_lrn, [1, 1, 1, 1], scope)
  # conv4
  with tf.name_scope('conv4') as scope:
    setup_variables(parameters, placeholders, [3, 3, 384, 384], [384])
    conv4 = conv_layer(parameters, conv3, [1, 1, 1, 1], scope)
  # conv5
  with tf.name_scope('conv5') as scope:
    setup_variables(parameters, placeholders, [3, 3, 384, 256], [256])
    conv5 = conv_layer(parameters, conv4, [1, 1, 1, 1], scope)
  # pool5
  pool5 = tf.nn.max_pool(conv5,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool5')
  # lrn5
  pool5_lrn = tf.nn.lrn(pool5, depth_radius=5, bias=1.0,
                        alpha=0.0001, beta=0.75,
                        name="LocalResponseNormalization")

  dropout = tf.placeholder(tf.float32)

  with tf.name_scope('fc1') as scope:
    n_input = int(np.prod(pool5_lrn.get_shape().as_list()[1:]))
    setup_variables(parameters, placeholders, [n_input, 4096], [4096])
    fc_in = tf.reshape(pool5_lrn, [-1, n_input])
    fc_layer1 = tf.nn.tanh(tf.nn.bias_add(tf.matmul(fc_in, parameters[-2]), parameters[-1]))
    fc_out1 = tf.nn.dropout(fc_layer1, dropout)
  with tf.name_scope('fc2') as scope:
    n_input = int(np.prod(fc_out1.get_shape().as_list()[1:]))
    setup_variables(parameters, placeholders, [n_input, 4096], [4096])
    fc_in = tf.reshape(fc_out1, [-1, n_input])
    fc_layer2 = tf.nn.tanh(tf.nn.bias_add(tf.matmul(fc_in, parameters[-2]), parameters[-1]))
    fc_out2 = tf.nn.dropout(fc_layer2, dropout)
  with tf.name_scope('fc3') as scope:
    n_input = int(np.prod(fc_out2.get_shape().as_list()[1:]))
    setup_variables(parameters, placeholders, [n_input, 1000], [1000])
    fc_in = tf.reshape(fc_out2, [-1, n_input])
    fc_layer3 = tf.nn.softmax(tf.nn.bias_add(tf.matmul(fc_in, parameters[-2]), parameters[-1]))

  y_pred = fc_layer3 / tf.reduce_sum(fc_layer3,
                                     reduction_indices=len(fc_layer3.get_shape()) - 1,
                                     keep_dims=True)
  # Manual computation of the cross entropy.
  y_pred = tf.clip_by_value(y_pred, tf.cast(1e-10, dtype=tf.float32),
                            tf.cast(1. - 1e-10, dtype=tf.float32))
  cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(y_pred),
                                                reduction_indices=len(y_pred.get_shape()) - 1))

  opt = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)  # Any other optimizer can be placed here.
  correct_pred = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
  comp_grads = opt.compute_gradients(cross_entropy, parameters)
  application = opt.apply_gradients(zip(placeholders, parameters))
  sess = tf.Session()
  init_all_variables = tf.initialize_all_variables()

  # In order to set the weights of the TensorFlow graph on a worker, we add
  # assignment nodes. To get the network weights (as a list of numpy arrays)
  # and to set the network weights (from a list of numpy arrays), use the
  # methods get_weights and set_weights. This can be done from within a remote
  # function or on the driver.
  def get_and_set_weights_methods():
    assignment_placeholders = []
    assignment_nodes = []
    for var in tf.trainable_variables():
      assignment_placeholders.append(tf.placeholder(var.value().dtype, var.get_shape().as_list()))
      assignment_nodes.append(var.assign(assignment_placeholders[-1]))
    def get_weights():
      return [v.eval(session=sess) for v in tf.trainable_variables()]
    def set_weights(new_weights):
      sess.run(assignment_nodes, feed_dict={p: w for p, w in zip(assignment_placeholders, new_weights)})
    return get_weights, set_weights

  get_weights, set_weights = get_and_set_weights_methods()
  return comp_grads, sess, application, accuracy, images, y_true, dropout, placeholders, init_all_variables, get_weights, set_weights


def net_reinitialization(net_vars):
  return net_vars
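
# Sketch of how these two functions are typically wired up by the driver code
# that imports this module. The registration call below reflects the
# reusable-variable API of early Ray releases and may differ in other versions.
#
#   ray.reusables.net_vars = ray.Reusable(net_initialization, net_reinitialization)
#
# Once registered, the weights can be read and written as lists of numpy arrays
# on the driver or inside a remote function:
#
#   _, sess, _, _, _, _, _, _, init_all_variables, get_weights, set_weights = ray.reusables.net_vars
#   sess.run(init_all_variables)
#   weights = get_weights()
#   set_weights(weights)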

@ray.remote
def num_images(batches):
  """Counts the number of images in a list of batches.

  Args:
    batches (List[ObjectID]): Collection of batches of images.

  Returns:
    int: The number of images.
  """
  shape_ids = [ra.shape.remote(batch) for batch in batches]
  return sum([shape[0] for shape in ray.get(shape_ids)])

@ray.remote
def compute_mean_image(batches):
  """Computes the mean image given a list of batches of images.

  Args:
    batches (List[ObjectID]): A list of batches of images.

  Returns:
    ndarray: The mean image.
  """
  if len(batches) == 0:
    raise Exception("No images were passed into `compute_mean_image`.")
  sum_image_ids = [ra.sum.remote(batch, axis=0) for batch in batches]
  n_images = num_images.remote(batches)
  return np.sum(ray.get(sum_image_ids), axis=0).astype("float64") / ray.get(n_images)

@ray.remote(num_return_vals=4)
def shuffle_arrays(first_images, first_labels, second_images, second_labels):
  """Shuffles the images and labels from two batches.

  Args:
    first_images (ndarray): First batch of images.
    first_labels (ndarray): First batch of labels.
    second_images (ndarray): Second batch of images.
    second_labels (ndarray): Second batch of labels.

  Returns:
    ndarray: First batch of shuffled images.
    ndarray: First batch of shuffled labels.
    ndarray: Second batch of shuffled images.
    ndarray: Second batch of shuffled labels.
  """
  images = np.concatenate((first_images, second_images))
  labels = np.concatenate((first_labels, second_labels))
  total_length = len(images)
  first_len = len(first_images)
  random_indices = np.random.permutation(total_length)
  new_first_images = images[random_indices[0:first_len]]
  new_first_labels = labels[random_indices[0:first_len]]
  new_second_images = images[random_indices[first_len:total_length]]
  new_second_labels = labels[random_indices[first_len:total_length]]
  return new_first_images, new_first_labels, new_second_images, new_second_labels

def shuffle_pair(first_batch, second_batch):
  """Shuffle two batches of data.

  Args:
    first_batch (Tuple[ObjectID, ObjectID]): The first batch to be shuffled. The
      first component is the object ID of a batch of images, and the second
      component is the object ID of the corresponding batch of labels.
    second_batch (Tuple[ObjectID, ObjectID]): The second batch to be shuffled.
      The first component is the object ID of a batch of images, and the second
      component is the object ID of the corresponding batch of labels.

  Returns:
    Tuple[ObjectID, ObjectID]: The first batch of shuffled data.
    Tuple[ObjectID, ObjectID]: The second batch of shuffled data.
  """
  images1, labels1, images2, labels2 = shuffle_arrays.remote(first_batch[0], first_batch[1], second_batch[0], second_batch[1])
  return (images1, labels1), (images2, labels2)

@ray.remote
def filenames_to_labels(filenames, filename_label_dict):
  """Converts filename strings to integer labels.

  Args:
    filenames (List[str]): The filenames of the images.
    filename_label_dict (Dict[str, int]): A dictionary mapping filenames to
      integer labels.

  Returns:
    ndarray: The integer labels.
  """
  return np.asarray([int(filename_label_dict[filename]) for filename in filenames])

def one_hot(x):
  """Converts an integer label to a one-hot vector.

  Args:
    x (int): The index to be set to one.

  Returns:
    ndarray: A one-hot vector of length 1000.
  """
  zero = np.zeros([1000])
  zero[x] = 1.0
  return zero

def crop_images(images):
  """Randomly crop a batch of images.

  This is used to generate many slightly different images from each training
  example.

  Args:
    images (ndarray): A batch of images to crop. The shape of images should be
      batch_size x height x width x channels.

  Returns:
    ndarray: A batch of cropped images.
  """
  original_height = 256
  original_width = 256
  cropped_height = 224
  cropped_width = 224
  height_offset = np.random.randint(original_height - cropped_height + 1)
  width_offset = np.random.randint(original_width - cropped_width + 1)
  return images[:, height_offset:(height_offset + cropped_height), width_offset:(width_offset + cropped_width), :]

def shuffle(batches):
  """Shuffle the data.

  This method groups the batches together in pairs and within each pair shuffles
  the data between the two members.

  Args:
    batches (List[Tuple[ObjectID, ObjectID]]): This is a list of tuples, where
      each tuple consists of two object IDs. The first component is an object ID
      for a batch of images, and the second component is an object ID for the
      corresponding batch of labels.

  Returns:
    List[Tuple[ObjectID, ObjectID]]: The shuffled data.
  """
  # Randomly permute the order of the batches.
  permuted_batches = np.random.permutation(batches)
  new_batches = []
  for i in range(len(batches) // 2):
    # Swap data between consecutive batches.
    shuffled_batch1, shuffled_batch2 = shuffle_pair(permuted_batches[2 * i], permuted_batches[2 * i + 1])
    new_batches += [shuffled_batch1, shuffled_batch2]
  if len(batches) % 2 == 1:
    # If there is an odd number of batches, don't forget the last one.
    new_batches.append(permuted_batches[-1])
  return new_batches

@ray.remote
def compute_grad(X, Y, mean, weights):
  """Computes the gradient of the network.

  Args:
    X (ndarray): A batch of images of shape [batch, 256, 256, 3] (randomly
      cropped to 224 x 224 below).
    Y (ndarray): The labels corresponding to the images.
    mean (ndarray): The mean image to subtract from the images.
    weights (List[ndarray]): The network weights.

  Returns:
    A list containing one gradient per network variable.
  """
  comp_grads, sess, _, _, images, y_true, dropout, placeholders, _, get_weights, set_weights = ray.reusables.net_vars
  # Set the network weights.
  set_weights(weights)
  # Choose a subset of the batch to compute on and crop the images.
  random_indices = np.random.randint(0, len(X), size=128)
  subset_X = crop_images(X[random_indices] - mean)
  subset_Y = np.asarray([one_hot(label) for label in Y[random_indices]])
  # Compute the gradients.
  return sess.run([g for (g, v) in comp_grads], feed_dict={images: subset_X, y_true: subset_Y, dropout: 0.5})

@ray.remote
def compute_accuracy(X, Y, weights):
  """Returns the accuracy of the network on a batch of data.

  Args:
    X (ndarray): A batch of images.
    Y (ndarray): A batch of labels.
    weights (List[ndarray]): The network weights.

  Returns:
    The accuracy of the network on the given batch.
  """
  _, sess, _, accuracy, images, y_true, dropout, placeholders, _, get_weights, set_weights = ray.reusables.net_vars
  # Set the network weights.
  set_weights(weights)
  one_hot_Y = np.asarray([one_hot(label) for label in Y])
  cropped_X = crop_images(X)
  return sess.run(accuracy, feed_dict={images: cropped_X, y_true: one_hot_Y, dropout: 1.0})
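
# A rough sketch of one synchronous training step as it might look on the
# driver. The names `batches` (a list of (images_id, labels_id) pairs) and
# `mean_id` (e.g. the result of compute_mean_image.remote(...)) are
# illustrative, and only the calls already used in this file are assumed.
#
#   (comp_grads, sess, application, accuracy, images, y_true, dropout,
#    placeholders, _, get_weights, set_weights) = ray.reusables.net_vars
#   weights = get_weights()
#   grad_ids = [compute_grad.remote(images_id, labels_id, mean_id, weights)
#               for (images_id, labels_id) in batches]
#   gradient_sets = ray.get(grad_ids)
#   # Average the gradients over the workers, one entry per network variable.
#   mean_grads = [np.mean([grads[i] for grads in gradient_sets], axis=0)
#                 for i in range(len(gradient_sets[0]))]
#   # Feed the averaged gradients into the apply_gradients op built above.
#   sess.run(application, feed_dict=dict(zip(placeholders, mean_grads)))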