[sgd] Add file lock to protect compilation of sgd op (#3486)

* add file lock to protect compilation of sgd op

* lint

* update

* fix

* fix

* lint

* update

* rebase on arrow

* Update sgd_worker.py
This commit is contained in:
Philipp Moritz 2018-12-09 13:52:40 -08:00 committed by GitHub
parent cffe8f9806
commit 87c0d24579
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 22 additions and 7 deletions

View file

@ -15,10 +15,10 @@
# - PLASMA_SHARED_LIB
set(arrow_URL https://github.com/apache/arrow.git)
# The PR for this commit is https://github.com/apache/arrow/pull/3093. We
# The PR for this commit is https://github.com/apache/arrow/pull/3117. We
# include the link here to make it easier to find the right commit because
# Arrow often rewrites git history and invalidates certain commits.
set(arrow_TAG 187b98ed338d4995317dae9efd19870c532192cb)
set(arrow_TAG 4d810b4a9c37e79fde6b134ac90ee0c5f7f6c9bf)
set(ARROW_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/external/arrow-install)
set(ARROW_HOME ${ARROW_INSTALL_PREFIX})

View file

@ -1,5 +1,6 @@
colorama
click
filelock
flatbuffers
funcsigs
mock

View file

@ -9,9 +9,10 @@ import pyarrow.plasma as plasma
import tensorflow as tf
import ray
from ray.experimental.sgd.util import fetch, run_timeline, warmup
from ray.experimental.sgd.modified_allreduce import sum_gradients_all_reduce, \
unpack_small_tensors
from ray.experimental.sgd.util import (ensure_plasma_tensorflow_op, fetch,
run_timeline, warmup)
from ray.experimental.sgd.modified_allreduce import (sum_gradients_all_reduce,
unpack_small_tensors)
logger = logging.getLogger(__name__)
@ -112,8 +113,7 @@ class SGDWorker(object):
ray.worker.global_worker.plasma_client.store_socket_name)
manager_socket = (
ray.worker.global_worker.plasma_client.manager_socket_name)
if not plasma.tf_plasma_op:
plasma.build_plasma_tensorflow_op()
ensure_plasma_tensorflow_op()
# For fetching grads -> plasma
self.plasma_in_grads = []

View file

@ -2,10 +2,13 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import filelock
import json
import logging
import numpy as np
import os
import pyarrow
import pyarrow.plasma as plasma
import time
import tensorflow as tf
@ -120,6 +123,16 @@ class Timeline(object):
logger.info("Wrote chrome timeline to", filename)
def ensure_plasma_tensorflow_op():
base_path = os.path.join(pyarrow.__path__[0], "tensorflow")
lock_path = os.path.join(base_path, "compile_op.lock")
with filelock.FileLock(lock_path):
if not os.path.exists(os.path.join(base_path, "plasma_op.so")):
plasma.build_plasma_tensorflow_op()
else:
plasma.load_plasma_tensorflow_op()
if __name__ == "__main__":
a = Timeline(1)
b = Timeline(2)

View file

@ -136,6 +136,7 @@ def find_version(*filepath):
requires = [
"numpy",
"filelock",
"funcsigs",
"click",
"colorama",