Bring cloudpickle inside the repository. (#1445)

* Bring cloudpickle version 0.5.2 inside the repo.
* Use internal copy of cloudpickle everywhere.
* Fix linting.
* Import ordering.
* Change __init__.py.
* Set pickler in serialization context.
* Don't check ray location.
2025-04-23 06:25:52 -04:00 · 2018-01-25 11:36:37 -08:00 · 2018-01-25 11:36:37 -08:00 · ab5d4a6010
commit ab5d4a6010
parent 173f1d629a
15 changed files with 1128 additions and 44 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -41,7 +41,7 @@ matrix:
        - sphinx-build -W -b html -d _build/doctrees source _build/html
        - cd ..
        # Run Python linting.
-        - flake8 --exclude=python/ray/core/src/common/flatbuffers_ep-prefix/,python/ray/core/generated/,src/common/format/,doc/source/conf.py
+        - flake8 --exclude=python/ray/core/src/common/flatbuffers_ep-prefix/,python/ray/core/generated/,src/common/format/,doc/source/conf.py,python/ray/cloudpickle/

    - os: linux
      dist: trusty
--- a/.travis/install-dependencies.sh
+++ b/.travis/install-dependencies.sh
@@ -24,7 +24,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
  wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh -nv
  bash miniconda.sh -b -p $HOME/miniconda
  export PATH="$HOME/miniconda/bin:$PATH"
-  pip install -q numpy cloudpickle==0.5.2 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
+  pip install -q numpy cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
 elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
  sudo apt-get update
  sudo apt-get install -y cmake pkg-config python-dev python-numpy build-essential autoconf curl libtool unzip
@@ -32,7 +32,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
  wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -nv
  bash miniconda.sh -b -p $HOME/miniconda
  export PATH="$HOME/miniconda/bin:$PATH"
-  pip install -q numpy cloudpickle==0.5.2 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
+  pip install -q numpy cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
 elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
  # check that brew is installed
  which -s brew
@@ -48,7 +48,7 @@ elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
  wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh -nv
  bash miniconda.sh -b -p $HOME/miniconda
  export PATH="$HOME/miniconda/bin:$PATH"
-  pip install -q numpy cloudpickle==0.5.2 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
+  pip install -q numpy cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
 elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
  # check that brew is installed
  which -s brew
@@ -64,7 +64,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
  wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh -nv
  bash miniconda.sh -b -p $HOME/miniconda
  export PATH="$HOME/miniconda/bin:$PATH"
-  pip install -q numpy cloudpickle==0.5.2 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
+  pip install -q numpy cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python pyyaml pandas
 elif [[ "$LINT" == "1" ]]; then
  sudo apt-get update
  sudo apt-get install -y cmake build-essential autoconf curl libtool unzip
--- a/doc/requirements-doc.txt
+++ b/doc/requirements-doc.txt
@@ -1,6 +1,5 @@
 colorama
 click
-cloudpickle
 funcsigs
 mock
 numpy
--- a/doc/source/install-on-macosx.rst
+++ b/doc/source/install-on-macosx.rst
@@ -28,7 +28,7 @@ To build Ray, first install the following dependencies. We recommend using
  brew update
  brew install cmake pkg-config automake autoconf libtool boost wget

-  pip install numpy cloudpickle funcsigs click colorama psutil redis flatbuffers cython --ignore-installed six
+  pip install numpy funcsigs click colorama psutil redis flatbuffers cython --ignore-installed six

 If you are using Anaconda, you may also need to run the following.

--- a/doc/source/install-on-ubuntu.rst
+++ b/doc/source/install-on-ubuntu.rst
@@ -35,7 +35,7 @@ To build Ray, first install the following dependencies. We recommend using
  # If you are on Ubuntu 14.04, you need the following.
  pip install cmake

-  pip install numpy cloudpickle funcsigs click colorama psutil redis flatbuffers cython
+  pip install numpy funcsigs click colorama psutil redis flatbuffers cython


 If you are using Anaconda, you may also need to run the following.
--- a/docker/base-deps/Dockerfile
+++ b/docker/base-deps/Dockerfile
@@ -12,4 +12,3 @@ RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh \
 ENV PATH "/opt/conda/bin:$PATH"
 RUN conda install -y libgcc
 RUN pip install flatbuffers
-RUN pip install --upgrade pip cloudpickle==0.5.2
--- a/python/ray/actor.py
+++ b/python/ray/actor.py
@@ -2,7 +2,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import cloudpickle as pickle
 import copy
 import hashlib
 import inspect
@@ -11,6 +10,7 @@ import numpy as np
 import traceback

 import pyarrow.plasma as plasma
+import ray.cloudpickle as pickle
 import ray.local_scheduler
 import ray.signature as signature
 import ray.worker
--- a/python/ray/autoscaler/aws/development-example.yaml
+++ b/python/ray/autoscaler/aws/development-example.yaml
@@ -88,7 +88,7 @@ setup_commands:
    - echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> ~/.bashrc
    # Build Ray.
    - git clone https://github.com/ray-project/ray || true
-    - pip install -U cloudpickle boto3==1.4.8
+    - pip install boto3==1.4.8
    - cd ray/python; python setup.py develop

 # Custom commands that will be run on the head node after common setup.
--- a/python/ray/cloudpickle/__init__.py
+++ b/python/ray/cloudpickle/__init__.py
@@ -0,0 +1,5 @@
+from __future__ import absolute_import
+
+from ray.cloudpickle.cloudpickle import *
+
+__version__ = '0.5.2'
--- a/python/ray/cloudpickle/cloudpickle.py
+++ b/python/ray/cloudpickle/cloudpickle.py
--- a/python/ray/services.py
+++ b/python/ray/services.py
@@ -5,7 +5,6 @@ from __future__ import print_function
 import binascii
 from collections import namedtuple, OrderedDict
 from datetime import datetime
-import cloudpickle
 import json
 import os
 import psutil
@@ -295,25 +294,22 @@ def _autodetect_num_gpus():


 def _compute_version_info():
-    """Compute the versions of Python, cloudpickle, pyarrow, and Ray.
+    """Compute the versions of Python, pyarrow, and Ray.

    Returns:
        A tuple containing the version information.
    """
    ray_version = ray.__version__
-    ray_location = os.path.abspath(ray.__file__)
    python_version = ".".join(map(str, sys.version_info[:3]))
-    cloudpickle_version = cloudpickle.__version__
    pyarrow_version = pyarrow.__version__
-    return (ray_version, ray_location, python_version, cloudpickle_version,
-            pyarrow_version)
+    return (ray_version, python_version, pyarrow_version)


 def _put_version_info_in_redis(redis_client):
    """Store version information in Redis.

    This will be used to detect if workers or drivers are started using
-    different versions of Python, cloudpickle, pyarrow, or Ray.
+    different versions of Python, pyarrow, or Ray.

    Args:
        redis_client: A client for the primary Redis shard.
@@ -325,7 +321,7 @@ def check_version_info(redis_client):
    """Check if various version info of this process is correct.

    This will be used to detect if workers or drivers are started using
-    different versions of Python, cloudpickle, pyarrow, or Ray. If the version
+    different versions of Python, pyarrow, or Ray. If the version
    information is not present in Redis, then no check is done.

    Args:
@@ -347,18 +343,14 @@ def check_version_info(redis_client):
        node_ip_address = ray.services.get_node_ip_address()
        error_message = ("Version mismatch: The cluster was started with:\n"
                         "    Ray: " + true_version_info[0] + "\n"
-                         "    Ray location: " + true_version_info[1] + "\n"
-                         "    Python: " + true_version_info[2] + "\n"
-                         "    Cloudpickle: " + true_version_info[3] + "\n"
-                         "    Pyarrow: " + str(true_version_info[4]) + "\n"
+                         "    Python: " + true_version_info[1] + "\n"
+                         "    Pyarrow: " + str(true_version_info[2]) + "\n"
                         "This process on node " + node_ip_address +
                         " was started with:" + "\n"
                         "    Ray: " + version_info[0] + "\n"
-                         "    Ray location: " + version_info[1] + "\n"
-                         "    Python: " + version_info[2] + "\n"
-                         "    Cloudpickle: " + version_info[3] + "\n"
-                         "    Pyarrow: " + str(version_info[4]))
-        if version_info[:4] != true_version_info[:4]:
+                         "    Python: " + version_info[1] + "\n"
+                         "    Pyarrow: " + str(version_info[2]))
+        if version_info[:2] != true_version_info[:2]:
            raise Exception(error_message)
        else:
            print(error_message)
--- a/python/ray/worker.py
+++ b/python/ray/worker.py
@@ -3,7 +3,6 @@ from __future__ import division
 from __future__ import print_function

 import atexit
-import cloudpickle as pickle
 import collections
 import colorama
 import copy
@@ -22,6 +21,7 @@ import traceback
 # Ray modules
 import pyarrow
 import pyarrow.plasma as plasma
+import ray.cloudpickle as pickle
 import ray.experimental.state as state
 import ray.serialization as serialization
 import ray.services as services
@@ -1040,6 +1040,9 @@ def _initialize_serialization(worker=global_worker):
    serialize several exception classes that we define for error handling.
    """
    worker.serialization_context = pyarrow.SerializationContext()
+    # Tell the serialization context to use the cloudpickle version that we
+    # ship with Ray.
+    worker.serialization_context.set_pickle(pickle.dumps, pickle.loads)
    pyarrow.register_default_serialization_handlers(
        worker.serialization_context)

--- a/python/setup.py
+++ b/python/setup.py
@@ -114,7 +114,6 @@ setup(name="ray",
                        "pytest",
                        "pyyaml",
                        "redis",
-                        "cloudpickle == 0.5.2",
                        # The six module is required by pyarrow.
                        "six >= 1.0.0",
                        "flatbuffers"],
--- a/test/failure_test.py
+++ b/test/failure_test.py
@@ -441,15 +441,14 @@ class ConfigurationTest(unittest.TestCase):
        ray.worker.cleanup()

    def testVersionMismatch(self):
-        import cloudpickle
-        cloudpickle_version = cloudpickle.__version__
-        cloudpickle.__version__ = "fake cloudpickle version"
+        ray_version = ray.__version__
+        ray.__version__ = "fake ray version"

        ray.init(num_workers=1, driver_mode=ray.SILENT_MODE)

        wait_for_errors(b"version_mismatch", 1)

-        cloudpickle.__version__ = cloudpickle_version
+        ray.__version__ = ray_version


 if __name__ == "__main__":
--- a/test/runtest.py
+++ b/test/runtest.py
@@ -193,16 +193,6 @@ DICT_OBJECTS = (

 RAY_TEST_OBJECTS = BASE_OBJECTS + LIST_OBJECTS + TUPLE_OBJECTS + DICT_OBJECTS

-# Check that the correct version of cloudpickle is installed.
-try:
-    import cloudpickle
-    cloudpickle.dumps(Point)
-except AttributeError:
-    cloudpickle_command = "pip install --upgrade cloudpickle"
-    raise Exception("You have an older version of cloudpickle that is not "
-                    "able to serialize namedtuples. Try running "
-                    "\n\n{}\n\n".format(cloudpickle_command))
-

 class SerializationTest(unittest.TestCase):
    def tearDown(self):