* Remove all __future__ imports from RLlib.
* Remove (object) again from tf_run_builder.py::TFRunBuilder.
* Fix 2x LINT warnings.
* Fix broken appo_policy import (must be appo_tf_policy).
* Remove future imports from all other ray files (not just RLlib).
* Remove future import blocks that contain `unicode_literals` as well. Revert appo_tf_policy.py to appo_policy.py (belongs to another PR).
* Add two empty lines before Schedule class.
* Put back __future__ imports into determine_tests_to_run.py. Fails otherwise on a py2/print related error.
67 lines · 2 KiB · Python
import argparse
import logging
import time

import ray
from ray.streaming.streaming import Environment

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

parser = argparse.ArgumentParser()
parser.add_argument("--input-file", required=True, help="the input text file")


# A class used to check attribute-based key selection
class Record:
    def __init__(self, record):
        k, _ = record
        self.word = k
        self.record = record


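# For illustration: Record wraps a (word, count) tuple and exposes the
# word as an attribute, so that key_by("word") below can select keys by
# attribute name:
#
#     r = Record(("ray", 1))
#     assert r.word == "ray"
#     assert r.record == ("ray", 1)

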
# Splits an input line into words and outputs objects of type Record,
# each one consisting of a key (word) and a tuple (word, 1)
def splitter(line):
    records = []
    words = line.split()
    for w in words:
        records.append(Record((w, 1)))
    return records


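# For example, splitter("to be or not to be") yields six Record objects
# whose .record tuples are ("to", 1), ("be", 1), ("or", 1), ("not", 1),
# ("to", 1), ("be", 1)

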
# Receives an object of type Record and returns the actual tuple
def as_tuple(record):
    return record.record


if __name__ == "__main__":
    # Get program parameters
    args = parser.parse_args()
    input_file = str(args.input_file)

    ray.init()
    # Register Record with Ray's serializer; use_dict=True serializes
    # instances via their __dict__ so they can be shipped between actors
    ray.register_custom_serializer(Record, use_dict=True)

    # A Ray streaming environment with the default configuration
    env = Environment()
    env.set_parallelism(2)  # Each operator will be executed by two actors

    # 'key_by("word")' physically partitions the stream of records
    # based on the hash value of the 'word' attribute (see Record class above)
    # 'map(as_tuple)' maps a record of type Record into a tuple
    # 'sum(1)' sums the 2nd element of the tuple, i.e. the word count
    stream = env.read_text_file(input_file) \
                .round_robin() \
                .flat_map(splitter) \
                .key_by("word") \
                .map(as_tuple) \
                .sum(1) \
                .inspect(print)  # Prints the content of the
                                 # stream to stdout

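    # Note: each element printed by inspect() above should be a
    # (word, count) tuple with a running per-key sum, e.g. ("ray", 3)
    # once "ray" has been seen three times (illustrative values)
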
    start = time.time()
    env_handle = env.execute()  # Deploys and executes the dataflow
    ray.get(env_handle)  # Stay alive until execution finishes
    end = time.time()
    logger.info("Elapsed time: {} secs".format(end - start))
    logger.debug("Output stream id: {}".format(stream.id))
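
# Example invocation (the file name and input path are illustrative):
#     python wordcount.py --input-file /tmp/words.txt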