mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
58 lines
1.4 KiB
Python
58 lines
1.4 KiB
Python
import argparse
|
|
import numpy as np
|
|
import os
|
|
|
|
from xgboost_ray.tests.utils import create_parquet
|
|
|
|
def build_parser():
    """Build the command-line parser for the fake-data generation script.

    Returns:
        argparse.ArgumentParser: parser exposing an optional positional
        ``filename`` plus the integer options ``--num-rows``,
        ``--num-partitions``, ``--num-cols``, ``--num-classes`` and
        ``--seed``.
    """
    parser = argparse.ArgumentParser(description="Create fake data.")
    # nargs="?" makes the positional optional. Without it argparse treats
    # the argument as required, so the default below could never apply.
    # NOTE(review): the help text "ray/dask" does not describe a filename;
    # confirm the intended wording.
    parser.add_argument(
        "filename",
        nargs="?",
        type=str,
        default="/data/parted.parquet/",
        help="ray/dask")
    # argparse only runs ``type`` on string defaults, so the default is
    # given as a real int rather than the float literal 1e8.
    parser.add_argument(
        "-r",
        "--num-rows",
        required=False,
        type=int,
        default=int(1e8),
        help="num rows")
    parser.add_argument(
        "-p",
        "--num-partitions",
        required=False,
        type=int,
        default=100,
        help="num partitions")
    parser.add_argument(
        "-c",
        "--num-cols",
        required=False,
        type=int,
        default=4,
        help="num columns (features)")
    parser.add_argument(
        "-C",
        "--num-classes",
        required=False,
        type=int,
        default=2,
        help="num classes")
    parser.add_argument(
        "-s",
        "--seed",
        required=False,
        type=int,
        default=1234,
        help="random seed")
    return parser


if __name__ == "__main__":
    # Remove any inherited OMP_NUM_THREADS setting; a no-op when it is unset.
    os.environ.pop("OMP_NUM_THREADS", None)

    args = build_parser().parse_args()

    # Seed NumPy's global RNG before generating the data.
    np.random.seed(args.seed)
    # All numeric options are already ints (type=int with int defaults),
    # so no further conversion is needed here.
    create_parquet(
        args.filename,
        num_rows=args.num_rows,
        num_partitions=args.num_partitions,
        num_features=args.num_cols,
        num_classes=args.num_classes)
|