# Mirror of https://github.com/vale981/ray
# Synced 2025-03-06 10:31:39 -05:00
# 62 lines, 1.5 KiB, Python
import argparse
import numpy as np
import os

from xgboost_ray.tests.utils import create_parquet


def _build_parser():
    """Return the argument parser for the fake-data creation script.

    The parser accepts one optional positional ``filename`` plus the
    ``--num-rows``, ``--num-partitions``, ``--num-cols``, ``--num-classes``
    and ``--seed`` integer options.
    """
    parser = argparse.ArgumentParser(description="Create fake data.")
    # nargs="?" makes the positional optional; without it argparse always
    # requires the argument and the declared default can never be used.
    # NOTE(review): the help text "ray/dask" does not describe a filename and
    # looks like a copy-paste leftover -- confirm and reword.
    parser.add_argument(
        "filename",
        type=str,
        nargs="?",
        default="/data/parted.parquet/",
        help="ray/dask")
    # argparse only runs ``type`` on string defaults, so the default has to be
    # a real int (a float such as 1e8 would be passed through unchanged).
    parser.add_argument(
        "-r",
        "--num-rows",
        required=False,
        type=int,
        default=100_000_000,
        help="num rows")
    parser.add_argument(
        "-p",
        "--num-partitions",
        required=False,
        type=int,
        default=100,
        help="num partitions")
    parser.add_argument(
        "-c",
        "--num-cols",
        required=False,
        type=int,
        default=4,
        help="num columns (features)")
    parser.add_argument(
        "-C",
        "--num-classes",
        required=False,
        type=int,
        default=2,
        help="num classes")
    parser.add_argument(
        "-s",
        "--seed",
        required=False,
        type=int,
        default=1234,
        help="random seed")
    return parser


def main(argv=None):
    """Create the fake parquet data set unless the target already exists.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        None. When ``filename`` already exists a message is printed and
        nothing is created.
    """
    # Remove OMP_NUM_THREADS from this process' environment if it is set.
    # NOTE(review): presumably so that the data generation is not limited to
    # the thread count configured by the environment -- confirm.
    os.environ.pop("OMP_NUM_THREADS", None)

    args = _build_parser().parse_args(argv)

    if os.path.exists(args.filename):
        print(f"File already exists: {args.filename}. Skipping creation.")
        # Actually skip: previously execution fell through and the data was
        # generated again despite the message.
        return

    # Seed NumPy's global RNG before generating the data.
    np.random.seed(args.seed)
    create_parquet(
        args.filename,
        num_rows=args.num_rows,
        num_partitions=args.num_partitions,
        num_features=args.num_cols,
        num_classes=args.num_classes)


if __name__ == "__main__":
    main()