import argparse
import numpy as np
import os

from xgboost_ray.tests.utils import create_parquet

if __name__ == "__main__":
    # Generate a fake partitioned parquet dataset via ``create_parquet``.
    #
    # The dataset is only generated when the target path does not exist yet;
    # an existing path is left untouched and the script exits successfully.

    # Drop any inherited OMP_NUM_THREADS setting before generating data.
    # NOTE(review): presumably so the numeric libraries are not pinned to a
    # thread count chosen by the launcher -- confirm.
    os.environ.pop("OMP_NUM_THREADS", None)

    parser = argparse.ArgumentParser(description="Create fake data.")
    # ``nargs="?"`` makes the positional optional so that the default path is
    # actually used when no filename is given on the command line.
    parser.add_argument(
        "filename",
        type=str,
        nargs="?",
        default="/data/parted.parquet/",
        help="path of the parquet dataset to create",
    )
    # The default is an int literal: argparse does not run ``type`` on
    # non-string defaults, so a float default would leak through as a float.
    parser.add_argument(
        "-r", "--num-rows", type=int, default=100_000_000, help="num rows"
    )
    parser.add_argument(
        "-p", "--num-partitions", type=int, default=100, help="num partitions"
    )
    parser.add_argument(
        "-c", "--num-cols", type=int, default=4, help="num columns (features)"
    )
    parser.add_argument(
        "-C", "--num-classes", type=int, default=2, help="num classes"
    )
    parser.add_argument("-s", "--seed", type=int, default=1234, help="random seed")

    args = parser.parse_args()

    if os.path.exists(args.filename):
        print(f"File already exists: {args.filename}. Skipping creation.")
        # Actually skip: stop here instead of falling through to generation.
        raise SystemExit(0)

    # Seed NumPy's global RNG before generating the data.
    np.random.seed(args.seed)
    create_parquet(
        args.filename,
        num_rows=args.num_rows,
        num_partitions=args.num_partitions,
        num_features=args.num_cols,
        num_classes=args.num_classes,
    )