# Mirror of https://github.com/vale981/ray
# Synced 2025-03-07 02:51:39 -05:00
# 41 lines, 1.4 KiB, Python
"""Train an XGBoost binary classifier with Ray and run batch prediction.

The script reads the breast-cancer CSV, splits it 70/30, trains an
``XGBoostTrainer`` on the training split, and then uses a ``BatchPredictor``
built from the resulting checkpoint to score the held-out split.
"""

import ray
from ray.data.preprocessors import StandardScaler
from ray.train.batch_predictor import BatchPredictor
from ray.train.xgboost import XGBoostTrainer, XGBoostPredictor
from ray.air.config import ScalingConfig

# Split data into train and validation.
dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3)
# The held-out split, with its label column removed, is the prediction input.
test_dataset = valid_dataset.drop_columns(["target"])

# Only these two feature columns are handed to the scaler.
columns_to_scale = ["mean radius", "mean texture"]
preprocessor = StandardScaler(columns=columns_to_scale)

# 20 boosting rounds across 2 workers; only the "train" dataset is supplied.
trainer = XGBoostTrainer(
    label_column="target",
    num_boost_round=20,
    scaling_config=ScalingConfig(num_workers=2),
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
    },
    datasets={"train": train_dataset},
    preprocessor=preprocessor,
)
result = trainer.fit()

# You can also create a checkpoint from a trained model using
# `XGBoostCheckpoint.from_model`.

# import xgboost as xgb
# from ray.train.xgboost import XGBoostCheckpoint
# model = xgb.Booster()
# model.load_model(...)
# checkpoint = XGBoostCheckpoint.from_model(model, path=".")
checkpoint = result.checkpoint

# Build the predictor from the checkpoint produced by the training run.
batch_predictor = BatchPredictor.from_checkpoint(checkpoint, XGBoostPredictor)

predicted_probabilities = batch_predictor.predict(test_dataset)
predicted_probabilities.show()