Welcome to Ray!

Getting Started

Learn basics
Understand how the Ray framework scales your ML workflows.
Learn more >

Install Ray
pip install -U "ray[air]"
Installation guide >
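
To sanity-check the install, you can start Ray locally and print the resources it detects (a minimal sketch; ray.init() with no arguments starts a single-node Ray runtime on your machine):

import ray

# Start Ray on the local machine and show the CPUs/GPUs it detected.
ray.init()
print(ray.cluster_resources())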

Try it out
Experiment with Ray using an introductory notebook.
Open the notebook >
Scaling with Ray
from ray import data

# Step 1: read 100 files in parallel from an S3 directory
dataset = data.read_csv(paths="s3://structured/data", parallelism=100)

# Step 2: partition the dataset into blocks
dataset = dataset.repartition(num_blocks=1000)

# Step 3: preprocess the data at scale, 1000 blocks in parallel
preprocessor = data.preprocessors.StandardScaler(columns=["value"])
dataset_transformed = preprocessor.fit_transform(dataset=dataset)
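
To check the result of the preprocessing above, you can peek at a few transformed rows (a minimal sketch; Dataset.show prints records to stdout):

# Inspect the first few transformed records.
dataset_transformed.show(5)
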
from ray.air.config import ScalingConfig
from ray.train.torch import TorchTrainer

# Step 1: set up PyTorch model training as you normally would
def train_loop_per_worker():
    model = ...
    train_dataset = ...
    for epoch in range(num_epochs):
        ...  # model training logic

# Step 2: set up Ray's PyTorch Trainer to run on 32 GPUs
trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    scaling_config=ScalingConfig(num_workers=32, use_gpu=True),
    datasets={"train": train_dataset},
)

# Step 3: run distributed model training on 32 GPUs
result = trainer.fit()
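
Once training finishes, the returned Result carries the reported metrics and the final checkpoint, which can feed later inference or serving (a minimal sketch of reading them back):

# Look at the metrics from the last training iteration and keep the checkpoint.
print(result.metrics)
best_checkpoint = result.checkpoint
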
from ray import tune
from ray.air.config import ScalingConfig
from ray.train.lightgbm import LightGBMTrainer

train_dataset, eval_dataset = ...

# Step 1: set up Ray's LightGBM Trainer to train on 64 CPUs
trainer = LightGBMTrainer(
    ...,
    scaling_config=ScalingConfig(num_workers=64),
    datasets={"train": train_dataset, "eval": eval_dataset},
)

# Step 2: set up Ray Tuner to run 1000 trials
tuner = tune.Tuner(
    trainable=trainer,
    param_space=hyper_param_space,
    tune_config=tune.TuneConfig(num_samples=1000),
)

# Step 3: run distributed HPO with 1000 trials; each trial runs on 64 CPUs
result_grid = tuner.fit()
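
After tuning, the ResultGrid can be queried for the best trial; its checkpoint is the kind of artifact the serving example below loads. The metric name here is a hypothetical placeholder, so substitute whatever metric your trainer reports:

# Pick the trial with the lowest evaluation loss.
# "eval-logloss" is a placeholder metric name for this sketch.
best_result = result_grid.get_best_result(metric="eval-logloss", mode="min")
lgbm_best_checkpoint = best_result.checkpoint
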
from ray.train.batch_predictor import BatchPredictor
from ray.train.torch import TorchPredictor

dataset = ...

# Step 1: create a batch predictor to run inference at scale
batch_predictor = BatchPredictor.from_checkpoint(
    checkpoint=model_checkpoint, predictor_cls=TorchPredictor
)

# Step 2: run batch inference on 64 GPUs (64 workers, one GPU each)
results = batch_predictor.predict(
    dataset, batch_size=512, min_scoring_workers=64, num_gpus_per_worker=1
)
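
The predictions come back as a Ray Dataset, so they can be written straight to storage; the output path below is a placeholder:

# Persist the predictions (placeholder S3 path for this sketch).
results.write_parquet("s3://my-bucket/predictions")
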
from ray import serve
from ray.serve import PredictorDeployment
from ray.train.lightgbm import LightGBMPredictor

# Deploy 50 replicas of the LightGBM model as a live endpoint.
# Convert incoming JSON requests into a DataFrame.
serve.run(
    PredictorDeployment.options(
        name="LightGBM_Service",
        num_replicas=50,
    ).bind(
        predictor_cls=LightGBMPredictor,
        checkpoint=lgbm_best_checkpoint,
        http_adapter=serve.http_adapters.pandas_read_json,
    )
)
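
With the deployment running, clients can send JSON over HTTP. This sketch assumes Serve's default local address (http://localhost:8000/) and uses a made-up feature name:

import requests

# The pandas_read_json adapter turns the JSON body into a DataFrame.
# "feature_1" is a hypothetical column name for this sketch.
response = requests.post("http://localhost:8000/", json=[{"feature_1": 0.5}])
print(response.json())
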
from ray.rllib.algorithms.ppo import PPOConfig

# Step 1: configure PPO to run 64 parallel workers to collect samples from the env.
ppo_config = (
    PPOConfig()
    .environment(env="Taxi-v3")
    .rollouts(num_rollout_workers=64)
    .framework("torch")
    .training(model={"use_lstm": True})  # example model config: use an LSTM-based policy
)

# Step 2: build the PPO algorithm
ppo_algo = ppo_config.build()

# Step 3: train and evaluate PPO
for _ in range(5):
    print(ppo_algo.train())
ppo_algo.evaluate()
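
After training, the algorithm can be checkpointed so the learned policy can be restored later (a minimal sketch; Algorithm.save() writes a checkpoint and returns its location):

# Save the trained policy for later reuse.
checkpoint_path = ppo_algo.save()
print(f"Checkpoint saved at {checkpoint_path}")
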
Beyond the basics

Ray AI Runtime
Scale the entire ML pipeline from data ingest to model serving with high-level Python APIs that integrate with popular ecosystem frameworks.
Learn more about AIR >
Ray Core
Scale generic Python code with simple, foundational primitives that enable a high degree of control for building distributed applications or custom platforms.
Learn more about Core >
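
For a feel of those primitives, here is a minimal sketch of a remote task and a remote actor; the function and class names are made up for illustration:

import ray

ray.init()

# A stateless task: Ray runs the calls below in parallel across the cluster.
@ray.remote
def square(x):
    return x * x

# A stateful actor: a remote object that keeps state between method calls.
@ray.remote
class Counter:
    def __init__(self):
        self.count = 0

    def increment(self):
        self.count += 1
        return self.count

print(ray.get([square.remote(i) for i in range(4)]))  # [0, 1, 4, 9]

counter = Counter.remote()
print(ray.get(counter.increment.remote()))  # 1
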
Ray Clusters
Deploy a Ray cluster on AWS, GCP, Azure, or Kubernetes to seamlessly scale workloads from a laptop to a large cluster for production.
Learn more about clusters >
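
Once a cluster is up, a driver script attaches to it instead of starting Ray locally; the address below is a placeholder for your head node:

import ray

# Connect to a running Ray cluster through the Ray Client endpoint.
# "ray://<head-node-host>:10001" is a placeholder address for this sketch.
ray.init(address="ray://<head-node-host>:10001")
print(ray.cluster_resources())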