Source code for ray.train.v2.api.exceptions

from typing import Dict, Union

from ray.train.v2._internal.exceptions import RayTrainError
from ray.util.annotations import PublicAPI


@PublicAPI(stability="alpha")
class WorkerGroupError(RayTrainError):
    """Training failure that originated in the worker group.

    The message handed to the base exception is ``error_message`` prefixed
    with a fixed "Training failed due to worker errors:" header line.

    Args:
        error_message: A human-readable description of the training worker
            failures.
        worker_failures: A mapping from worker rank to the exception that
            occurred on that worker during training.
    """

    def __init__(self, error_message: str, worker_failures: Dict[int, Exception]):
        full_message = "Training failed due to worker errors:\n" + error_message
        super().__init__(full_message)

        # Keep the raw (un-prefixed) message so __reduce__ can replay the
        # constructor without prepending the header a second time.
        self._error_message = error_message
        self.worker_failures = worker_failures

    def __reduce__(self):
        """Describe how to rebuild this exception when it is pickled.

        Returns:
            A ``(callable, args)`` pair that re-invokes this class with the
            original ``error_message`` and ``worker_failures`` arguments.
        """
        constructor_args = (self._error_message, self.worker_failures)
        return (self.__class__, constructor_args)
@PublicAPI(stability="alpha")
class ControllerError(RayTrainError):
    """Training failure that originated in the controller.

    The message handed to the base exception is ``str(controller_failure)``
    prefixed with a fixed "Training failed due to controller error:" header
    line.

    Args:
        controller_failure: The exception that occurred on the controller.
    """

    def __init__(self, controller_failure: Exception):
        full_message = "Training failed due to controller error:\n" + str(
            controller_failure
        )
        super().__init__(full_message)

        # Retain the underlying exception object itself, not just its text.
        self.controller_failure = controller_failure

    def __reduce__(self):
        """Describe how to rebuild this exception when it is pickled.

        Returns:
            A ``(callable, args)`` pair that re-invokes this class with the
            original ``controller_failure`` exception.
        """
        constructor_args = (self.controller_failure,)
        return (self.__class__, constructor_args)
# Type alias meaning "either a WorkerGroupError or a ControllerError".
# NOTE: this is a typing.Union, not an exception class, so it cannot be used
# directly in an `except` clause; catch the member classes instead.
TrainingFailedError = Union[WorkerGroupError, ControllerError]