ray.serve.schema.ControllerHealthMetrics#

pydantic model ray.serve.schema.ControllerHealthMetrics[source]#

Health metrics for the Ray Serve controller.

These metrics help diagnose controller performance issues, especially as cluster size increases.

PublicAPI (alpha): This API is in alpha and may change before becoming stable.

field application_state_update_duration_s: DurationStats | None = None#

Application state update duration statistics over a rolling window.

field controller_start_time: float = 0.0#

When the controller started (epoch seconds).

field deployment_state_update_duration_s: DurationStats | None = None#

Deployment state update duration statistics over a rolling window.

field event_loop_delay_s: float = 0.0#

Actual sleep duration minus expected sleep duration, in seconds. Positive values indicate an overloaded event loop.

field expected_sleep_duration_s: float = 0.0#

Expected sleep duration in seconds (the value of CONTROL_LOOP_INTERVAL_S).

field handle_metrics_delay_ms: DurationStats | None = None#

Delay between when handle metrics are generated and when they reach the controller (rolling window, milliseconds).

field last_control_loop_time: float = 0.0#

Time of the last control loop execution (epoch seconds).

field last_sleep_duration_s: float = 0.0#

Actual sleep duration of the most recent control loop iteration, in seconds.

field loop_duration_s: DurationStats | None = None#

Control loop duration statistics over a rolling window.

field loops_per_second: float = 0.0#

Control loop iterations per second.

field node_update_duration_s: DurationStats | None = None#

Node update duration statistics over a rolling window.

field num_asyncio_tasks: int = 0#

Number of pending asyncio tasks.

field num_control_loops: int = 0#

Total number of control loops executed.

field process_memory_mb: float = 0.0#

Controller process memory usage in MB.

field proxy_state_update_duration_s: DurationStats | None = None#

Proxy state update duration statistics over a rolling window.

field replica_metrics_delay_ms: DurationStats | None = None#

Delay between when replica metrics are generated and when they reach the controller (rolling window, milliseconds).

field timestamp: float = 0.0#

When these metrics were collected (epoch seconds).

field uptime_s: float = 0.0#

Controller uptime in seconds.