ray.serve.schema.DeploymentSchema#

pydantic model ray.serve.schema.DeploymentSchema[source]#

Specifies options for one deployment within a Serve application. For each deployment this can optionally be included in ServeApplicationSchema to override deployment options specified in code.

Show JSON schema
{
   "title": "DeploymentSchema",
   "description": "Specifies options for one deployment within a Serve application. For each deployment\nthis can optionally be included in `ServeApplicationSchema` to override deployment\noptions specified in code.",
   "type": "object",
   "properties": {
      "name": {
         "title": "Name",
         "description": "Globally-unique name identifying this deployment.",
         "type": "string"
      },
      "num_replicas": {
         "title": "Num Replicas",
         "description": "The number of processes that handle requests to this deployment. Uses a default if null. Can also be set to `auto` for a default autoscaling configuration (experimental).",
         "default": 1,
         "anyOf": [
            {
               "type": "integer",
               "exclusiveMinimum": 0
            },
            {
               "type": "string"
            }
         ]
      },
      "route_prefix": {
         "title": "Route Prefix",
         "description": "[DEPRECATED] Please use route_prefix under ServeApplicationSchema instead.",
         "default": 1,
         "type": "string"
      },
      "max_concurrent_queries": {
         "title": "Max Concurrent Queries",
         "description": "[DEPRECATED] The max number of requests that will be executed at once in each replica. Defaults to 100.",
         "default": 1,
         "exclusiveMinimum": 0,
         "type": "integer"
      },
      "max_ongoing_requests": {
         "title": "Max Ongoing Requests",
         "description": "Maximum number of requests that are sent in parallel to each replica of this deployment. The limit is enforced across all callers (HTTP requests or DeploymentHandles). Defaults to 100.",
         "default": 1,
         "exclusiveMinimum": 0,
         "type": "integer"
      },
      "max_queued_requests": {
         "title": "Max Queued Requests",
         "description": "[DEPRECATED] The max number of requests that will be executed at once in each replica. Defaults to 100.",
         "default": 1,
         "type": "integer"
      },
      "user_config": {
         "title": "User Config",
         "description": "Config to pass into this deployment's reconfigure method. This can be updated dynamically without restarting replicas",
         "default": 1,
         "type": "object"
      },
      "autoscaling_config": {
         "title": "Autoscaling Config",
         "description": "Config specifying autoscaling parameters for the deployment's number of replicas. If null, the deployment won't autoscale its number of replicas; the number of replicas will be fixed at num_replicas.",
         "default": 1,
         "type": "object"
      },
      "graceful_shutdown_wait_loop_s": {
         "title": "Graceful Shutdown Wait Loop S",
         "description": "Duration that deployment replicas will wait until there is no more work to be done before shutting down. Uses a default if null.",
         "default": 1,
         "minimum": 0,
         "type": "number"
      },
      "graceful_shutdown_timeout_s": {
         "title": "Graceful Shutdown Timeout S",
         "description": "Serve controller waits for this duration before forcefully killing the replica for shutdown. Uses a default if null.",
         "default": 1,
         "minimum": 0,
         "type": "number"
      },
      "health_check_period_s": {
         "title": "Health Check Period S",
         "description": "Frequency at which the controller will health check replicas. Uses a default if null.",
         "default": 1,
         "exclusiveMinimum": 0,
         "type": "number"
      },
      "health_check_timeout_s": {
         "title": "Health Check Timeout S",
         "description": "Timeout that the controller will wait for a response from the replica's health check before marking it unhealthy. Uses a default if null.",
         "default": 1,
         "exclusiveMinimum": 0,
         "type": "number"
      },
      "ray_actor_options": {
         "title": "Ray Actor Options",
         "description": "Options set for each replica actor.",
         "default": 1,
         "allOf": [
            {
               "$ref": "#/definitions/RayActorOptionsSchema"
            }
         ]
      },
      "placement_group_bundles": {
         "title": "Placement Group Bundles",
         "description": "Define a set of placement group bundles to be scheduled *for each replica* of this deployment. The replica actor will be scheduled in the first bundle provided, so the resources specified in `ray_actor_options` must be a subset of the first bundle's resources. All actors and tasks created by the replica actor will be scheduled in the placement group by default (`placement_group_capture_child_tasks` is set to True).",
         "default": 1,
         "type": "array",
         "items": {
            "type": "object",
            "additionalProperties": {
               "type": "number"
            }
         }
      },
      "placement_group_strategy": {
         "title": "Placement Group Strategy",
         "description": "Strategy to use for the replica placement group specified via `placement_group_bundles`. Defaults to `PACK`.",
         "default": 1,
         "type": "string"
      },
      "max_replicas_per_node": {
         "title": "Max Replicas Per Node",
         "description": "The max number of replicas of this deployment that can run on a single Valid values are None (default, no limit) or an integer in the range of [1, 100]. ",
         "default": 1,
         "type": "integer"
      },
      "logging_config": {
         "title": "Logging Config",
         "description": "Logging config for configuring serve deployment logs.",
         "default": 1,
         "allOf": [
            {
               "$ref": "#/definitions/LoggingConfig"
            }
         ]
      }
   },
   "required": [
      "name"
   ],
   "definitions": {
      "RayActorOptionsSchema": {
         "title": "RayActorOptionsSchema",
         "description": "Options with which to start a replica actor.",
         "type": "object",
         "properties": {
            "runtime_env": {
               "title": "Runtime Env",
               "description": "This deployment's runtime_env. working_dir and py_modules may contain only remote URIs.",
               "default": {},
               "type": "object"
            },
            "num_cpus": {
               "title": "Num Cpus",
               "description": "The number of CPUs required by the deployment's application per replica. This is the same as a ray actor's num_cpus. Uses a default if null.",
               "minimum": 0,
               "type": "number"
            },
            "num_gpus": {
               "title": "Num Gpus",
               "description": "The number of GPUs required by the deployment's application per replica. This is the same as a ray actor's num_gpus. Uses a default if null.",
               "minimum": 0,
               "type": "number"
            },
            "memory": {
               "title": "Memory",
               "description": "Restrict the heap memory usage of each replica. Uses a default if null.",
               "minimum": 0,
               "type": "number"
            },
            "object_store_memory": {
               "title": "Object Store Memory",
               "description": "Restrict the object store memory used per replica when creating objects. Uses a default if null.",
               "minimum": 0,
               "type": "number"
            },
            "resources": {
               "title": "Resources",
               "description": "The custom resources required by each replica.",
               "default": {},
               "type": "object"
            },
            "accelerator_type": {
               "title": "Accelerator Type",
               "description": "Forces replicas to run on nodes with the specified accelerator type.See :ref:`accelerator types <accelerator_types>`.",
               "type": "string"
            }
         }
      },
      "EncodingType": {
         "title": "EncodingType",
         "description": "Encoding type for the serve logs.\n\n**PublicAPI (alpha):** This API is in alpha and may change before becoming stable.",
         "enum": [
            "TEXT",
            "JSON"
         ],
         "type": "string"
      },
      "LoggingConfig": {
         "title": "LoggingConfig",
         "description": "Logging config schema for configuring serve components logs.\n\nExample:\n\n    .. code-block:: python\n\n        from ray import serve\n        from ray.serve.schema import LoggingConfig\n        # Set log level for the deployment.\n        @serve.deployment(LoggingConfig(log_level=\"DEBUG\")\n        class MyDeployment:\n            def __call__(self) -> str:\n                return \"Hello world!\"\n        # Set log directory for the deployment.\n        @serve.deployment(LoggingConfig(logs_dir=\"/my_dir\")\n        class MyDeployment:\n            def __call__(self) -> str:\n                return \"Hello world!\"\n\n**PublicAPI (alpha):** This API is in alpha and may change before becoming stable.",
         "type": "object",
         "properties": {
            "encoding": {
               "title": "Encoding",
               "description": "Encoding type for the serve logs. Defaults to 'TEXT'. The default can be overwritten using the `RAY_SERVE_LOG_ENCODING` environment variable. 'JSON' is also supported for structured logging.",
               "anyOf": [
                  {
                     "type": "string"
                  },
                  {
                     "$ref": "#/definitions/EncodingType"
                  }
               ]
            },
            "log_level": {
               "title": "Log Level",
               "description": "Log level for the serve logs. Defaults to INFO. You can set it to 'DEBUG' to get more detailed debug logs.",
               "default": "INFO",
               "anyOf": [
                  {
                     "type": "integer"
                  },
                  {
                     "type": "string"
                  }
               ]
            },
            "logs_dir": {
               "title": "Logs Dir",
               "description": "Directory to store the logs. Default to None, which means logs will be stored in the default directory ('/tmp/ray/session_latest/logs/serve/...').",
               "type": "string"
            },
            "enable_access_log": {
               "title": "Enable Access Log",
               "description": "Whether to enable access logs for each request. Default to True.",
               "default": true,
               "type": "boolean"
            }
         },
         "additionalProperties": false
      }
   }
}

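Fields:
  • autoscaling_config (Dict | None)
  • graceful_shutdown_timeout_s (float)
  • graceful_shutdown_wait_loop_s (float)
  • health_check_period_s (float)
  • health_check_timeout_s (float)
  • logging_config (LoggingConfig)
  • max_concurrent_queries (int)
  • max_ongoing_requests (int)
  • max_queued_requests (StrictInt)
  • max_replicas_per_node (int)
  • name (str)
  • num_replicas (PositiveInt | str | None)
  • placement_group_bundles (List[Dict[str, float]])
  • placement_group_strategy (str)
  • ray_actor_options (RayActorOptionsSchema)
  • route_prefix (str | None)
  • user_config (Dict | None)

Validators:
  • validate_max_queued_requests  »  all fields
  • validate_max_replicas_per_node_and_placement_group_bundles  »  all fields
  • validate_num_replicas_and_autoscaling_config  »  all fields
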
field autoscaling_config: Dict | None = DEFAULT.VALUE#

Config specifying autoscaling parameters for the deployment’s number of replicas. If null, the deployment won’t autoscale its number of replicas; the number of replicas will be fixed at num_replicas.

Validated by:
field graceful_shutdown_timeout_s: float = DEFAULT.VALUE#

Serve controller waits for this duration before forcefully killing the replica for shutdown. Uses a default if null.

Constraints:
  • minimum = 0

Validated by:
field graceful_shutdown_wait_loop_s: float = DEFAULT.VALUE#

Duration that deployment replicas will wait until there is no more work to be done before shutting down. Uses a default if null.

Constraints:
  • minimum = 0

Validated by:
field health_check_period_s: float = DEFAULT.VALUE#

Frequency at which the controller will health check replicas. Uses a default if null.

Constraints:
  • exclusiveMinimum = 0

Validated by:
field health_check_timeout_s: float = DEFAULT.VALUE#

Timeout that the controller will wait for a response from the replica’s health check before marking it unhealthy. Uses a default if null.

Constraints:
  • exclusiveMinimum = 0

Validated by:
field logging_config: LoggingConfig = DEFAULT.VALUE#

Logging config for configuring serve deployment logs.

Validated by:
field max_concurrent_queries: int = DEFAULT.VALUE#

[DEPRECATED] The max number of requests that will be executed at once in each replica. Defaults to 100.

Constraints:
  • exclusiveMinimum = 0

Validated by:
field max_ongoing_requests: int = DEFAULT.VALUE#

Maximum number of requests that are sent in parallel to each replica of this deployment. The limit is enforced across all callers (HTTP requests or DeploymentHandles). Defaults to 100.

Constraints:
  • exclusiveMinimum = 0

Validated by:
field max_queued_requests: StrictInt = DEFAULT.VALUE#

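Maximum number of requests to this deployment that will be queued at each caller (proxy or DeploymentHandle). Once this limit is reached, subsequent requests raise a BackPressureError (for handles) or return an HTTP 503 status code (for HTTP requests). Defaults to -1 (no limit).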

Validated by:
field max_replicas_per_node: int = DEFAULT.VALUE#

The max number of replicas of this deployment that can run on a single node. Valid values are None (default, no limit) or an integer in the range of [1, 100].

Validated by:
field name: str [Required]#

Globally-unique name identifying this deployment.

Validated by:
field num_replicas: PositiveInt | str | None = DEFAULT.VALUE#

The number of processes that handle requests to this deployment. Uses a default if null. Can also be set to auto for a default autoscaling configuration (experimental).

Validated by:
field placement_group_bundles: List[Dict[str, float]] = DEFAULT.VALUE#

Define a set of placement group bundles to be scheduled for each replica of this deployment. The replica actor will be scheduled in the first bundle provided, so the resources specified in ray_actor_options must be a subset of the first bundle’s resources. All actors and tasks created by the replica actor will be scheduled in the placement group by default (placement_group_capture_child_tasks is set to True).

Validated by:
field placement_group_strategy: str = DEFAULT.VALUE#

Strategy to use for the replica placement group specified via placement_group_bundles. Defaults to PACK.

Validated by:
field ray_actor_options: RayActorOptionsSchema = DEFAULT.VALUE#

Options set for each replica actor.

Validated by:
field route_prefix: str | None = DEFAULT.VALUE#

[DEPRECATED] Please use route_prefix under ServeApplicationSchema instead.

Validated by:
field user_config: Dict | None = DEFAULT.VALUE#

Config to pass into this deployment’s reconfigure method. This can be updated dynamically without restarting replicas.

Validated by:
validator validate_max_queued_requests  »  all fields[source]#
validator validate_max_replicas_per_node_and_placement_group_bundles  »  all fields[source]#
validator validate_num_replicas_and_autoscaling_config  »  all fields[source]#