ray.serve.llm.openai_api_models.ChatCompletionRequest#
- pydantic model ray.serve.llm.openai_api_models.ChatCompletionRequest[source]#
ChatCompletionRequest is the request body for the chat completion API.
This model is compatible with vLLM’s OpenAI API models.
PublicAPI (alpha): This API is in alpha and may change before becoming stable.
Show JSON schema
{ "title": "ChatCompletionRequest", "description": "ChatCompletionRequest is the request body for the chat completion API.\n\nThis model is compatible with vLLM's OpenAI API models.\n\n**PublicAPI (alpha):** This API is in alpha and may change before becoming stable.", "type": "object", "properties": { "messages": { "items": { "$ref": "#/$defs/ChatCompletionMessageParam" }, "minItems": 1, "title": "Messages", "type": "array" }, "model": { "title": "Model", "type": "string" }, "frequency_penalty": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": 0.0, "title": "Frequency Penalty" }, "logit_bias": { "anyOf": [ { "additionalProperties": { "type": "number" }, "type": "object" }, { "type": "null" } ], "default": null, "title": "Logit Bias" }, "logprobs": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "title": "Logprobs" }, "top_logprobs": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": 0, "title": "Top Logprobs" }, "max_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "deprecated": true, "title": "Max Tokens" }, "max_completion_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Max Completion Tokens" }, "n": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": 1, "title": "N" }, "presence_penalty": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": 0.0, "title": "Presence Penalty" }, "response_format": { "anyOf": [ { "$ref": "#/$defs/ResponseFormatText" }, { "$ref": "#/$defs/ResponseFormatGrammar" }, { "$ref": "#/$defs/ResponseFormatJsonObject" }, { "type": "null" } ], "default": null, "title": "Response Format" }, "seed": { "anyOf": [ { "maximum": 9223372036854775807, "minimum": -9223372036854775808, "type": "integer" }, { "type": "null" } ], "default": null, "title": "Seed" }, "stop": { "anyOf": [ { "type": "string" }, { "items": { "type": "string" }, "type": "array" }, { "type": "null" } ], 
"title": "Stop" }, "stream": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "title": "Stream" }, "stream_options": { "anyOf": [ { "$ref": "#/$defs/StreamOptions" }, { "type": "null" } ], "default": null }, "temperature": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Temperature" }, "top_p": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Top P" }, "tools": { "anyOf": [ { "items": { "$ref": "#/$defs/ChatCompletionToolsParam" }, "type": "array" }, { "type": "null" } ], "default": null, "title": "Tools" }, "tool_choice": { "anyOf": [ { "const": "none", "type": "string" }, { "const": "auto", "type": "string" }, { "$ref": "#/$defs/ChatCompletionNamedToolChoiceParam" }, { "type": "null" } ], "default": "none", "title": "Tool Choice" }, "parallel_tool_calls": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "title": "Parallel Tool Calls" }, "user": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "User" }, "best_of": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Best Of" }, "use_beam_search": { "default": false, "title": "Use Beam Search", "type": "boolean" }, "top_k": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Top K" }, "min_p": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Min P" }, "repetition_penalty": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Repetition Penalty" }, "length_penalty": { "default": 1.0, "title": "Length Penalty", "type": "number" }, "stop_token_ids": { "anyOf": [ { "items": { "type": "integer" }, "type": "array" }, { "type": "null" } ], "title": "Stop Token Ids" }, "include_stop_str_in_output": { "default": false, "title": "Include Stop Str In Output", "type": "boolean" }, "ignore_eos": { "default": false, "title": "Ignore Eos", 
"type": "boolean" }, "min_tokens": { "default": 0, "title": "Min Tokens", "type": "integer" }, "skip_special_tokens": { "default": true, "title": "Skip Special Tokens", "type": "boolean" }, "spaces_between_special_tokens": { "default": true, "title": "Spaces Between Special Tokens", "type": "boolean" }, "truncate_prompt_tokens": { "anyOf": [ { "minimum": 1, "type": "integer" }, { "type": "null" } ], "default": null, "title": "Truncate Prompt Tokens" }, "prompt_logprobs": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Prompt Logprobs" }, "echo": { "default": false, "description": "If true, the new message will be prepended with the last message if they belong to the same role.", "title": "Echo", "type": "boolean" }, "add_generation_prompt": { "default": true, "description": "If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.", "title": "Add Generation Prompt", "type": "boolean" }, "continue_final_message": { "default": false, "description": "If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to \"prefill\" part of the model's response for it. Cannot be used at the same time as `add_generation_prompt`.", "title": "Continue Final Message", "type": "boolean" }, "add_special_tokens": { "default": false, "description": "If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. 
For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).", "title": "Add Special Tokens", "type": "boolean" }, "documents": { "anyOf": [ { "items": { "additionalProperties": { "type": "string" }, "type": "object" }, "type": "array" }, { "type": "null" } ], "default": null, "description": "A list of dicts representing documents that will be accessible to the model if it is performing RAG (retrieval-augmented generation). If the template does not support RAG, this argument will have no effect. We recommend that each document should be a dict containing \"title\" and \"text\" keys.", "title": "Documents" }, "chat_template": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.", "title": "Chat Template" }, "chat_template_kwargs": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "default": null, "description": "Additional kwargs to pass to the template renderer. 
Will be accessible by the chat template.", "title": "Chat Template Kwargs" }, "guided_json": { "anyOf": [ { "type": "string" }, { "type": "object" }, { "$ref": "#/$defs/BaseModel" }, { "type": "null" } ], "default": null, "description": "If specified, the output will follow the JSON schema.", "title": "Guided Json" }, "guided_regex": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "If specified, the output will follow the regex pattern.", "title": "Guided Regex" }, "guided_choice": { "anyOf": [ { "items": { "type": "string" }, "type": "array" }, { "type": "null" } ], "default": null, "description": "If specified, the output will be exactly one of the choices.", "title": "Guided Choice" }, "guided_grammar": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "If specified, the output will follow the context free grammar.", "title": "Guided Grammar" }, "guided_decoding_backend": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "If specified, will override the default guided decoding backend of the server for this specific request. If set, must be either 'outlines' / 'lm-format-enforcer'", "title": "Guided Decoding Backend" }, "guided_whitespace_pattern": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "If specified, will override the default whitespace pattern for guided json decoding.", "title": "Guided Whitespace Pattern" }, "priority": { "default": 0, "description": "The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.", "title": "Priority", "type": "integer" }, "request_id": { "description": "The request_id related to this request. If the caller does not set it, a generate_request_id will be generated. 
This id is used through out the inference process and return in response.", "title": "Request Id", "type": "string" }, "logits_processors": { "anyOf": [ { "items": { "anyOf": [ { "type": "string" }, { "$ref": "#/$defs/LogitsProcessorConstructor" } ] }, "type": "array" }, { "type": "null" } ], "default": null, "description": "A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.", "title": "Logits Processors" } }, "$defs": { "AudioURL": { "properties": { "url": { "title": "Url", "type": "string" } }, "required": [ "url" ], "title": "AudioURL", "type": "object" }, "BaseModel": { "properties": {}, "title": "BaseModel", "type": "object" }, "ChatCompletionContentPartAudioParam": { "properties": { "audio_url": { "$ref": "#/$defs/AudioURL" }, "type": { "const": "audio_url", "title": "Type", "type": "string" } }, "required": [ "audio_url", "type" ], "title": "ChatCompletionContentPartAudioParam", "type": "object" }, "ChatCompletionContentPartVideoParam": { "properties": { "video_url": { "$ref": "#/$defs/VideoURL" }, "type": { "const": "video_url", "title": "Type", "type": "string" } }, "required": [ "video_url", "type" ], "title": "ChatCompletionContentPartVideoParam", "type": "object" }, "ChatCompletionMessageParam": { "description": "Enables custom roles in the Chat Completion API.", "properties": { "role": { "title": "Role", "type": "string" }, "content": { "anyOf": [ { "type": "string" }, { "items": { "anyOf": [ {}, { "$ref": "#/$defs/ChatCompletionContentPartAudioParam" }, { "$ref": "#/$defs/ChatCompletionContentPartVideoParam" }, { "$ref": "#/$defs/CustomChatCompletionContentSimpleImageParam" }, { "$ref": 
"#/$defs/CustomChatCompletionContentSimpleAudioParam" }, { "$ref": "#/$defs/CustomChatCompletionContentSimpleVideoParam" }, { "type": "string" } ] }, "type": "array" } ], "title": "Content" }, "name": { "title": "Name", "type": "string" }, "tool_call_id": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Tool Call Id" }, "tool_calls": { "anyOf": [ { "items": {}, "type": "array" }, { "type": "null" } ], "title": "Tool Calls" } }, "required": [ "role" ], "title": "ChatCompletionMessageParam", "type": "object" }, "ChatCompletionNamedFunction": { "properties": { "name": { "title": "Name", "type": "string" } }, "required": [ "name" ], "title": "ChatCompletionNamedFunction", "type": "object" }, "ChatCompletionNamedToolChoiceParam": { "properties": { "function": { "$ref": "#/$defs/ChatCompletionNamedFunction" }, "type": { "const": "function", "default": "function", "title": "Type", "type": "string" } }, "required": [ "function" ], "title": "ChatCompletionNamedToolChoiceParam", "type": "object" }, "ChatCompletionToolsParam": { "properties": { "type": { "const": "function", "default": "function", "title": "Type", "type": "string" }, "function": { "$ref": "#/$defs/FunctionDefinition" } }, "required": [ "function" ], "title": "ChatCompletionToolsParam", "type": "object" }, "CustomChatCompletionContentSimpleAudioParam": { "description": "A simpler version of the param that only accepts a plain audio_url.\n\nExample:\n{\n \"audio_url\": \"https://example.com/audio.mp3\"\n}", "properties": { "audio_url": { "title": "Audio Url", "type": "string" } }, "required": [ "audio_url" ], "title": "CustomChatCompletionContentSimpleAudioParam", "type": "object" }, "CustomChatCompletionContentSimpleImageParam": { "description": "A simpler version of the param that only accepts a plain image_url.\nThis is supported by OpenAI API, although it is not documented.\n\nExample:\n{\n \"image_url\": \"https://example.com/image.jpg\"\n}", "properties": { "image_url": { "title": "Image 
"Url", "type": "string" } }, "required": [ "image_url" ], "title": "CustomChatCompletionContentSimpleImageParam", "type": "object" }, "CustomChatCompletionContentSimpleVideoParam": { "description": "A simpler version of the param that only accepts a plain video_url.\n\nExample:\n{\n \"video_url\": \"https://example.com/video.mp4\"\n}", "properties": { "video_url": { "title": "Video Url", "type": "string" } }, "required": [ "video_url" ], "title": "CustomChatCompletionContentSimpleVideoParam", "type": "object" }, "FunctionDefinition": { "properties": { "name": { "title": "Name", "type": "string" }, "description": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "Description" }, "parameters": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "default": null, "title": "Parameters" } }, "required": [ "name" ], "title": "FunctionDefinition", "type": "object" }, "LogitsProcessorConstructor": { "properties": { "qualname": { "title": "Qualname", "type": "string" }, "args": { "anyOf": [ { "items": {}, "type": "array" }, { "type": "null" } ], "default": null, "title": "Args" }, "kwargs": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "default": null, "title": "Kwargs" } }, "required": [ "qualname" ], "title": "LogitsProcessorConstructor", "type": "object" }, "ResponseFormatGrammar": { "additionalProperties": false, "properties": { "type": { "enum": [ "grammar", "grammar_gbnf" ], "title": "Type", "type": "string" }, "grammar": { "title": "Grammar", "type": "string" } }, "required": [ "type", "grammar" ], "title": "ResponseFormatGrammar", "type": "object" }, "ResponseFormatJsonObject": { "additionalProperties": false, "properties": { "type": { "enum": [ "json_object", "json_schema" ], "title": "Type", "type": "string" }, "schema": { "anyOf": [ { "type": "object" }, { "type": "string" }, { "type": "null" } ], "default": {}, "description": "Schema for the JSON response format", "title": "Schema" } }, "required": [ "type" ],
"title": "ResponseFormatJsonObject", "type": "object" }, "ResponseFormatText": { "additionalProperties": false, "properties": { "type": { "const": "text", "title": "Type", "type": "string" } }, "required": [ "type" ], "title": "ResponseFormatText", "type": "object" }, "StreamOptions": { "properties": { "include_usage": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": true, "title": "Include Usage" }, "continuous_usage_stats": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "title": "Continuous Usage Stats" } }, "title": "StreamOptions", "type": "object" }, "VideoURL": { "properties": { "url": { "title": "Url", "type": "string" } }, "required": [ "url" ], "title": "VideoURL", "type": "object" } }, "required": [ "messages", "model" ] }
- field add_generation_prompt: bool = True#
If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.
- field add_special_tokens: bool = False#
If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).
- field chat_template: str | None = None#
A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.
- field chat_template_kwargs: Dict[str, Any] | None = None#
Additional kwargs to pass to the template renderer. Will be accessible by the chat template.
- field continue_final_message: bool = False#
If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to “prefill” part of the model’s response for it. Cannot be used at the same time as add_generation_prompt.
- field documents: List[Dict[str, str]] | None = None#
A list of dicts representing documents that will be accessible to the model if it is performing RAG (retrieval-augmented generation). If the template does not support RAG, this argument will have no effect. We recommend that each document should be a dict containing “title” and “text” keys.
- field echo: bool = False#
If true, the new message will be prepended with the last message if they belong to the same role.
- field guided_choice: List[str] | None = None#
If specified, the output will be exactly one of the choices.
- field guided_decoding_backend: str | None = None#
If specified, will override the default guided decoding backend of the server for this specific request. If set, must be either ‘outlines’ / ‘lm-format-enforcer’
- field guided_grammar: str | None = None#
If specified, the output will follow the context free grammar.
- field guided_json: str | dict | BaseModel | None = None#
If specified, the output will follow the JSON schema.
- field guided_whitespace_pattern: str | None = None#
If specified, will override the default whitespace pattern for guided json decoding.
- field logits_processors: LogitsProcessors | None = None#
A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required ‘qualname’ field specifying the qualified name of the processor class/factory, and optional ‘args’ and ‘kwargs’ fields containing positional and keyword arguments. For example: {‘qualname’: ‘my_module.MyLogitsProcessor’, ‘args’: [1, 2], ‘kwargs’: {‘param’: ‘value’}}.
- field messages: Annotated[List[ChatCompletionMessageParam], Field(min_length=1)] [Required]#
- Constraints:
min_length = 1
- field priority: int = 0#
The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.
- field request_id: str [Optional]#
The request_id related to this request. If the caller does not set it, a generate_request_id will be generated. This id is used throughout the inference process and returned in the response.
- field tool_choice: Literal['none'] | Literal['auto'] | ChatCompletionNamedToolChoiceParam | None = 'none'#
- field truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None#
- max_tokens: int | None#
Read-only data descriptor used to emit a runtime deprecation warning before accessing a deprecated field.
- msg#
The deprecation message to be emitted.
- wrapped_property#
The property instance if the deprecated field is a computed field, or
None
.
- field_name#
The name of the field being deprecated.