ray.serve.llm.openai_api_models.CompletionRequest#
- pydantic model ray.serve.llm.openai_api_models.CompletionRequest[source]#
CompletionRequest is the request body for the completion API.
This model is compatible with vLLM’s OpenAI API models.
PublicAPI (alpha): This API is in alpha and may change before becoming stable.
Show JSON schema
{ "title": "CompletionRequest", "description": "CompletionRequest is the request body for the completion API.\n\nThis model is compatible with vLLM's OpenAI API models.\n\n**PublicAPI (alpha):** This API is in alpha and may change before becoming stable.", "type": "object", "properties": { "model": { "title": "Model", "type": "string" }, "prompt": { "anyOf": [ { "items": { "type": "integer" }, "type": "array" }, { "items": { "items": { "type": "integer" }, "type": "array" }, "type": "array" }, { "type": "string" }, { "items": { "type": "string" }, "type": "array" } ], "title": "Prompt" }, "best_of": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Best Of" }, "echo": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "title": "Echo" }, "frequency_penalty": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": 0.0, "title": "Frequency Penalty" }, "logit_bias": { "anyOf": [ { "additionalProperties": { "type": "number" }, "type": "object" }, { "type": "null" } ], "default": null, "title": "Logit Bias" }, "logprobs": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Logprobs" }, "max_tokens": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": 16, "title": "Max Tokens" }, "n": { "default": 1, "title": "N", "type": "integer" }, "presence_penalty": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": 0.0, "title": "Presence Penalty" }, "seed": { "anyOf": [ { "maximum": 9223372036854775807, "minimum": -9223372036854775808, "type": "integer" }, { "type": "null" } ], "default": null, "title": "Seed" }, "stop": { "anyOf": [ { "type": "string" }, { "items": { "type": "string" }, "type": "array" }, { "type": "null" } ], "title": "Stop" }, "stream": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "title": "Stream" }, "stream_options": { "anyOf": [ { "$ref": "#/$defs/StreamOptions" }, { "type": "null" } ],
"default": null }, "suffix": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "Suffix" }, "temperature": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Temperature" }, "top_p": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Top P" }, "user": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "User" }, "use_beam_search": { "default": false, "title": "Use Beam Search", "type": "boolean" }, "top_k": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Top K" }, "min_p": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Min P" }, "repetition_penalty": { "anyOf": [ { "type": "number" }, { "type": "null" } ], "default": null, "title": "Repetition Penalty" }, "length_penalty": { "default": 1.0, "title": "Length Penalty", "type": "number" }, "stop_token_ids": { "anyOf": [ { "items": { "type": "integer" }, "type": "array" }, { "type": "null" } ], "title": "Stop Token Ids" }, "include_stop_str_in_output": { "default": false, "title": "Include Stop Str In Output", "type": "boolean" }, "ignore_eos": { "default": false, "title": "Ignore Eos", "type": "boolean" }, "min_tokens": { "default": 0, "title": "Min Tokens", "type": "integer" }, "skip_special_tokens": { "default": true, "title": "Skip Special Tokens", "type": "boolean" }, "spaces_between_special_tokens": { "default": true, "title": "Spaces Between Special Tokens", "type": "boolean" }, "truncate_prompt_tokens": { "anyOf": [ { "minimum": 1, "type": "integer" }, { "type": "null" } ], "default": null, "title": "Truncate Prompt Tokens" }, "allowed_token_ids": { "anyOf": [ { "items": { "type": "integer" }, "type": "array" }, { "type": "null" } ], "default": null, "title": "Allowed Token Ids" }, "prompt_logprobs": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Prompt Logprobs" },
"add_special_tokens": { "default": true, "description": "If true (the default), special tokens (e.g. BOS) will be added to the prompt.", "title": "Add Special Tokens", "type": "boolean" }, "response_format": { "anyOf": [ { "$ref": "#/$defs/ResponseFormatText" }, { "$ref": "#/$defs/ResponseFormatGrammar" }, { "$ref": "#/$defs/ResponseFormatJsonObject" }, { "type": "null" } ], "default": null, "description": "Similar to chat completion, this parameter specifies the format of output. Only {'type': 'json_object'}, {'type': 'json_schema'} or {'type': 'text' } is supported.", "title": "Response Format" }, "guided_json": { "anyOf": [ { "type": "string" }, { "type": "object" }, { "$ref": "#/$defs/BaseModel" }, { "type": "null" } ], "default": null, "description": "If specified, the output will follow the JSON schema.", "title": "Guided Json" }, "guided_regex": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "If specified, the output will follow the regex pattern.", "title": "Guided Regex" }, "guided_choice": { "anyOf": [ { "items": { "type": "string" }, "type": "array" }, { "type": "null" } ], "default": null, "description": "If specified, the output will be exactly one of the choices.", "title": "Guided Choice" }, "guided_grammar": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "If specified, the output will follow the context free grammar.", "title": "Guided Grammar" }, "guided_decoding_backend": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null,
"description": "If specified, will override the default guided decoding backend of the server for this specific request. If set, must be one of 'outlines' / 'lm-format-enforcer'", "title": "Guided Decoding Backend" }, "guided_whitespace_pattern": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "If specified, will override the default whitespace pattern for guided json decoding.", "title": "Guided Whitespace Pattern" }, "priority": { "default": 0, "description": "The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.", "title": "Priority", "type": "integer" }, "logits_processors": { "anyOf": [ { "items": { "anyOf": [ { "type": "string" }, { "$ref": "#/$defs/LogitsProcessorConstructor" } ] }, "type": "array" }, { "type": "null" } ], "default": null,
"description": "A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.", "title": "Logits Processors" } }, "$defs": { "BaseModel": { "properties": {}, "title": "BaseModel", "type": "object" }, "LogitsProcessorConstructor": { "properties": { "qualname": { "title": "Qualname", "type": "string" }, "args": { "anyOf": [ { "items": {}, "type": "array" }, { "type": "null" } ], "default": null, "title": "Args" }, "kwargs": { "anyOf": [ { "type": "object" }, { "type": "null" } ], "default": null, "title": "Kwargs" } }, "required": [ "qualname" ], "title": "LogitsProcessorConstructor", "type": "object" }, "ResponseFormatGrammar": { "additionalProperties": false, "properties": { "type": { "enum": [ "grammar", "grammar_gbnf" ], "title": "Type", "type": "string" }, "grammar": { "title": "Grammar", "type": "string" } }, "required": [ "type", "grammar" ], "title": "ResponseFormatGrammar", "type": "object" }, "ResponseFormatJsonObject": { "additionalProperties": false, "properties": { "type": { "enum": [ "json_object", "json_schema" ], "title": "Type", "type": "string" }, "schema": { "anyOf": [ { "type": "object" }, { "type": "string" }, { "type": "null" } ], "default": {}, "description": "Schema for the JSON response format", "title": "Schema" } }, "required": [ "type" ], "title": "ResponseFormatJsonObject", "type": "object" }, "ResponseFormatText": { "additionalProperties": false, "properties": { "type": { "const": "text", "title": "Type", "type": "string" } }, "required": [ "type" ], "title": "ResponseFormatText", "type": "object" }, "StreamOptions": { "properties": { "include_usage": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": true, "title": "Include Usage" }, "continuous_usage_stats": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "title": "Continuous Usage Stats" } }, "title": "StreamOptions", "type": "object" } }, "required": [ "model", "prompt" ] }
- field add_special_tokens: bool = True#
If true (the default), special tokens (e.g. BOS) will be added to the prompt.
- field guided_choice: List[str] | None = None#
If specified, the output will be exactly one of the choices.
- field guided_decoding_backend: str | None = None#
If specified, will override the default guided decoding backend of the server for this specific request. If set, must be one of ‘outlines’ or ‘lm-format-enforcer’.
- field guided_grammar: str | None = None#
If specified, the output will follow the context free grammar.
- field guided_json: str | dict | BaseModel | None = None#
If specified, the output will follow the JSON schema.
- field guided_whitespace_pattern: str | None = None#
If specified, will override the default whitespace pattern for guided json decoding.
- field logits_processors: LogitsProcessors | None = None#
A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required ‘qualname’ field specifying the qualified name of the processor class/factory, and optional ‘args’ and ‘kwargs’ fields containing positional and keyword arguments. For example: {‘qualname’: ‘my_module.MyLogitsProcessor’, ‘args’: [1, 2], ‘kwargs’: {‘param’: ‘value’}}.
- field priority: int = 0#
The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.
- field response_format: ResponseFormat | None = None#
Similar to chat completion, this parameter specifies the format of the output. Only {‘type’: ‘json_object’}, {‘type’: ‘json_schema’} or {‘type’: ‘text’} is supported.
- field truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None#