ray.serve.llm.openai_api_models.CompletionRequest#

pydantic model ray.serve.llm.openai_api_models.CompletionRequest[source]#

CompletionRequest is the request body for the completion API.

This model is compatible with vLLM’s OpenAI API models.

PublicAPI (alpha): This API is in alpha and may change before becoming stable.

Show JSON schema
{
   "title": "CompletionRequest",
   "description": "CompletionRequest is the request body for the completion API.\n\nThis model is compatible with vLLM's OpenAI API models.\n\n**PublicAPI (alpha):** This API is in alpha and may change before becoming stable.",
   "type": "object",
   "properties": {
      "model": {
         "title": "Model",
         "type": "string"
      },
      "prompt": {
         "anyOf": [
            {
               "items": {
                  "type": "integer"
               },
               "type": "array"
            },
            {
               "items": {
                  "items": {
                     "type": "integer"
                  },
                  "type": "array"
               },
               "type": "array"
            },
            {
               "type": "string"
            },
            {
               "items": {
                  "type": "string"
               },
               "type": "array"
            }
         ],
         "title": "Prompt"
      },
      "best_of": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Best Of"
      },
      "echo": {
         "anyOf": [
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "default": false,
         "title": "Echo"
      },
      "frequency_penalty": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": 0.0,
         "title": "Frequency Penalty"
      },
      "logit_bias": {
         "anyOf": [
            {
               "additionalProperties": {
                  "type": "number"
               },
               "type": "object"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Logit Bias"
      },
      "logprobs": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Logprobs"
      },
      "max_tokens": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": 16,
         "title": "Max Tokens"
      },
      "n": {
         "default": 1,
         "title": "N",
         "type": "integer"
      },
      "presence_penalty": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": 0.0,
         "title": "Presence Penalty"
      },
      "seed": {
         "anyOf": [
            {
               "maximum": 9223372036854775807,
               "minimum": -9223372036854775808,
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Seed"
      },
      "stop": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "items": {
                  "type": "string"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "title": "Stop"
      },
      "stream": {
         "anyOf": [
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "default": false,
         "title": "Stream"
      },
      "stream_options": {
         "anyOf": [
            {
               "$ref": "#/$defs/StreamOptions"
            },
            {
               "type": "null"
            }
         ],
         "default": null
      },
      "suffix": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Suffix"
      },
      "temperature": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Temperature"
      },
      "top_p": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Top P"
      },
      "user": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "User"
      },
      "use_beam_search": {
         "default": false,
         "title": "Use Beam Search",
         "type": "boolean"
      },
      "top_k": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Top K"
      },
      "min_p": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Min P"
      },
      "repetition_penalty": {
         "anyOf": [
            {
               "type": "number"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Repetition Penalty"
      },
      "length_penalty": {
         "default": 1.0,
         "title": "Length Penalty",
         "type": "number"
      },
      "stop_token_ids": {
         "anyOf": [
            {
               "items": {
                  "type": "integer"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "title": "Stop Token Ids"
      },
      "include_stop_str_in_output": {
         "default": false,
         "title": "Include Stop Str In Output",
         "type": "boolean"
      },
      "ignore_eos": {
         "default": false,
         "title": "Ignore Eos",
         "type": "boolean"
      },
      "min_tokens": {
         "default": 0,
         "title": "Min Tokens",
         "type": "integer"
      },
      "skip_special_tokens": {
         "default": true,
         "title": "Skip Special Tokens",
         "type": "boolean"
      },
      "spaces_between_special_tokens": {
         "default": true,
         "title": "Spaces Between Special Tokens",
         "type": "boolean"
      },
      "truncate_prompt_tokens": {
         "anyOf": [
            {
               "minimum": 1,
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Truncate Prompt Tokens"
      },
      "allowed_token_ids": {
         "anyOf": [
            {
               "items": {
                  "type": "integer"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Allowed Token Ids"
      },
      "prompt_logprobs": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Prompt Logprobs"
      },
      "add_special_tokens": {
         "default": true,
         "description": "If true (the default), special tokens (e.g. BOS) will be added to the prompt.",
         "title": "Add Special Tokens",
         "type": "boolean"
      },
      "response_format": {
         "anyOf": [
            {
               "$ref": "#/$defs/ResponseFormatText"
            },
            {
               "$ref": "#/$defs/ResponseFormatGrammar"
            },
            {
               "$ref": "#/$defs/ResponseFormatJsonObject"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "Similar to chat completion, this parameter specifies the format of output. Only {'type': 'json_object'}, {'type': 'json_schema'} or {'type': 'text' } is supported.",
         "title": "Response Format"
      },
      "guided_json": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "object"
            },
            {
               "$ref": "#/$defs/BaseModel"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "If specified, the output will follow the JSON schema.",
         "title": "Guided Json"
      },
      "guided_regex": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "If specified, the output will follow the regex pattern.",
         "title": "Guided Regex"
      },
      "guided_choice": {
         "anyOf": [
            {
               "items": {
                  "type": "string"
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "If specified, the output will be exactly one of the choices.",
         "title": "Guided Choice"
      },
      "guided_grammar": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "If specified, the output will follow the context free grammar.",
         "title": "Guided Grammar"
      },
      "guided_decoding_backend": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "If specified, will override the default guided decoding backend of the server for this specific request. If set, must be one of 'outlines' / 'lm-format-enforcer'",
         "title": "Guided Decoding Backend"
      },
      "guided_whitespace_pattern": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "If specified, will override the default whitespace pattern for guided json decoding.",
         "title": "Guided Whitespace Pattern"
      },
      "priority": {
         "default": 0,
         "description": "The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
         "title": "Priority",
         "type": "integer"
      },
      "logits_processors": {
         "anyOf": [
            {
               "items": {
                  "anyOf": [
                     {
                        "type": "string"
                     },
                     {
                        "$ref": "#/$defs/LogitsProcessorConstructor"
                     }
                  ]
               },
               "type": "array"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.",
         "title": "Logits Processors"
      }
   },
   "$defs": {
      "BaseModel": {
         "properties": {},
         "title": "BaseModel",
         "type": "object"
      },
      "LogitsProcessorConstructor": {
         "properties": {
            "qualname": {
               "title": "Qualname",
               "type": "string"
            },
            "args": {
               "anyOf": [
                  {
                     "items": {},
                     "type": "array"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "title": "Args"
            },
            "kwargs": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": null,
               "title": "Kwargs"
            }
         },
         "required": [
            "qualname"
         ],
         "title": "LogitsProcessorConstructor",
         "type": "object"
      },
      "ResponseFormatGrammar": {
         "additionalProperties": false,
         "properties": {
            "type": {
               "enum": [
                  "grammar",
                  "grammar_gbnf"
               ],
               "title": "Type",
               "type": "string"
            },
            "grammar": {
               "title": "Grammar",
               "type": "string"
            }
         },
         "required": [
            "type",
            "grammar"
         ],
         "title": "ResponseFormatGrammar",
         "type": "object"
      },
      "ResponseFormatJsonObject": {
         "additionalProperties": false,
         "properties": {
            "type": {
               "enum": [
                  "json_object",
                  "json_schema"
               ],
               "title": "Type",
               "type": "string"
            },
            "schema": {
               "anyOf": [
                  {
                     "type": "object"
                  },
                  {
                     "type": "string"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": {},
               "description": "Schema for the JSON response format",
               "title": "Schema"
            }
         },
         "required": [
            "type"
         ],
         "title": "ResponseFormatJsonObject",
         "type": "object"
      },
      "ResponseFormatText": {
         "additionalProperties": false,
         "properties": {
            "type": {
               "const": "text",
               "title": "Type",
               "type": "string"
            }
         },
         "required": [
            "type"
         ],
         "title": "ResponseFormatText",
         "type": "object"
      },
      "StreamOptions": {
         "properties": {
            "include_usage": {
               "anyOf": [
                  {
                     "type": "boolean"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": true,
               "title": "Include Usage"
            },
            "continuous_usage_stats": {
               "anyOf": [
                  {
                     "type": "boolean"
                  },
                  {
                     "type": "null"
                  }
               ],
               "default": false,
               "title": "Continuous Usage Stats"
            }
         },
         "title": "StreamOptions",
         "type": "object"
      }
   },
   "required": [
      "model",
      "prompt"
   ]
}

field add_special_tokens: bool = True#

If true (the default), special tokens (e.g. BOS) will be added to the prompt.

field allowed_token_ids: List[int] | None = None#
field best_of: int | None = None#
field echo: bool | None = False#
field frequency_penalty: float | None = 0.0#
field guided_choice: List[str] | None = None#

If specified, the output will be exactly one of the choices.

field guided_decoding_backend: str | None = None#

If specified, will override the default guided decoding backend of the server for this specific request. If set, must be one of 'outlines' / 'lm-format-enforcer'.

field guided_grammar: str | None = None#

If specified, the output will follow the context free grammar.

field guided_json: str | dict | BaseModel | None = None#

If specified, the output will follow the JSON schema.

field guided_regex: str | None = None#

If specified, the output will follow the regex pattern.

field guided_whitespace_pattern: str | None = None#

If specified, will override the default whitespace pattern for guided json decoding.

field ignore_eos: bool = False#
field include_stop_str_in_output: bool = False#
field length_penalty: float = 1.0#
field logit_bias: Dict[str, float] | None = None#
field logits_processors: LogitsProcessors | None = None#

A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.

field logprobs: int | None = None#
field max_tokens: int | None = 16#
field min_p: float | None = None#
field min_tokens: int = 0#
field model: str [Required]#
field n: int = 1#
field presence_penalty: float | None = 0.0#
field priority: int = 0#

The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.

field prompt: List[int] | List[List[int]] | str | List[str] [Required]#
field prompt_logprobs: int | None = None#
field repetition_penalty: float | None = None#
field response_format: ResponseFormat | None = None#

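Similar to chat completion, this parameter specifies the format of the output. Only {'type': 'json_object'}, {'type': 'json_schema'} or {'type': 'text'} is supported. (The JSON schema for this field additionally lists a grammar format, with 'type' set to 'grammar' or 'grammar_gbnf' and a required 'grammar' string.)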

field seed: int | None = None#
Constraints:
  • ge = -9223372036854775808

  • le = 9223372036854775807

field skip_special_tokens: bool = True#
field spaces_between_special_tokens: bool = True#
field stop: str | List[str] | None [Optional]#
field stop_token_ids: List[int] | None [Optional]#
field stream: bool | None = False#
field stream_options: StreamOptions | None = None#
field suffix: str | None = None#
field temperature: float | None = None#
field top_k: int | None = None#
field top_p: float | None = None#
field truncate_prompt_tokens: int | None = None#
Constraints:
  • ge = 1

field user: str | None = None#