ray.serve.llm.openai_api_models.EmbeddingRequest#
- pydantic model ray.serve.llm.openai_api_models.EmbeddingRequest[source]#
EmbeddingRequest is the request body for the embedding API.
This model is compatible with vLLM’s OpenAI API models.
PublicAPI (alpha): This API is in alpha and may change before becoming stable.
Show JSON schema
{ "title": "EmbeddingRequest", "description": "EmbeddingRequest is the request body for the embedding API.\n\nThis model is compatible with vLLM's OpenAI API models.\n\n**PublicAPI (alpha):** This API is in alpha and may change before becoming stable.", "type": "object", "properties": { "model": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "Model" }, "input": { "anyOf": [ { "items": { "type": "integer" }, "type": "array" }, { "items": { "items": { "type": "integer" }, "type": "array" }, "type": "array" }, { "type": "string" }, { "items": { "type": "string" }, "type": "array" } ], "title": "Input" }, "encoding_format": { "default": "float", "enum": [ "float", "base64" ], "title": "Encoding Format", "type": "string" }, "dimensions": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "title": "Dimensions" }, "user": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "title": "User" }, "truncate_prompt_tokens": { "anyOf": [ { "minimum": 1, "type": "integer" }, { "type": "null" } ], "default": null, "title": "Truncate Prompt Tokens" }, "additional_data": { "anyOf": [ {}, { "type": "null" } ], "default": null, "title": "Additional Data" }, "add_special_tokens": { "default": true, "description": "If true (the default), special tokens (e.g. BOS) will be added to the prompt.", "title": "Add Special Tokens", "type": "boolean" }, "priority": { "default": 0, "description": "The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.", "title": "Priority", "type": "integer" } }, "required": [ "input" ] }
- field add_special_tokens: bool = True#
If true (the default), special tokens (e.g. BOS) will be added to the prompt.
- field encoding_format: Literal['float', 'base64'] = 'float'#
The format in which embeddings are returned: 'float' (the default) or 'base64'.
- field priority: int = 0#
The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.
- field truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None#
Optional; if set, the value must be at least 1 (presumably the maximum number of prompt tokens to keep — see vLLM's documentation for exact truncation semantics).