ray.serve.llm.openai_api_models.EmbeddingRequest#

pydantic model ray.serve.llm.openai_api_models.EmbeddingRequest[source]#

EmbeddingRequest is the request body for the embedding API.

This model is compatible with vLLM’s OpenAI API models.

PublicAPI (alpha): This API is in alpha and may change before becoming stable.

JSON schema:
{
   "title": "EmbeddingRequest",
   "description": "EmbeddingRequest is the request body for the embedding API.\n\nThis model is compatible with vLLM's OpenAI API models.\n\n**PublicAPI (alpha):** This API is in alpha and may change before becoming stable.",
   "type": "object",
   "properties": {
      "model": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Model"
      },
      "input": {
         "anyOf": [
            {
               "items": {
                  "type": "integer"
               },
               "type": "array"
            },
            {
               "items": {
                  "items": {
                     "type": "integer"
                  },
                  "type": "array"
               },
               "type": "array"
            },
            {
               "type": "string"
            },
            {
               "items": {
                  "type": "string"
               },
               "type": "array"
            }
         ],
         "title": "Input"
      },
      "encoding_format": {
         "default": "float",
         "enum": [
            "float",
            "base64"
         ],
         "title": "Encoding Format",
         "type": "string"
      },
      "dimensions": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Dimensions"
      },
      "user": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "User"
      },
      "truncate_prompt_tokens": {
         "anyOf": [
            {
               "minimum": 1,
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Truncate Prompt Tokens"
      },
      "additional_data": {
         "anyOf": [
            {},
            {
               "type": "null"
            }
         ],
         "default": null,
         "title": "Additional Data"
      },
      "add_special_tokens": {
         "default": true,
         "description": "If true (the default), special tokens (e.g. BOS) will be added to the prompt.",
         "title": "Add Special Tokens",
         "type": "boolean"
      },
      "priority": {
         "default": 0,
         "description": "The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
         "title": "Priority",
         "type": "integer"
      }
   },
   "required": [
      "input"
   ]
}

field add_special_tokens: bool = True#

If true (the default), special tokens (e.g. BOS) will be added to the prompt.

field additional_data: Any | None = None#
field dimensions: int | None = None#
field encoding_format: Literal['float', 'base64'] = 'float'#
field input: List[int] | List[List[int]] | str | List[str] [Required]#
field model: str | None = None#
field priority: int = 0#

The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.

field truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None#
field user: str | None = None#