> ## Documentation Index
> Fetch the complete documentation index at: https://docs.tera.gw/llms.txt
> Use this file to discover all available pages before exploring further.

# Chat completions

> OpenAI-compatible chat completions endpoint. Set `stream: true` for
Server-Sent Events. Reasoning models return chain-of-thought traces in
a separate field — `reasoning` for models using the OpenAI gpt-oss
parser (e.g. `openai/gpt-oss-20b`), `reasoning_content` for models
using the qwen3 parser (e.g. `Qwen/Qwen3.5-27B`). Treat the two as
aliases. See [Reasoning models](/concepts/reasoning).


## OpenAPI

````yaml POST /v1/chat/completions
# OpenAPI specification version this document is written against.
openapi: 3.0.3
# API-level metadata shown at the top of the rendered reference.
info:
  title: Tera API
  # Folded scalar (`>`): consecutive lines are joined with single spaces into
  # one paragraph. Do not leave a blank line inside the sentence — a blank
  # line in a folded scalar becomes a hard line break in the rendered text.
  description: >
    OpenAI-compatible inference API. Existing OpenAI SDKs work without
    modification — point them at `https://api.tera.gw` with your Tera API
    key.
  version: 1.0.0
# Base URL that the paths in this document are resolved against.
servers:
  - url: https://api.tera.gw
    description: Production
# Document-wide default security requirement: operations use the `bearerAuth`
# scheme (declared under components.securitySchemes) unless they override it.
security:
  - bearerAuth: []
# Tag catalog used to group operations in the rendered reference. The chat
# completions operation in this document is tagged `chat`.
tags:
  - name: chat
    description: Chat completions
  - name: completions
    description: Text completions
  - name: models
    description: Model catalog
  - name: audio
    description: Text-to-speech
paths:
  # Chat completions: one POST operation, JSON request body.
  /v1/chat/completions:
    post:
      tags:
        - chat
      summary: Create a chat completion
      description: |
        OpenAI-compatible chat completions endpoint. Set `stream: true` for
        Server-Sent Events. Reasoning models return chain-of-thought traces in
        a separate field — `reasoning` for models using the OpenAI gpt-oss
        parser (e.g. `openai/gpt-oss-20b`), `reasoning_content` for models
        using the qwen3 parser (e.g. `Qwen/Qwen3.5-27B`). Treat the two as
        aliases. See [Reasoning models](/concepts/reasoning).
      operationId: createChatCompletion
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        '200':
          # Quoted because the text contains `: `, which would otherwise be
          # parsed as a nested mapping.
          description: 'A chat completion (or an SSE stream when `stream: true`).'
          content:
            # Non-streaming response body.
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
            # Streaming response: the description above promises an SSE
            # stream, so the SSE media type is declared alongside JSON.
            text/event-stream:
              schema:
                type: string
                description: 'Server-Sent Events stream, returned when `stream: true`.'
        # Error responses are shared definitions under components.responses.
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '429':
          $ref: '#/components/responses/RateLimited'
        '500':
          $ref: '#/components/responses/ServerError'
components:
  schemas:
    # Request body of POST /v1/chat/completions. Only `model` and `messages`
    # are mandatory; every other property is an optional knob.
    ChatCompletionRequest:
      type: object
      required: [model, messages]
      properties:
        model:
          type: string
          description: HuggingFace model id. See `/v1/models`.
          example: Qwen/Qwen2.5-7B-Instruct
        # Conversation so far, one ChatMessage per turn.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/ChatMessage'
        max_tokens:
          type: integer
          description: Maximum tokens to generate.
          example: 256
        # --- Sampling controls ---
        temperature:
          type: number
          minimum: 0
          maximum: 2
          example: 0.7
        top_p:
          type: number
          minimum: 0
          maximum: 1
        top_k:
          type: integer
          description: vLLM-specific. Top-k sampling.
        # Accepts either a single stop string or a list of them.
        stop:
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        seed:
          type: integer
          description: Deterministic seed for sampling.
        # --- Repetition controls ---
        frequency_penalty:
          type: number
          minimum: -2
          maximum: 2
        presence_penalty:
          type: number
          minimum: -2
          maximum: 2
        repetition_penalty:
          type: number
          description: vLLM-specific. Penalty for repeated tokens.
        # When true the operation responds with Server-Sent Events (see the
        # operation description).
        stream:
          type: boolean
          default: false
        # --- Tool calling ---
        tools:
          type: array
          items:
            $ref: '#/components/schemas/Tool'
        # Either one of the keyword strings below or a free-form object.
        tool_choice:
          oneOf:
            - type: string
              enum: [none, auto, required]
            - type: object
        response_format:
          type: object
          description: |
            Optional response constraints — e.g. `{"type": "json_object"}`
            for JSON mode, or `{"type": "json_schema", "json_schema": {...}}`
            for structured outputs.
    # Non-streaming 200 response body of POST /v1/chat/completions.
    ChatCompletionResponse:
      type: object
      properties:
        id:
          type: string
        object:
          type: string
          example: chat.completion
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          items:
            type: object
            properties:
              index:
                type: integer
              message:
                type: object
                properties:
                  role:
                    type: string
                  content:
                    type: string
                    # NOTE(review): OpenAI-compatible servers commonly return
                    # `content: null` when the message carries `tool_calls`.
                    # Marked nullable so strict generated clients accept
                    # that — confirm against the live API.
                    nullable: true
                  # `reasoning` and `reasoning_content` carry the same kind
                  # of data; which one appears depends on the model's
                  # reasoning parser (see the descriptions below).
                  reasoning:
                    type: string
                    description: |
                      Chain-of-thought trace from reasoning models that use
                      the OpenAI gpt-oss parser (e.g. `openai/gpt-oss-20b`).
                      OpenAI's recommended field name for raw CoT in Chat
                      Completions. See [Reasoning models](/concepts/reasoning).
                  reasoning_content:
                    type: string
                    description: |
                      Legacy alias for `reasoning` emitted by models that use
                      the qwen3 reasoning parser (e.g. `Qwen/Qwen3.5-27B`).
                      Treat as equivalent to `reasoning`. Absent on
                      non-reasoning models.
                  tool_calls:
                    type: array
                    items:
                      $ref: '#/components/schemas/ToolCall'
              finish_reason:
                type: string
                enum:
                  - stop
                  - length
                  - tool_calls
        # Token accounting for the request.
        usage:
          type: object
          properties:
            prompt_tokens:
              type: integer
            completion_tokens:
              type: integer
            total_tokens:
              type: integer
    # One conversation message; the item type of
    # `ChatCompletionRequest.messages`. Only `role` is mandatory.
    ChatMessage:
      type: object
      required:
        - role
      properties:
        role:
          type: string
          enum:
            - system
            - user
            - assistant
            - tool
        # Either a plain string or an array of objects.
        content:
          oneOf:
            - type: string
            - type: array
              # OpenAPI 3.0 requires `items` whenever `type` is `array`;
              # without it validators and code generators reject the schema.
              items:
                type: object
        name:
          type: string
        tool_call_id:
          type: string
        tool_calls:
          type: array
          items:
            $ref: '#/components/schemas/ToolCall'
    # A tool the model may call; the item type of
    # `ChatCompletionRequest.tools`. `function` is the only allowed `type`.
    Tool:
      type: object
      properties:
        type:
          type: string
          enum: [function]
        function:
          type: object
          required: [name]
          properties:
            name:
              type: string
            description:
              type: string
            parameters:
              type: object
              description: JSON Schema describing the function arguments.
    # A single tool invocation; the item type of `tool_calls` on both
    # ChatMessage and the response message.
    ToolCall:
      type: object
      properties:
        id:
          type: string
        type:
          type: string
          enum: [function]
        function:
          type: object
          properties:
            name:
              type: string
            # A string holding JSON, not a nested object.
            arguments:
              type: string
              description: JSON-encoded arguments.
    # Error envelope referenced by every entry under components.responses:
    # a single `error` object with three string fields.
    ErrorResponse:
      type: object
      properties:
        error:
          type: object
          properties:
            message:
              type: string
            type:
              type: string
            code:
              type: string
  # Reusable error responses. Each one returns the ErrorResponse schema as
  # JSON; the chat completions operation maps them to status codes via `$ref`.
  responses:
    # Referenced by status 400.
    BadRequest:
      description: Malformed request.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    # Referenced by status 401.
    Unauthorized:
      description: Missing or invalid API key.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    # Referenced by status 404.
    NotFound:
      description: Model not found or not deployed.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    # Referenced by status 429.
    RateLimited:
      description: Too many requests.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    # Referenced by status 500.
    ServerError:
      description: Backend failure after retries.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
  # Authentication schemes. `bearerAuth` is the one named by the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      # `bearerFormat` is a free-form documentation hint in OpenAPI.
      # Presumably shows the API key prefix — confirm.
      bearerFormat: sk-tera-...

````