Skip to content

Instantly share code, notes, and snippets.

@JerrettDavis
Created June 13, 2024 15:31
Show Gist options
  • Select an option

  • Save JerrettDavis/5469a17c48849b6c60fa51cb17e9c124 to your computer and use it in GitHub Desktop.

Select an option

Save JerrettDavis/5469a17c48849b6c60fa51cb17e9c124 to your computer and use it in GitHub Desktop.
# OpenAPI description of the Ollama HTTP API.
openapi: 3.1.0
info:
  title: Ollama API
  description: API for interacting with the Ollama service.
  version: 1.0.0
servers:
  # Templated server URL; host and port are resolved from the variables below.
  - url: http://{host}:{port}
    description: Ollama API server
    variables:
      host:
        # Quoted: OpenAPI server-variable defaults are strings.
        default: "127.0.0.1"
      port:
        default: '11434'
paths:
  /api/generate:
    post:
      summary: Generate a response for a given prompt
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateResponse'
  /api/chat:
    post:
      summary: Generate the next message in a chat
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatResponse'
  /api/pull:
    post:
      summary: Download a model from the Ollama library
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PullRequest'
      responses:
        '200':
          description: Successful response
          content:
            # Streaming progress is emitted as newline-delimited JSON.
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/ProgressResponse'
  /api/push:
    post:
      summary: Upload a model to the model library
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PushRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/ProgressResponse'
  /api/create:
    post:
      summary: Create a model from a Modelfile
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/ProgressResponse'
  /api/tags:
    get:
      summary: List models that are available locally
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListResponse'
  /api/ps:
    get:
      summary: List running models
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ProcessResponse'
  /api/copy:
    post:
      summary: Copy a model
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CopyRequest'
      responses:
        '200':
          description: Successful response
  /api/delete:
    delete:
      summary: Delete a model and its data
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DeleteRequest'
      responses:
        '200':
          description: Successful response
  /api/show:
    post:
      summary: Obtain model information
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ShowRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ShowResponse'
  /api/embeddings:
    post:
      summary: Generate embeddings from a model
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
  /api/blobs/{digest}:
    post:
      summary: Create a blob from a file on the server
      parameters:
        - name: digest
          in: path
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/octet-stream:
            schema:
              type: string
              format: binary
      responses:
        '200':
          description: Successful response
  /api/version:
    get:
      summary: Return the Ollama server version
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  version:
                    type: string
components:
  schemas:
    GenerateRequest:
      type: object
      properties:
        model:
          type: string
          description: The model name
        prompt:
          type: string
          description: The prompt to generate a response for
        system:
          type: string
          description: System message (overrides what is defined in the Modelfile)
          example: null
        template:
          type: string
          description: The prompt template to use (overrides what is defined in the Modelfile)
          example: null
        context:
          type: array
          items:
            type: integer
          description: The context parameter returned from a previous request to /generate, this can be used to keep a short conversational memory
          example: []
        stream:
          type: boolean
          description: If false the response will be returned as a single response object, rather than a stream of objects
        raw:
          type: boolean
          description: If true no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your request to the API
        format:
          type: string
          description: The format to return a response in. Currently the only accepted value is json
          example: null
        keep_alive:
          $ref: '#/components/schemas/Duration'
        images:
          type: array
          items:
            type: string
            format: byte
          description: A list of base64-encoded images (for multimodal models such as llava)
          example: null
        options:
          $ref: '#/components/schemas/Options'
      required:
        - model
        - prompt
    GenerateResponse:
      type: object
      properties:
        model:
          type: string
          description: The model name that generated the response
        created_at:
          type: string
          format: date-time
          description: Timestamp of the response
        response:
          type: string
          description: The textual response itself
        done:
          type: boolean
          description: Specifies if the response is complete
        context:
          type: array
          items:
            type: integer
          description: When done, encoding of the conversation used in this response
        total_duration:
          type: number
          description: When done, time spent generating the response
        load_duration:
          type: number
          description: When done, time spent in nanoseconds loading the model
        prompt_eval_count:
          type: integer
          description: When done, number of tokens in the prompt
        prompt_eval_duration:
          type: number
          description: When done, time spent in nanoseconds evaluating the prompt
        eval_count:
          type: integer
          description: When done, number of tokens in the response
        eval_duration:
          type: number
          description: When done, time in nanoseconds spent generating the response
    ChatRequest:
      type: object
      properties:
        model:
          type: string
          description: The model name
        messages:
          type: array
          items:
            $ref: '#/components/schemas/Message'
          description: Messages of the chat - can be used to keep a chat memory
        stream:
          type: boolean
          description: Enable streaming of returned response
        format:
          type: string
          description: Format to return the response in (e.g. "json")
        keep_alive:
          $ref: '#/components/schemas/Duration'
        options:
          $ref: '#/components/schemas/Options'
    ChatResponse:
      type: object
      properties:
        model:
          type: string
          description: The model name
        created_at:
          type: string
          format: date-time
          description: Timestamp of the response
        message:
          $ref: '#/components/schemas/Message'
        done_reason:
          type: string
          description: Reason the model stopped generating text
        done:
          type: boolean
          description: Specifies if the response is complete
        total_duration:
          type: number
          description: Total duration of the request
        # Duration fields below were declared `type: string`; they are numeric
        # (nanoseconds), matching the same fields on GenerateResponse.
        load_duration:
          type: number
          description: Load duration of the request
        prompt_eval_count:
          type: integer
          description: Count of prompt evaluations
        prompt_eval_duration:
          type: number
          description: Duration of prompt evaluations
        eval_count:
          type: integer
          description: Count of evaluations
        eval_duration:
          type: number
          description: Duration of evaluations
    PullRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model to pull
        insecure:
          type: boolean
          description: Whether the pull request is insecure
        username:
          type: string
          description: Username for authentication
        password:
          type: string
          description: Password for authentication
        stream:
          type: boolean
          description: Enable streaming of progress
    PushRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model to push
        insecure:
          type: boolean
          description: Whether the push request is insecure
        username:
          type: string
          description: Username for authentication
        password:
          type: string
          description: Password for authentication
        stream:
          type: boolean
          description: Enable streaming of progress
    CreateRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model
        path:
          type: string
          description: The path to the model file
        modelfile:
          type: string
          description: The modelfile content
        stream:
          type: boolean
          description: Enable streaming of progress
        quantize:
          type: string
          description: The quantization level
    DeleteRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model to delete
    ShowRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model
        system:
          type: string
          description: Overrides the model's default system message/prompt
        template:
          type: string
          description: Overrides the model's default prompt template
        options:
          $ref: '#/components/schemas/Options'
    ShowResponse:
      type: object
      properties:
        license:
          type: string
          description: The model license
        modelfile:
          type: string
          description: The modelfile content
        parameters:
          type: string
          description: The model parameters
        template:
          type: string
          description: The model template
        system:
          type: string
          description: The model system message/prompt
        details:
          $ref: '#/components/schemas/ModelDetails'
        messages:
          type: array
          items:
            $ref: '#/components/schemas/Message'
    EmbeddingRequest:
      type: object
      properties:
        model:
          type: string
          description: The model name
        prompt:
          type: string
          description: The textual prompt to embed
        keep_alive:
          $ref: '#/components/schemas/Duration'
        options:
          $ref: '#/components/schemas/Options'
    EmbeddingResponse:
      type: object
      properties:
        embedding:
          type: array
          items:
            type: number
          description: The generated embeddings
    ProgressResponse:
      type: object
      properties:
        status:
          type: string
          description: The status of the progress
        digest:
          type: string
          description: The digest of the progress
        total:
          type: integer
          description: The total size of the task
        completed:
          type: integer
          description: The completed size of the task
    ListResponse:
      type: object
      properties:
        models:
          type: array
          items:
            $ref: '#/components/schemas/ListModelResponse'
    ProcessResponse:
      type: object
      properties:
        models:
          type: array
          items:
            $ref: '#/components/schemas/ProcessModelResponse'
    ListModelResponse:
      type: object
      properties:
        name:
          type: string
        model:
          type: string
        modified_at:
          type: string
          format: date-time
        size:
          type: integer
        digest:
          type: string
        details:
          $ref: '#/components/schemas/ModelDetails'
    ProcessModelResponse:
      type: object
      properties:
        name:
          type: string
        model:
          type: string
        size:
          type: integer
        digest:
          type: string
        details:
          $ref: '#/components/schemas/ModelDetails'
        expires_at:
          type: string
          format: date-time
        size_vram:
          type: integer
    CopyRequest:
      type: object
      properties:
        source:
          type: string
        destination:
          type: string
    Message:
      type: object
      properties:
        role:
          type: string
        content:
          type: string
        images:
          type: array
          items:
            type: string
            format: byte
    ModelDetails:
      type: object
      properties:
        parent_model:
          type: string
        format:
          type: string
        family:
          type: string
        families:
          type: array
          items:
            type: string
        parameter_size:
          type: string
        quantization_level:
          type: string
    Duration:
      type: string
      description: A string representing the duration
      example: "5m"
    Options:
      type: object
      properties:
        num_keep:
          type: integer
          description: Number of items to keep
          example: 4
        seed:
          type: integer
          description: Seed value
          example: -1
        num_predict:
          type: integer
          description: Number of predictions
          example: -1
        top_k:
          type: integer
          description: Top K value
          example: 40
        top_p:
          type: number
          format: float
          description: Top P value
          example: 0.9
        tfs_z:
          type: number
          format: float
          description: TFSZ value
          example: 1.0
        typical_p:
          type: number
          format: float
          description: Typical P value
          example: 1.0
        repeat_last_n:
          type: integer
          description: Repeat last N value
          example: 64
        temperature:
          type: number
          format: float
          description: Temperature value
          example: 0.8
        repeat_penalty:
          type: number
          format: float
          description: Repeat penalty value
          example: 1.1
        presence_penalty:
          type: number
          format: float
          description: Presence penalty value
          example: 0.8
        frequency_penalty:
          type: number
          format: float
          description: Frequency penalty value
          example: 0.8
        mirostat:
          type: integer
          description: Mirostat value
          example: 0
        mirostat_tau:
          type: number
          format: float
          description: Mirostat Tau value
          example: 5.8
        mirostat_eta:
          type: number
          format: float
          description: Mirostat Eta value
          example: 0.1
        penalize_newline:
          type: boolean
          description: Penalize newline value
          example: true
        stop:
          type: array
          items:
            type: string
          description: How to stop/end each response
          example: null
        numa:
          type: boolean
          description: Use NUMA value
          example: false
        num_ctx:
          type: integer
          description: Number of contexts
          example: 2048
        num_batch:
          type: integer
          description: Number of batches
          example: 512
        num_gpu:
          type: integer
          description: Number of GPUs
          example: -1
        main_gpu:
          type: integer
          description: Main GPU
        low_vram:
          type: boolean
          description: Low VRAM value
          example: false
        f16_kv:
          type: boolean
          description: F16KV value
          example: true
        logits_all:
          type: boolean
          description: Logits all value
          example: false
        vocab_only:
          type: boolean
          description: Vocab only value
          example: false
        use_mmap:
          type: boolean
          description: Use mmap value
          example: true
        use_mlock:
          type: boolean
          description: Use mlock value
          example: false
        num_thread:
          type: integer
          description: Number of threads
          example: 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment