Skip to content

Instantly share code, notes, and snippets.

@JerrettDavis
Created June 13, 2024 15:31
Show Gist options
  • Select an option

  • Save JerrettDavis/5469a17c48849b6c60fa51cb17e9c124 to your computer and use it in GitHub Desktop.

Select an option

Save JerrettDavis/5469a17c48849b6c60fa51cb17e9c124 to your computer and use it in GitHub Desktop.
# OpenAPI description of the Ollama HTTP API.
openapi: 3.1.0
info:
  title: Ollama API
  description: API for interacting with the Ollama service.
  version: 1.0.0
servers:
  # Templated server URL; host and port are resolved from the variables below.
  - url: http://{host}:{port}
    description: Ollama API server
    variables:
      host:
        # Quoted: OpenAPI server-variable defaults are strings.
        default: "127.0.0.1"
      port:
        default: '11434'
paths:
  /api/generate:
    post:
      summary: Generate a response for a given prompt
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateResponse'
  /api/chat:
    post:
      summary: Generate the next message in a chat
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatResponse'
  /api/pull:
    post:
      summary: Download a model from the Ollama library
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PullRequest'
      responses:
        '200':
          description: Successful response
          content:
            # Streaming progress is emitted as newline-delimited JSON.
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/ProgressResponse'
  /api/push:
    post:
      summary: Upload a model to the model library
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PushRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/ProgressResponse'
  /api/create:
    post:
      summary: Create a model from a Modelfile
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/x-ndjson:
              schema:
                $ref: '#/components/schemas/ProgressResponse'
  /api/tags:
    get:
      summary: List models that are available locally
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListResponse'
  /api/ps:
    get:
      summary: List running models
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ProcessResponse'
  /api/copy:
    post:
      summary: Copy a model
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CopyRequest'
      responses:
        '200':
          description: Successful response
  /api/delete:
    delete:
      summary: Delete a model and its data
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DeleteRequest'
      responses:
        '200':
          description: Successful response
  /api/show:
    post:
      summary: Obtain model information
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ShowRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ShowResponse'
  /api/embeddings:
    post:
      summary: Generate embeddings from a model
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EmbeddingRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EmbeddingResponse'
  /api/blobs/{digest}:
    post:
      summary: Create a blob from a file on the server
      parameters:
        - name: digest
          in: path
          required: true
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/octet-stream:
            schema:
              type: string
              format: binary
      responses:
        '200':
          description: Successful response
  /api/version:
    get:
      summary: Return the Ollama server version
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  version:
                    type: string
components:
  schemas:
    GenerateRequest:
      type: object
      properties:
        model:
          type: string
          description: The model name
        prompt:
          type: string
          description: The prompt to generate a response for
        system:
          type: string
          description: System message (overrides what is defined in the Modelfile)
          example: null
        template:
          type: string
          description: The prompt template to use (overrides what is defined in the Modelfile)
          example: null
        context:
          type: array
          items:
            type: integer
          description: The context parameter returned from a previous request to /generate, this can be used to keep a short conversational memory
          example: []
        stream:
          type: boolean
          description: If false the response will be returned as a single response object, rather than a stream of objects
        raw:
          type: boolean
          description: If true no formatting will be applied to the prompt. You may choose to use the raw parameter if you are specifying a full templated prompt in your request to the API
        format:
          type: string
          description: The format to return a response in. Currently the only accepted value is json
          example: null
        keep_alive:
          $ref: '#/components/schemas/Duration'
        images:
          type: array
          items:
            type: string
            format: byte
          description: A list of base64-encoded images (for multimodal models such as llava)
          example: null
        options:
          $ref: '#/components/schemas/Options'
      required:
        - model
        - prompt
    GenerateResponse:
      type: object
      properties:
        model:
          type: string
          description: The model name that generated the response
        created_at:
          type: string
          format: date-time
          description: Timestamp of the response
        response:
          type: string
          description: The textual response itself
        done:
          type: boolean
          description: Specifies if the response is complete
        context:
          type: array
          items:
            type: integer
          description: When done, encoding of the conversation used in this response
        total_duration:
          type: number
          description: When done, time spent generating the response
        load_duration:
          type: number
          description: When done, time spent in nanoseconds loading the model
        prompt_eval_count:
          type: integer
          description: When done, number of tokens in the prompt
        prompt_eval_duration:
          type: number
          description: When done, time spent in nanoseconds evaluating the prompt
        eval_count:
          type: integer
          description: When done, number of tokens in the response
        eval_duration:
          type: number
          description: When done, time in nanoseconds spent generating the response
    ChatRequest:
      type: object
      properties:
        model:
          type: string
          description: The model name
        messages:
          type: array
          items:
            $ref: '#/components/schemas/Message'
          description: Messages of the chat - can be used to keep a chat memory
        stream:
          type: boolean
          description: Enable streaming of returned response
        format:
          type: string
          description: Format to return the response in (e.g. "json")
        keep_alive:
          $ref: '#/components/schemas/Duration'
        options:
          $ref: '#/components/schemas/Options'
    ChatResponse:
      type: object
      properties:
        model:
          type: string
          description: The model name
        created_at:
          type: string
          format: date-time
          description: Timestamp of the response
        message:
          $ref: '#/components/schemas/Message'
        done_reason:
          type: string
          description: Reason the model stopped generating text
        done:
          type: boolean
          description: Specifies if the response is complete
        total_duration:
          type: number
          description: Total duration of the request
        # Duration fields below were declared `type: string`; they are numeric
        # (nanoseconds), matching the same fields on GenerateResponse.
        load_duration:
          type: number
          description: Load duration of the request
        prompt_eval_count:
          type: integer
          description: Count of prompt evaluations
        prompt_eval_duration:
          type: number
          description: Duration of prompt evaluations
        eval_count:
          type: integer
          description: Count of evaluations
        eval_duration:
          type: number
          description: Duration of evaluations
    PullRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model to pull
        insecure:
          type: boolean
          description: Whether the pull request is insecure
        username:
          type: string
          description: Username for authentication
        password:
          type: string
          description: Password for authentication
        stream:
          type: boolean
          description: Enable streaming of progress
    PushRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model to push
        insecure:
          type: boolean
          description: Whether the push request is insecure
        username:
          type: string
          description: Username for authentication
        password:
          type: string
          description: Password for authentication
        stream:
          type: boolean
          description: Enable streaming of progress
    CreateRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model
        path:
          type: string
          description: The path to the model file
        modelfile:
          type: string
          description: The modelfile content
        stream:
          type: boolean
          description: Enable streaming of progress
        quantize:
          type: string
          description: The quantization level
    DeleteRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model to delete
    ShowRequest:
      type: object
      properties:
        model:
          type: string
          description: The name of the model
        system:
          type: string
          description: Overrides the model's default system message/prompt
        template:
          type: string
          description: Overrides the model's default prompt template
        options:
          $ref: '#/components/schemas/Options'
    ShowResponse:
      type: object
      properties:
        license:
          type: string
          description: The model license
        modelfile:
          type: string
          description: The modelfile content
        parameters:
          type: string
          description: The model parameters
        template:
          type: string
          description: The model template
        system:
          type: string
          description: The model system message/prompt
        details:
          $ref: '#/components/schemas/ModelDetails'
        messages:
          type: array
          items:
            $ref: '#/components/schemas/Message'
    EmbeddingRequest:
      type: object
      properties:
        model:
          type: string
          description: The model name
        prompt:
          type: string
          description: The textual prompt to embed
        keep_alive:
          $ref: '#/components/schemas/Duration'
        options:
          $ref: '#/components/schemas/Options'
    EmbeddingResponse:
      type: object
      properties:
        embedding:
          type: array
          items:
            type: number
          description: The generated embeddings
    ProgressResponse:
      type: object
      properties:
        status:
          type: string
          description: The status of the progress
        digest:
          type: string
          description: The digest of the progress
        total:
          type: integer
          description: The total size of the task
        completed:
          type: integer
          description: The completed size of the task
    ListResponse:
      type: object
      properties:
        models:
          type: array
          items:
            $ref: '#/components/schemas/ListModelResponse'
    ProcessResponse:
      type: object
      properties:
        models:
          type: array
          items:
            $ref: '#/components/schemas/ProcessModelResponse'
    ListModelResponse:
      type: object
      properties:
        name:
          type: string
        model:
          type: string
        modified_at:
          type: string
          format: date-time
        size:
          type: integer
        digest:
          type: string
        details:
          $ref: '#/components/schemas/ModelDetails'
    ProcessModelResponse:
      type: object
      properties:
        name:
          type: string
        model:
          type: string
        size:
          type: integer
        digest:
          type: string
        details:
          $ref: '#/components/schemas/ModelDetails'
        expires_at:
          type: string
          format: date-time
        size_vram:
          type: integer
    CopyRequest:
      type: object
      properties:
        source:
          type: string
        destination:
          type: string
    Message:
      type: object
      properties:
        role:
          type: string
        content:
          type: string
        images:
          type: array
          items:
            type: string
            format: byte
    ModelDetails:
      type: object
      properties:
        parent_model:
          type: string
        format:
          type: string
        family:
          type: string
        families:
          type: array
          items:
            type: string
        parameter_size:
          type: string
        quantization_level:
          type: string
    Duration:
      type: string
      description: A string representing the duration
      example: "5m"
    Options:
      type: object
      properties:
        num_keep:
          type: integer
          description: Number of items to keep
          example: 4
        seed:
          type: integer
          description: Seed value
          example: -1
        num_predict:
          type: integer
          description: Number of predictions
          example: -1
        top_k:
          type: integer
          description: Top K value
          example: 40
        top_p:
          type: number
          format: float
          description: Top P value
          example: 0.9
        tfs_z:
          type: number
          format: float
          description: TFSZ value
          example: 1.0
        typical_p:
          type: number
          format: float
          description: Typical P value
          example: 1.0
        repeat_last_n:
          type: integer
          description: Repeat last N value
          example: 64
        temperature:
          type: number
          format: float
          description: Temperature value
          example: 0.8
        repeat_penalty:
          type: number
          format: float
          description: Repeat penalty value
          example: 1.1
        presence_penalty:
          type: number
          format: float
          description: Presence penalty value
          example: 0.8
        frequency_penalty:
          type: number
          format: float
          description: Frequency penalty value
          example: 0.8
        mirostat:
          type: integer
          description: Mirostat value
          example: 0
        mirostat_tau:
          type: number
          format: float
          description: Mirostat Tau value
          example: 5.8
        mirostat_eta:
          type: number
          format: float
          description: Mirostat Eta value
          example: 0.1
        penalize_newline:
          type: boolean
          description: Penalize newline value
          example: true
        stop:
          type: array
          items:
            type: string
          description: How to stop/end each response
          example: null
        numa:
          type: boolean
          description: Use NUMA value
          example: false
        num_ctx:
          type: integer
          description: Number of contexts
          example: 2048
        num_batch:
          type: integer
          description: Number of batches
          example: 512
        num_gpu:
          type: integer
          description: Number of GPUs
          example: -1
        main_gpu:
          type: integer
          description: Main GPU
        low_vram:
          type: boolean
          description: Low VRAM value
          example: false
        f16_kv:
          type: boolean
          description: F16KV value
          example: true
        logits_all:
          type: boolean
          description: Logits all value
          example: false
        vocab_only:
          type: boolean
          description: Vocab only value
          example: false
        use_mmap:
          type: boolean
          description: Use mmap value
          example: true
        use_mlock:
          type: boolean
          description: Use mlock value
          example: false
        num_thread:
          type: integer
          description: Number of threads
          example: 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment