Last active
October 30, 2025 13:11
-
-
Save bedwards/2c5eb18fc91b6228356bf7969857139e to your computer and use it in GitHub Desktop.
burrito
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| codestral:22b | |
| 0.20 seconds | |
| qwen3-coder:30b | |
| 0.21 seconds | |
| gemma2:27b | |
| 0.29 seconds | |
| command-r-plus:104b | |
| 0.56 seconds | |
| llama3.1:70b | |
| 0.60 seconds | |
| gpt-oss:120b | |
| 1.35 seconds |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ ~/bin/ollama-load.py | |
| Loading gpt-oss:120b | |
| 8.94 seconds | |
| What is a burrito? In three words. | |
| Stuffed tortilla wrap. | |
| 4.17 seconds | |
| What is a burrito? In three words. | |
| Stuffed tortilla wrap. | |
| 1.32 seconds | |
| What is a burrito? In three words. | |
| Tortilla‑filled wrap. | |
| 1.35 seconds | |
| $ ~/bin/ollama-load.py 0 | |
| Unloading gpt-oss:120b | |
| 0.11 seconds | |
| --- | |
| $ ~/bin/ollama-load.py llama3.1:70b | |
| Loading llama3.1:70b | |
| 4.19 seconds | |
| What is a burrito? In three words. | |
| Large Mexican wrap. | |
| 12.39 seconds | |
| What is a burrito? In three words. | |
| Tortilla wrapped meal. | |
| 0.60 seconds | |
| What is a burrito? In three words. | |
| Tortilla wrapped meal. | |
| 0.60 seconds | |
| $ ~/bin/ollama-load.py llama3.1:70b 0 | |
| Unloading llama3.1:70b | |
| 0.03 seconds | |
| --- | |
| $ ~/bin/ollama-load.py gemma2:27b | |
| Loading gemma2:27b | |
| 0.98 seconds | |
| What is a burrito? In three words. | |
| Flour tortilla wrap. | |
| 22.61 seconds | |
| What is a burrito? In three words. | |
| Flour tortilla wrap. | |
| 0.29 seconds | |
| What is a burrito? In three words. | |
| Mexican stuffed tortilla. | |
| 0.29 seconds | |
| $ ~/bin/ollama-load.py gemma2:27b 0 | |
| Unloading gemma2:27b | |
| 0.18 seconds | |
| --- | |
| $ ~/bin/ollama-load.py qwen3-coder:30b | |
| Loading qwen3-coder:30b | |
| 5.37 seconds | |
| What is a burrito? In three words. | |
| Mexican food wrap. | |
| 1.71 seconds | |
| What is a burrito? In three words. | |
| Mexican food wrap. | |
| 0.19 seconds | |
| What is a burrito? In three words. | |
| Mexican food wrap. | |
| 0.21 seconds | |
| $ ~/bin/ollama-load.py qwen3-coder:30b 0 | |
| Unloading qwen3-coder:30b | |
| 0.21 seconds | |
| --- | |
| $ ~/bin/ollama-load.py codestral:22b | |
| Loading codestral:22b | |
| 1.22 seconds | |
| What is a burrito? In three words. | |
| Mexican Food Delight: A Wrap of Joy! | |
| 18.48 seconds | |
| What is a burrito? In three words. | |
| Mexican wrap, filled delight. | |
| 0.20 seconds | |
| What is a burrito? In three words. | |
| Mexican, filled, wrap. | |
| 0.20 seconds | |
| $ ~/bin/ollama-load.py codestral:22b 0 | |
| Unloading codestral:22b | |
| 0.03 seconds | |
| --- | |
| $ ~/bin/ollama-load.py command-r-plus:104b | |
| Loading command-r-plus:104b | |
| 3.62 seconds | |
| What is a burrito? In three words. | |
| A Mexican food wrap. | |
| 89.33 seconds | |
| What is a burrito? In three words. | |
| A Mexican food. | |
| 0.56 seconds | |
| What is a burrito? In three words. | |
| A filled tortilla. | |
| 0.55 seconds | |
| $ ~/bin/ollama-load.py command-r-plus:104b 0 | |
| Unloading command-r-plus:104b | |
| 0.13 seconds |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
from datetime import datetime
import json
import sys
import re
import requests

# --- Configuration (Defaults) ---
OLLAMA_HOST = "http://studio:11434"  # Base URL of the Ollama server
DEFAULT_MODEL = "gpt-oss:120b"       # Model used when none is named on the CLI
DEFAULT_KEEP_ALIVE = -1              # -1 asks Ollama to keep the model loaded indefinitely
PROMPT = "What is a burrito? In three words."  # Fixed prompt used for timed pings

# Regex to check if a string is an integer (positive or negative)
NUMERIC_REGEX = re.compile(r"^[+-]?[0-9]+$")

# Number of timed prompt round-trips sent after loading a model
n_pings = 3
# --- Argument Parsing ---
def parse_args(argv=None):
    """Determine the model name and keep_alive value from CLI arguments.

    Accepted forms (mirroring the shell script this replaces):
        (no args)         -> default model, default keep_alive
        MODEL             -> named model, default keep_alive
        KEEP_ALIVE        -> default model, given keep_alive (integer string)
        MODEL KEEP_ALIVE  -> both given; extra arguments are ignored

    Args:
        argv: Argument list to parse. Defaults to ``sys.argv[1:]`` so
            existing callers are unaffected; passing a list makes the
            function testable without touching ``sys.argv``.

    Returns:
        Tuple ``(model, keep_alive)``: ``model`` is a str, ``keep_alive``
        an int (-1 = keep loaded forever, 0 = unload now).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    model = DEFAULT_MODEL
    keep_alive = DEFAULT_KEEP_ALIVE

    if len(args) == 1:
        # A single argument is ambiguous: a pure integer is the
        # keep_alive value, anything else is a model name.
        if NUMERIC_REGEX.match(args[0]):
            keep_alive = int(args[0])
        else:
            model = args[0]
    elif len(args) >= 2:
        model = args[0]
        # arg2 must be convertible to an int for the API; a non-numeric
        # string (e.g. "10m") falls back to the default with a warning.
        try:
            keep_alive = int(args[1])
        except ValueError:
            sys.stderr.write(f"Warning: Argument 2 ('{args[1]}') is not a valid integer for keep_alive. Using default ({DEFAULT_KEEP_ALIVE}).\n")
    return model, keep_alive
# --- API Functions (Modified to accept model/keep_alive) ---
def handle_model_action(host, model, keep_alive, timeout=None):
    """Send a prompt-less /api/generate request to load or unload a model.

    Posting without a prompt makes Ollama only apply the keep_alive
    policy: a non-zero keep_alive loads (or pins) the model, while 0
    evicts it immediately.

    Args:
        host: Base URL of the Ollama server, e.g. "http://studio:11434".
        model: Model tag, e.g. "gpt-oss:120b".
        keep_alive: Seconds to keep the model resident; -1 = forever,
            0 = unload now.
        timeout: Optional requests timeout in seconds. Defaults to None
            (wait indefinitely) because loading a large model can
            legitimately take a long time.

    Exits the process with status 1 on any request failure.
    """
    url = f"{host}/api/generate"
    model_action_payload = {
        "model": model,
        "keep_alive": keep_alive,
    }
    action_type = "Loading" if keep_alive != 0 else "Unloading"
    print(f"{action_type} {model}")
    try:
        response = requests.post(url, json=model_action_payload, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        sys.stderr.write(f"Error during model action {action_type} for {model}: {e}\n")
        sys.exit(1)
def generate_response(host, model, prompt, timeout=None):
    """Send *prompt* to *model* and return the assembled streamed reply.

    Reads the NDJSON stream from /api/generate, concatenating each
    chunk's "response" token into the final answer.

    Args:
        host: Base URL of the Ollama server.
        model: Model tag to query.
        prompt: Prompt text to send.
        timeout: Optional requests timeout in seconds; None (default)
            waits indefinitely, matching the original behavior.

    Returns:
        The complete response text, stripped of surrounding whitespace.

    Exits the process with status 1 on any request failure.
    """
    pieces = []
    try:
        url = f"{host}/api/generate"
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": True,
        }
        print(prompt)
        # `with` guarantees the streamed connection is released even if
        # we bail out of the loop early or an exception is raised.
        with requests.post(url, json=payload, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            # Process stream and filter for non-empty responses.
            for line in response.iter_lines(decode_unicode=True):
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    sys.stderr.write(f"Warning: Could not decode JSON line: {line}\n")
                    continue
                token = data.get("response", "")
                if token:
                    pieces.append(token)
    except requests.exceptions.RequestException as e:
        sys.stderr.write(f"Error generating response: {e}\n")
        sys.exit(1)
    # join() avoids the quadratic cost of repeated string concatenation.
    return "".join(pieces).strip()
# --- Main Execution ---
if __name__ == "__main__":
    model_name, keep_alive_value = parse_args()

    # 1. Perform (and time) the load/unload action itself.
    t0 = datetime.now()
    handle_model_action(OLLAMA_HOST, model_name, keep_alive_value)
    print(f"{(datetime.now() - t0).total_seconds():.2f} seconds")

    # 2. Only ping with the prompt when the model stays resident;
    #    keep_alive == 0 is the unload command (mirrors the shell
    #    script's `if [ "$keep_alive" -ne 0 ];`).
    if keep_alive_value != 0:
        for _ping in range(n_pings):
            t0 = datetime.now()
            answer = generate_response(OLLAMA_HOST, model_name, PROMPT)
            print(answer)
            print(f"{(datetime.now() - t0).total_seconds():.2f} seconds")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment