Skip to content

Instantly share code, notes, and snippets.

@mseri
Last active December 3, 2025 17:44
Show Gist options
  • Select an option

  • Save mseri/956ab07c2c4a756a2575745e2b9907d5 to your computer and use it in GitHub Desktop.

Select an option

Save mseri/956ab07c2c4a756a2575745e2b9907d5 to your computer and use it in GitHub Desktop.
llama-launcher
#!/bin/bash
set -euo pipefail

# Model names accepted on the command line, shown by usage().
# Keep in sync with the case statement below; "gpt" was supported by the
# parser but missing from this list.
readonly MODELS="aquif, gemma3, gpt, granite4, granite4-moe, lfm2, lfm2-moe, lfm2-vl, ministral3 (3,8B;i,r), nemotron, qwen3(i,r), voxtral"
# Print CLI help to stdout.
# Uses ${temp:-0} instead of $temp: usage() is called for the zero-argument
# case before temp is initialised, and a bare $temp would abort the script
# with an "unbound variable" error under `set -u`.
function usage() {
  echo "Usage: $0 <model> [options]"
  echo "Models: $MODELS."
  echo "Options:"
  echo " --dry to see the invocation string only"
  echo " --temp <value> to set temperature for granite models (default: ${temp:-0})"
}
# Initialise all state BEFORE the zero-argument usage() call: usage()
# interpolates $temp, and calling it with temp still unset would trip
# `set -u`. command stays empty until a model is selected.
command=""
dry=false
temp=0

if [ $# -eq 0 ]; then
  usage
  exit 1
fi
# Parse the whole command line first, only remembering WHICH model was
# chosen, and build the llama-server invocation afterwards.
# The original built the command string as soon as the model name was
# matched, so a later "--temp <value>" had no effect on the granite models
# (temp was still at its default when interpolated). Deferring the
# expansion of $temp fixes that ordering bug; last model named wins,
# as before.
model=""
while [ $# -gt 0 ]; do
  case "$1" in
    aquif|gemma3|gpt|granite4|granite4-moe|lfm2|lfm2-moe|lfm2-vl|ministral3-3b|ministral3-3br|ministral3-8b|ministral3-8br|nemotron|qwen3i|qwen3r|voxtral)
      model="$1"
      shift ;;
    --dry)
      dry=true
      shift ;;
    --temp)
      if [ $# -lt 2 ]; then
        echo "Error: --temp requires a value." >&2
        exit 1
      fi
      temp="$2"
      shift 2 ;;
    --help|-h)
      usage
      exit 0 ;;
    *)
      # Name the offending argument to make diagnosis easier; diagnostics
      # go to stderr so --dry output stays clean on stdout.
      echo "Error: Unknown argument: $1" >&2
      usage
      exit 1 ;;
  esac
done

# All options are known at this point, so $temp below has its final value.
case "$model" in
  aquif)
    command="llama-server -hf mradermacher/aquif-3.6-8B-GGUF:Q4_K_M --temp 0.7 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 8192" ;;
  granite4)
    # one can also use ibm-granite/granite-4.0-h-micro-GGUF:Q4_K_M and pick
    # parameters at will. IBM says they are all good depending on the needs
    command="llama-server -hf unsloth/granite-4.0-h-micro-GGUF:UD-Q4_K_XL --top-k 20 --top-p 0.95 --min-p 0.0 --temp $temp -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 16384" ;;
  granite4-moe)
    # one can also use ibm-granite/granite-4.0-h-tiny-GGUF:Q4_K_M and pick
    # parameters at will. IBM says they are all good depending on the needs
    command="llama-server -hf unsloth/granite-4.0-h-tiny-GGUF:UD-Q4_K_XL --top-k 20 --top-p 0.95 --min-p 0.0 --temp $temp -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 16384" ;;
  gemma3)
    command="llama-server -hf stduhpf/google-gemma-3-4b-it-qat-q4_0-gguf-small --top-k 64 --top-p 0.95 --min-p 0.0 --repeat-penalty 1.0 --temp 1.0 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 16384" ;;
  gpt)
    command="llama-server -m /Users/mseri/Downloads/GPT-OSS-20B-Pruned-Q5_0.gguf --temp 1.0 --top-p 1.0 --top-k 0 --min-p 0.01 --threads 4 -ngl 0 -ub 2048 -b 2048 --jinja -c 16384 --chat-template-kwargs '{\"reasoning_effort\": \"medium\"}'" ;;
  lfm2)
    command="llama-server -hf LiquidAI/LFM2-2.6B-GGUF:Q8_0 --temp 0.3 --min-p 0.15 --repeat-penalty 1.05 --sampling-seq edskypmxt --threads 4 -ngl 0 --jinja -c 16384" ;;
  lfm2-moe)
    command="llama-server -hf unsloth/LFM2-8B-A1B-GGUF:Q4_K_XL --temp 0.3 --min-p 0.15 --repeat-penalty 1.05 --threads 4 -ngl 0 --jinja -c 16384" ;;
  lfm2-vl)
    command="llama-server -hf bartowski/LiquidAI_LFM2-VL-1.6B-GGUF:Q6_K --temp 0.1 --min-p 0.15 --top-p 1.0 --top-k 50 --repeat-penalty 1.05 -ngl 0 --threads 4 --jinja -c 16384" ;;
  ministral3-3b)
    command="llama-server -hf mistralai/Ministral-3-3B-Instruct-2512-GGUF:Q4_K_M --temp 0.15 -ngl 0 --threads 4 --jinja -c 16384" ;;
  ministral3-3br)
    command="llama-server -hf mistralai/Ministral-3-3B-Reasoning-2512-GGUF:Q4_K_M --top-p 0.95 --temp 0.7 -ngl 0 --threads 4 --jinja -c 16384" ;;
  ministral3-8b)
    command="llama-server -hf mistralai/Ministral-3-8B-Instruct-2512-GGUF:Q4_K_M --temp 0.15 -ngl 0 --threads 4 --jinja -c 16384" ;;
  ministral3-8br)
    command="llama-server -hf mistralai/Ministral-3-8B-Reasoning-2512-GGUF:Q4_K_M --top-p 0.95 --temp 0.7 -ngl 0 --threads 4 --jinja -c 16384" ;;
  nemotron)
    command="llama-server -hf bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF:Q4_K_M --temp 0.6 --top-p 0.95 -ngl 0 --threads 4 --jinja -c 16384" ;;
  qwen3i)
    command="llama-server -hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_XL --top-k 20 --top-p 0.8 --min-p 0.0 --temp 0.7 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 8192" ;;
  qwen3r)
    command="llama-server -hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_XL --top-k 20 --top-p 0.95 --min-p 0.0 --temp 0.6 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 8192" ;;
  voxtral)
    command="llama-server -hf bartowski/mistralai_Voxtral-Mini-3B-2507-GGUF:Q4_K_M --top-p 0.95 --temp 0.2 -ngl 0 -c 16384 --cache-reuse 256 --threads 4 -ctk q8_0 -ctv q8_0" ;;
  "")
    # No model on the command line (e.g. only options were given):
    # leave $command empty, matching the original behaviour.
    ;;
esac
# Dry-run mode prints the assembled invocation and stops; otherwise the
# command string is executed via eval (it may contain quoted arguments,
# e.g. the gpt model's --chat-template-kwargs JSON).
if "$dry"; then
  printf '%s\n' "$command"
  exit 0
fi
eval "$command"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment