llama-launcher
#!/bin/bash
set -euo pipefail
MODELS="aquif, gemma3, gpt, granite4, granite4-moe, lfm2, lfm2-moe, lfm2-vl, ministral3-{3b,3br,8b,8br}, nemotron, qwen3{i,r}, voxtral"
# Defaults; set before any usage() call, since `set -u` would otherwise
# abort on the unbound $temp referenced in the help text.
command=""
dry=false
temp=0

function usage() {
  echo "Usage: $0 [options] <model>"
  echo "Models: $MODELS."
  echo "Options:"
  echo "  --dry           print the llama-server invocation without running it"
  echo "  --temp <value>  set the temperature for the granite models (default: $temp);"
  echo "                  must be given before the model name"
}

if [ $# -eq 0 ]; then
  usage
  exit 1
fi
while [ $# -gt 0 ]; do
  case $1 in
    "aquif")
      command="llama-server -hf mradermacher/aquif-3.6-8B-GGUF:Q4_K_M --temp 0.7 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 8192"
      shift ;;
    "granite4")
      # One can also use ibm-granite/granite-4.0-h-micro-GGUF:Q4_K_M and pick
      # parameters at will; IBM says they are all good, depending on your needs.
      command="llama-server -hf unsloth/granite-4.0-h-micro-GGUF:UD-Q4_K_XL --top-k 20 --top-p 0.95 --min-p 0.0 --temp $temp -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 16384"
      shift ;;
    "granite4-moe")
      # One can also use ibm-granite/granite-4.0-h-tiny-GGUF:Q4_K_M and pick
      # parameters at will; IBM says they are all good, depending on your needs.
      command="llama-server -hf unsloth/granite-4.0-h-tiny-GGUF:UD-Q4_K_XL --top-k 20 --top-p 0.95 --min-p 0.0 --temp $temp -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 16384"
      shift ;;
    "gemma3")
      command="llama-server -hf stduhpf/google-gemma-3-4b-it-qat-q4_0-gguf-small --top-k 64 --top-p 0.95 --min-p 0.0 --repeat-penalty 1.0 --temp 1.0 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 16384"
      shift ;;
    "gpt")
      command="llama-server -m /Users/mseri/Downloads/GPT-OSS-20B-Pruned-Q5_0.gguf --temp 1.0 --top-p 1.0 --top-k 0 --min-p 0.01 --threads 4 -ngl 0 -ub 2048 -b 2048 --jinja -c 16384 --chat-template-kwargs '{\"reasoning_effort\": \"medium\"}'"
      shift ;;
    "lfm2")
      command="llama-server -hf LiquidAI/LFM2-2.6B-GGUF:Q8_0 --temp 0.3 --min-p 0.15 --repeat-penalty 1.05 --sampling-seq edskypmxt --threads 4 -ngl 0 --jinja -c 16384"
      shift ;;
    "lfm2-moe")
      command="llama-server -hf unsloth/LFM2-8B-A1B-GGUF:Q4_K_XL --temp 0.3 --min-p 0.15 --repeat-penalty 1.05 --threads 4 -ngl 0 --jinja -c 16384"
      shift ;;
    "lfm2-vl")
      command="llama-server -hf bartowski/LiquidAI_LFM2-VL-1.6B-GGUF:Q6_K --temp 0.1 --min-p 0.15 --top-p 1.0 --top-k 50 --repeat-penalty 1.05 -ngl 0 --threads 4 --jinja -c 16384"
      shift ;;
    "ministral3-3b")
      command="llama-server -hf mistralai/Ministral-3-3B-Instruct-2512-GGUF:Q4_K_M --temp 0.15 -ngl 0 --threads 4 --jinja -c 16384"
      shift ;;
    "ministral3-3br")
      command="llama-server -hf mistralai/Ministral-3-3B-Reasoning-2512-GGUF:Q4_K_M --top-p 0.95 --temp 0.7 -ngl 0 --threads 4 --jinja -c 16384"
      shift ;;
    "ministral3-8b")
      command="llama-server -hf mistralai/Ministral-3-8B-Instruct-2512-GGUF:Q4_K_M --temp 0.15 -ngl 0 --threads 4 --jinja -c 16384"
      shift ;;
    "ministral3-8br")
      command="llama-server -hf mistralai/Ministral-3-8B-Reasoning-2512-GGUF:Q4_K_M --top-p 0.95 --temp 0.7 -ngl 0 --threads 4 --jinja -c 16384"
      shift ;;
    "nemotron")
      command="llama-server -hf bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF:Q4_K_M --temp 0.6 --top-p 0.95 -ngl 0 --threads 4 --jinja -c 16384"
      shift ;;
    "qwen3i")
      command="llama-server -hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_XL --top-k 20 --top-p 0.8 --min-p 0.0 --temp 0.7 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 8192"
      shift ;;
    "qwen3r")
      command="llama-server -hf unsloth/Qwen3-4B-Thinking-2507-GGUF:Q4_K_XL --top-k 20 --top-p 0.95 --min-p 0.0 --temp 0.6 -ngl 0 --threads 4 --jinja --cache-reuse 256 -c 8192"
      shift ;;
    "voxtral")
      command="llama-server -hf bartowski/mistralai_Voxtral-Mini-3B-2507-GGUF:Q4_K_M --top-p 0.95 --temp 0.2 -ngl 0 -c 16384 --cache-reuse 256 --threads 4 -ctk q8_0 -ctv q8_0"
      shift ;;
    "--dry")
      dry=true
      shift ;;
    "--temp")
      if [ $# -lt 2 ]; then
        echo "Error: --temp requires a value."
        exit 1
      fi
      temp="$2"
      shift 2 ;;
    "--help"|"-h")
      usage
      exit 0 ;;
    *)
      echo "Error: Unknown argument: $1"
      usage
      exit 1 ;;
  esac
done

# Guard against option-only invocations (e.g. a bare --dry), which would
# otherwise eval an empty string and exit silently.
if [ -z "$command" ]; then
  echo "Error: no model selected."
  usage
  exit 1
fi

if $dry; then
  echo "$command"
  exit 0
else
  eval "$command"
fi
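
Example invocations (a sketch; it assumes the script is saved as an executable `llama-launcher`, that `llama-server` from llama.cpp is on your PATH, and that the `-hf` models can be fetched on first use):

./llama-launcher --dry qwen3i          # print the llama-server command only
./llama-launcher --temp 0.2 granite4   # --temp must precede the model name
./llama-launcher gemma3                # download (first run) and serve the model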