Rebuilt llama.cpp:
llama.cpp/build/bin/llama-server \
-hf unsloth/GLM-4.7-Flash-GGUF:UD-Q4_K_XL \
--jinja --threads -1 --ctx-size 96768 \
--temp 1.0 --top-p 0.95 --min-p 0.01 --fit on \
--flash-attn off

(`--flash-attn off` — as directed by an Unsloth developer; apparently flash attention isn't available/working for this model.)
Claude settings:
ANTHROPIC_AUTH_TOKEN=local-claude \
ANTHROPIC_BASE_URL=http://127.0.0.1:8080 \
~/.arkade/bin/claude \
--model unsloth/GLM-4.7-Flash-GGUF:UD-Q4_K_XL

Slicer config:
# NOTE(review): indentation was lost when this config was pasted; nesting below
# is reconstructed from field order — verify against the original slicer config.
config:
  host_groups:
    - name: gpu
      storage: image
      storage_size: 280G
      count: 1
      vcpu: 24
      ram_gb: 96
      gpu_count: 2
      network:
        bridge: brgpu0
        tap_prefix: gputap
        gateway: 192.168.139.1/24
  github_user: alexellis
  ssh_keys:
    - AAAAB3NzaC1yc2EAAAADAQABAAABgQDAjHDlD9NEC8T0+qxRWHt2bGPKgVSJK9tuVq91nUJw7f0H0wOvm5UVKuXUWMGmX3lWr+zqKVQTms3KOmpLLJhw1DG1maXqvi6aUh3Ayn3O1V3EEgoD9uH1I/NbYrnXE1algiqGb+w4eI1QEl7gsgCTdrx8VfsJ5TlQDCFsgf9WxzwrUMc2I7ul/KgfJ8KNFUuZMZ68GKAi4N9wtYAMPah941e+kDZBB8MdLbY2MfHklxxdN4qdb6WcT1IGmoevnlMGndwacKtueSLl7hQi+2bQqTm5arhO76eLhYKatYx+aeiC3ZJ222rGoPnPUmSJ/yvxyOw9I/IKv7dO0xKEAKgl1W8DgT/B/EtCQ8/mn2gl04eE4fR9F3N/hjeoBRflsx5uG8sJT4DVuq2qSEoDahObX9QGrSggBjbzZNXN3swQrYdSAIXsJmiPU3hqmpctaKVHHPEZ3TYnv+v3xznj8sqlPO5rvUxWSoIdHWl3i9GRkuNkgMWl1IgukK7+/Nrtyf0=
  image: "ghcr.io/openfaasltd/slicer-systemd-ch:5.10.240-x86_64-latest"
  hypervisor: cloud-hypervisor

Related posts: