After compiling with RPC enabled (the GGML_RPC CMake option),
run rpc-server on each remote node:
rpc-server --port 5001 --host 169.254.51.65
| from selenium import webdriver | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.chrome.service import Service | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| # 1. Setup the WebDriver (using webdriver_manager for simplicity) | |
| # This automatically downloads and manages the correct ChromeDriver version | |
| service = Service(ChromeDriverManager().install()) | |
| driver = webdriver.Chrome(service=service) |
| # llama-swap configuration | |
| models: | |
| VibeThinker-1.5B: | |
| cmd: llama-server --port ${PORT} -c 0 --model /home/ugo/.cache/llama.cpp/VibeThinker-1.5B.f16.gguf -ngl 99 | |
| Aquif-3.5-Max-42B-A3B: | |
| cmd: > | |
| llama-server --port ${PORT} | |
| --model /home/ugo/.cache/llama.cpp/unsloth-aquif-3.5-Max-42B-A3B-GGUF/aquif-3.5-Max-42B-A3B-UD-Q6_K_XL.gguf -ngl 99 -fa on | |
| Aquif-3.5-Max-42B-A3B-Coding-Q6_K_XL-KVQ8: | |
| cmd: > |
| #!/usr/bin/env bash | |
| hf download $1 --local-dir "$HOME/.cache/llama.cpp/$1/$2" --include="*$2.gguf" |
| #!/usr/bin/env bash | |
| # $1: model file name | |
| # $2: port | |
| # $3: context size | |
| # $4: alias (model name sent to client) | |
| # Define the help text as a function | |
| show_help() { | |
| echo "Usage: $0 <model file> <port> \\" | |
| echo " <context length, 0 for default> \\" |
| GGML_BLAS_VENDOR Intel10_64_dyn | |
| Vulkan_GLSLANG_VALIDATOR_EXECUTABLE ~/.local/vulkan/1.4.321.1/x86_64/bin/glslangValidator | |
| Vulkan_GLSLC_EXECUTABLE ~/.local/vulkan/1.4.321.1/x86_64/bin/glslc | |
| Vulkan_INCLUDE_DIR ~/.local/vulkan/1.4.321.1/x86_64/include | |
| Vulkan_LIBRARY ~/.local/vulkan/1.4.321.1/x86_64/lib/libvulkan.so | |
| # /etc/default/grub | |
| # Run "sudo grub2-mkconfig -o /boot/grub2/grub.cfg" after changing this file. | |
| GRUB_TIMEOUT=5 | |
| GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)" | |
| GRUB_DEFAULT=saved | |
| GRUB_DISABLE_SUBMENU=true | |
| GRUB_TERMINAL_OUTPUT="console" | |
| ##### vvvvvvvvvvvvvvv | |
| GRUB_CMDLINE_LINUX="rhgb quiet amd_iommu=off amdgpu.gttsize=131072 ttm.pages_limit=33554432" | |
| ##### ^^^^^^^^^^^^^^^ |
| diff --git a/discover/amd_linux.go b/discover/amd_linux.go | |
| index ebffbdf6..bbb5ec49 100644 | |
| --- a/discover/amd_linux.go | |
| +++ b/discover/amd_linux.go | |
| @@ -315,15 +315,15 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) { | |
| gpuOrdinalID += 1 | |
| // iGPU detection, remove this check once we can support an iGPU variant of the rocm library | |
| - if totalMemory < IGPUMemLimit { | |
| - reason := "unsupported Radeon iGPU detected skipping" |
| #!/usr/bin/env bash | |
| # Linking the bitcode files into the current directory is required for JAX to work | |
| # with ROCm. | |
| if [ "$#" -eq 0 ]; then | |
| echo "Usage: $0 <path to bitcode files, e.g. /opt/rocm-6.4/llvm/amdgcn/bitcode>" | |
| exit 1 # Exit with a non-zero status to indicate an error | |
| fi | |
| BCPATH=$1 | |
| ln -s $BCPATH/opencl.bc ./opencl.bc |
| #!/usr/bin/env bash | |
| # link-all.sh <target-dir> | |
| # Links all ROCm .bc bitcode files to local files so that XLA and JAX can use them | |
| set -euo pipefail | |
| [[ $# -eq 1 ]] || { echo "Usage: $0 <target-directory>" >&2; exit 1; } | |
| target=${1%/} # strip trailing slash | |
| [[ -d $target ]] || { echo "\"$target\" is not a directory" >&2; exit 1; } |