{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/yvvki/214be54c441338a307298dbafa6abb3e/colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2FCn5tmpn3UV"
      },
      "source": [
| "## Welcome to the Simplified KoboldCpp Colab Notebook\n", | |
| "It's really easy to get started. Just press the two **Play** buttons below, and then connect to the **Cloudflare URL** shown at the end.\n", | |
| "You can select a model from the dropdown, or enter a **custom URL** to a GGUF model (Example: `https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf`)\n", | |
| "\n", | |
| "**Keep this page open and occationally check for captcha's so that your AI is not shut down**" | |
| ] | |
| }, | |
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "QNaj3u0jn3UW"
      },
      "outputs": [],
      "source": [
| "#@title <-- Tap this if you play on Mobile { display-mode: \"form\" }\n", | |
| "%%html\n", | |
| "<b>Press play on the music player to keep the tab alive, then start KoboldCpp below</b><br/>\n", | |
| "<audio src=\"https://raw.githubusercontent.com/KoboldAI/KoboldAI-Client/main/colab/silence.m4a\" loop controls autoplay>" | |
| ] | |
| }, | |
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "uJS9i_Dltv8Y"
      },
      "outputs": [],
      "source": [
        "# @title <b>v-- Enter your model below and then click this to start KoboldCpp</b> { display-mode: \"form\" }\n",
        "\n",
        "Model = \"https://huggingface.co/mradermacher/Irida-SCE-9B-GGUF/resolve/main/Irida-SCE-9B.Q8_0.gguf\" # @param {\"type\":\"string\"}\n",
        "Context = \"12288\" # @param [4096,8192,12288,16384,24576,32768] {allow-input: true}\n",
        "Instruct = \"AutoGuess\" # @param [\"AutoGuess\",\"ChatML\",\"Gemma 2\",\"Llama 3 Instruct\"]\n",
        "\n",
        "premade_instruct = {\n",
        "    \"ChatML\": {\n",
        "        \"user_start\": \"\\n<|im_start|>user\\n\",\n",
        "        \"user_end\": \"<|im_end|>\",\n",
        "        \"assistant_start\": \"\\n<|im_start|>assistant\\n\",\n",
        "        \"assistant_end\": \"<|im_end|>\",\n",
        "        \"system_start\": \"<|im_start|>system\\n\",\n",
        "        \"system_end\": \"<|im_end|>\",\n",
        "    },\n",
        "    \"Gemma 2\": {\n",
        "        \"user_start\": \"<start_of_turn>user\\n\",\n",
        "        \"user_end\": \"<end_of_turn>\\n\",\n",
        "        \"assistant_start\": \"<start_of_turn>model\\n\",\n",
        "        \"assistant_end\": \"<end_of_turn>\\n\",\n",
        "        \"system_start\": \"<start_of_turn>user\\n\",\n",
        "        \"system_end\": \"<end_of_turn>\\n\",\n",
        "    },\n",
        "    \"Llama 3 Instruct\": {\n",
        "        \"user_start\": \"<|start_header_id|>user<|end_header_id|>\\n\\n\",\n",
        "        \"user_end\": \"<|eot_id|>\",\n",
        "        \"assistant_start\": \"<|start_header_id|>assistant<|end_header_id|>\\n\\n\",\n",
        "        \"assistant_end\": \"<|eot_id|>\",\n",
        "        \"system_start\": \"<|start_header_id|>system<|end_header_id|>\\n\\n\",\n",
        "        \"system_end\": \"<|eot_id|>\",\n",
        "    },\n",
        "}\n",
| "\n", | |
| "# #@markdown ---\n", | |
| "# LoadVisionMMProjector = False #@param {type:\"boolean\"}\n", | |
| "# Mmproj = \"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\" #@param [\"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/mistral-7b-mmproj-v1.5-Q4_1.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-7b-mmproj-v1.5-Q4_0.gguf\",\"https://huggingface.co/koboldcpp/mmproj/resolve/main/LLaMA3-8B_mmproj-Q4_1.gguf\"]{allow-input: true}\n", | |
| "# #@markdown ---\n", | |
| "# LoadImgModel = False #@param {type:\"boolean\"}\n", | |
| "# ImgModel = \"https://huggingface.co/koboldcpp/imgmodel/resolve/main/imgmodel_ftuned_q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/imgmodel/resolve/main/imgmodel_ftuned_q4_0.gguf\"]{allow-input: true}\n", | |
| "# #@markdown ---\n", | |
| "# LoadSpeechModel = False #@param {type:\"boolean\"}\n", | |
| "# SpeechModel = \"https://huggingface.co/koboldcpp/whisper/resolve/main/whisper-base.en-q5_1.bin\" #@param [\"https://huggingface.co/koboldcpp/whisper/resolve/main/whisper-base.en-q5_1.bin\"]{allow-input: true}\n", | |
| "# #@markdown ---\n", | |
| "# LoadTTSModel = False #@param {type:\"boolean\"}\n", | |
| "# TTSModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\"]{allow-input: true}\n", | |
| "# WavTokModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\"]{allow-input: true}\n", | |
| "\n", | |
| "# @markdown ---\n", | |
| "# @markdown ### Advanced settings\n", | |
| "# @markdown [GPU layer offloading](https://github.com/LostRuins/koboldcpp/wiki#what-does-gpu-layer-offloading-do-how-many-layers-can-i-offload)\n", | |
| "Layers = 99 # @param {\"type\":\"integer\"}\n", | |
| "# @markdown [Flash Attention](https://github.com/LostRuins/koboldcpp/wiki#flash-attention)\n", | |
| "FlashAttention = True #@param {type:\"boolean\"}\n", | |
| "# @markdown [Quantized KV Cache](https://github.com/LostRuins/koboldcpp/wiki#quantized-kv-cache) (-1 to disable)\n", | |
| "QuantKV = \"-1\" # @param [\"-1\",\"0\",\"1\",\"2\"]\n", | |
| "\n", | |
| "# @markdown ---\n", | |
| "# @markdown ### Experimental settings\n", | |
| "# @markdown [Cloudflare Zero Trust Tunnel](https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/)\n", | |
| "# @markdown\n", | |
| "# @markdown Please add the `CONNECTOR_TOKEN` secret.\n", | |
| "EnableTunnel = True #@param {type:\"boolean\"}\n", | |
| "\n", | |
| "if \"https://huggingface.co/\" not in Model:\n", | |
| " raise ValueError(\"⚠️ This notebook only supports HuggingFace models\")\n", | |
| "\n", | |
| "import os\n", | |
| "if not os.path.isfile(\"/opt/bin/nvidia-smi\"):\n", | |
| " raise RuntimeError(\"⚠️ Colab did not give you a GPU due to usage limits, this can take a few hours before they let you back in. Check out https://lite.koboldai.net for a free alternative (that does not provide an API link but can load KoboldAI saves and chat cards) or subscribe to Colab Pro for immediate access.\")\n", | |
| "\n", | |
| "CCACommand = \"--chatcompletionsadapter AutoGuess\"\n", | |
| "\n", | |
| "import json\n", | |
| "if Instruct != \"AutoGuess\":\n", | |
| " with open(\"instruct.json\", \"w\") as f:\n", | |
| " f.write(json.dumps(premade_instruct[Instruct], separators=(\",\", \":\")))\n", | |
| " CCACommand = \"--chatcompletionsadapter instruct.json\"\n", | |
| "\n", | |
| "# VCommand = \"--mmproj vmodel.gguf\" if LoadVisionMMProjector and Mmproj else \"\"\n", | |
| "# ICommand = \"--sdmodel imodel.gguf --sdthreads 4 --sdquant --sdclamped\" if LoadImgModel and ImgModel else \"\"\n", | |
| "# WCommand = \"--whispermodel wmodel.bin\" if LoadSpeechModel and SpeechModel else \"\"\n", | |
| "# TTSCommand = \"--ttsmodel ttsmodel.bin --ttswavtokenizer ttswavtok.bin --ttsgpu\" if LoadTTSModel and TTSModel and WavTokModel else \"\"\n", | |
| "FACommand = \"--flashattention\" if FlashAttention else \"\"\n", | |
| "QKCommand = f\"--quantkv {QuantKV}\" if QuantKV != \"-1\" else \"\"\n", | |
| "RTCommand = \"--remotetunnel\" if not EnableTunnel else \"\"\n", | |
| "\n", | |
| "if EnableTunnel:\n", | |
| " !curl -L --output cloudflared.deb https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb\n", | |
| " !dpkg -i cloudflared.deb\n", | |
| "\n", | |
| " from google.colab import userdata\n", | |
| " CONNECTOR_TOKEN = userdata.get('CONNECTOR_TOKEN')\n", | |
| "\n", | |
| " !cloudflared service uninstall\n", | |
| " !cloudflared service install $CONNECTOR_TOKEN\n", | |
| "\n", | |
| "from urllib.parse import urlparse, parse_qs\n", | |
| "\n", | |
| "def extract_hf_url(url):\n", | |
| " parsed_url = urlparse(url)\n", | |
| " path_parts = parsed_url.path.strip('/').split('/')\n", | |
| " query = parse_qs(parsed_url.query)\n", | |
| "\n", | |
| " model_name = path_parts[0]+\"/\"+path_parts[1]\n", | |
| " version = path_parts[3]\n", | |
| " file_name = path_parts[4]\n", | |
| "\n", | |
| " return model_name, version, file_name\n", | |
| "\n", | |
| "# simple fix for a common URL mistake\n", | |
| "if \"/blob/\" in Model:\n", | |
| " Model = Model.replace(\"/blob/\", \"/resolve/\")\n", | |
| "\n", | |
| "# faster downloads\n", | |
| "import os\n", | |
| "!pip install hf_transfer\n", | |
| "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"\n", | |
| "\n", | |
| "# model info\n", | |
| "REPO_ID, REVISION, FILE = extract_hf_url(Model)\n", | |
| "\n", | |
| "DIR = REPO_ID.replace(\"/\", \"_\")\n", | |
| "\n", | |
| "if REVISION != \"\" and REVISION != \"main\":\n", | |
| " DIR = f\"{DIR}_{REVISION}\"\n", | |
| "\n", | |
| "print(\"Model Name: \" + DIR)\n", | |
| "print(\"Version: \" + REVISION)\n", | |
| "print(\"File: \" + FILE)\n", | |
| "\n", | |
| "from huggingface_hub import hf_hub_download\n", | |
| "\n", | |
| "FULLPATH = f\"/content/models/{DIR}\"\n", | |
| "\n", | |
| "hf_hub_download(repo_id=REPO_ID, filename=FILE, revision=REVISION, local_dir=FULLPATH)\n", | |
| "\n", | |
| "# TODO: extra models functionality\n", | |
| "\n", | |
| "# init and start\n", | |
| "!wget -O dlfile.tmp https://kcpplinux.concedo.workers.dev -q --show-progress --progress=bar:force:noscroll && mv dlfile.tmp koboldcpp_linux\n", | |
| "!test -f koboldcpp_linux && echo Download Successful || echo Download Failed\n", | |
| "!chmod +x ./koboldcpp_linux\n", | |
| "\n", | |
| "ModelPath = FULLPATH + \"/\" + FILE\n", | |
| "print(\"Model to load: \" + ModelPath)\n", | |
| "\n", | |
| "!./koboldcpp_linux $ModelPath \\\n", | |
| "--usecublas 0 mmq \\\n", | |
| "$CCACommand \\\n", | |
| "--gpulayers $Layers \\\n", | |
| "--contextsize $Context \\\n", | |
| "$FACommand \\\n", | |
| "$QKCommand \\\n", | |
| "--websearch \\\n", | |
| "--quiet \\\n", | |
| "$RTCommand" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "accelerator": "GPU", | |
| "colab": { | |
| "gpuType": "T4", | |
| "provenance": [], | |
| "name": "KoboldCpp - HF", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |