Exposes llama-cli over IRC
#!/usr/bin/env python3
# A wrapper to expose llama-cli over IRC by Test_User
# This no longer works on the latest llama.cpp.
# The last known working git commit is: 5d195f17bc60eacc15cfb929f9403cf29ccdf419
# https://github.com/ggml-org/llama.cpp
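#
# Usage from the IRC channel (with the default bot_nick/prefix configured below):
#   $<text>                send <text> to the running model
#   llmbot: prompt         show the current system prompt
#   llmbot: prompt <text>  set a new system prompt (restarts the model)
#   llmbot: model          show the current model path
#   llmbot: model <name>   switch to one of the allowed models (restarts the model)
#   llmbot: reset          reset the conversation context
#   llmbot: list           list the allowed model names
#
# The build steps below are an assumption, not part of the original gist; a
# typical Vulkan build of llama.cpp at the pinned commit would look like:
#   git checkout 5d195f17bc60eacc15cfb929f9403cf29ccdf419
#   cmake -B build -DGGML_VULKAN=ON && cmake --build build --config Release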
import os
import socket
import ssl
import subprocess
import threading
import time
from pathlib import Path
bot_nick = b"llmbot"
channel = b"#llm"
prefix = b"$"
thread_count = "22"
root_dir = str(Path.home())
llms_dir = "/media/x/LLM/"
allowed_models = {
    b"GLM-4.6-Q3_K_M": {"path": llms_dir + "GLM-4.6-Q3_K_M.gguf", "gpu": True},
    b"GLM-4.6-Uncensored-Q3_K_M": {"path": llms_dir + "GLM-4.6-Uncensored-Q3_K_M.gguf", "gpu": True},
    b"Qwen3-235B-A22B-Instruct-2507-Q4_K_M": {"path": llms_dir + "Qwen3-235B-A22B-Instruct-2507-Q4_K_M.gguf", "gpu": True},
    b"Qwen3-235B-A22B-Instruct-Uncensored-Q4_K_M": {"path": llms_dir + "Qwen3-Instruct-Uncensored-Q4_K_M.gguf", "gpu": True},
    b"Qwen3-235B-A22B-Thinking-Uncensored-Q4_K_M": {"path": llms_dir + "Qwen3-Thinking-Uncensored-Q4_K_M.gguf", "gpu": True},
    b"Qwen3-Coder-480B-A35B-Instruct-Q2_K": {"path": llms_dir + "Qwen3-Coder-480B-A35B-Instruct-Q2_K.gguf", "gpu": True},
    b"Qwen3-Coder-480B-A35B-Instruct-Q4_K_M": {"path": llms_dir + "Qwen3-Coder-480B-A35B-Instruct-Q4_K_M.gguf", "gpu": True},
    b"Llama-3.3-70B-Instruct-F16": {"path": llms_dir + "Llama-3.3-70B-Instruct-F16.gguf", "gpu": True},
    b"xai-org_grok-2-Q4_K_M": {"path": llms_dir + "xai-org_grok-2-Q4_K_M.gguf", "gpu": True},
    b"Llama-3.1-405B-Instruct-Q2_K": {"path": llms_dir + "Llama-3.1-405B-Instruct-Q2_K.gguf", "gpu": True},
    b"Llama-3.1-405B-Instruct-Q3_K_S": {"path": llms_dir + "Llama-3.1-405B-Instruct-Q3_K_S.gguf", "gpu": True},
    b"Solar-Open-100B-Q8_0": {"path": llms_dir + "Solar-Open-100B-Q8_0.gguf", "gpu": True},
}
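# Optional startup sanity check; this block is an addition, not part of the
# original gist. It warns about allowed_models entries whose .gguf file is
# missing, so a bad "model" switch is caught at launch rather than when
# llama-cli fails to start.
for _name, _info in allowed_models.items():
    if not os.path.isfile(_info["path"]):
        print("warning: model file missing for " + _name.decode() + ": " + _info["path"])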
model = llms_dir + "Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf"
gpu = True
system_prompt = b"short responses"
irc_ready = False
llm_ready = False
irc_socket = None
llm_pipe = None
state_lock = threading.Lock()
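# Concurrency model (a summary of the code below): two threads share one lock.
# llm_side() keeps a llama-cli subprocess alive, reading its stdout from
# read_pipe and relaying complete lines to the channel; irc_side() owns the
# IRC socket and writes user text into the subprocess's stdin via llm_pipe.
# state_lock guards irc_ready/llm_ready and both file descriptors, since
# either side may tear the other down on restart or disconnect.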
def sendall(fd, msg):
    # Keep writing until the whole message is flushed; os.write() may perform
    # a partial write. Note that os.write() raises OSError on failure rather
    # than returning a negative count, so errors are caught here.
    while len(msg) != 0:
        try:
            done = os.write(fd, msg)
        except OSError:
            return
        msg = msg[done:]
def llm_side():
    global state_lock
    global irc_ready
    global irc_socket
    global llm_ready
    global llm_pipe
    global gpu
    global model
    global system_prompt
    while True:
        # One pipe pair for the child's stdout, one for its stdin.
        read_pipe, tmp_write = os.pipe()
        tmp_read, write_pipe = os.pipe()
        state_lock.acquire()
        args = [root_dir + "/llama.cpp/build/bin/llama-cli", "--offline", "-m", model, "--system-prompt", system_prompt,
                "--simple-io", "--no-display-prompt", "--conversation", "--keep", "-1",
                "--cpu-strict", "1", "--no-kv-offload", "--ctx-size", "8192", "--split-mode", "row"]
        if not gpu:
            args.append("--device")
            args.append("none")
        else:
            args.append("--device")
            args.append("Vulkan1,Vulkan2,Vulkan3,Vulkan4,Vulkan5")
        args.append("--threads")
        args.append(thread_count)
        subprocess.Popen(args, stdin=tmp_read, stdout=tmp_write)  # , stderr=subprocess.DEVNULL)
        llm_pipe = write_pipe
        llm_ready = True
        state_lock.release()
        # The child holds its own copies of these ends; close ours.
        os.close(tmp_write)
        os.close(tmp_read)
| msg = b"" | |
| while True: | |
| error = False | |
| while True: | |
| newmsg = os.read(read_pipe, 4096) | |
| if newmsg == b"": | |
| error = True | |
| break | |
| oldmsg = msg | |
| msg = msg + newmsg | |
| if (newmsg.find(b"\n")) != -1: | |
| break | |
| if error: | |
| break | |
| split_lines = msg.split(b"\n") | |
| msg = split_lines[-1] | |
| split_lines = split_lines[:-1] | |
| state_lock.acquire() | |
| if not irc_ready: | |
| state_lock.release() | |
| continue | |
| for line in split_lines: | |
| if line.startswith(b"> "): | |
| line = line[2:] | |
| irc_socket.send(b"PRIVMSG " + channel + b" :" + line.replace(b"\r", b"") + b"\r\n") | |
| state_lock.release() | |
| state_lock.acquire() | |
| if llm_ready: | |
| os.close(write_pipe) | |
| llm_ready = False | |
| os.close(read_pipe) | |
| state_lock.release() | |
def irc_side():
    global state_lock
    global irc_ready
    global irc_socket
    global llm_ready
    global llm_pipe
    global gpu
    global model
    global system_prompt
    state_lock.acquire()
    irc_socket = ssl.create_default_context().wrap_socket(socket.socket(socket.AF_INET, socket.SOCK_STREAM), server_hostname="EXAMPLE-IRC-SERVER.org")
    #irc_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        irc_socket.connect(("irc.EXAMPLE-IRC-SERVER.org", 6697))
    except Exception:
        print("connect fail")
        state_lock.release()  # release before returning so the main loop can reacquire
        return
    print("connected")
    irc_socket.sendall(b"USER ai * * :some sus bot\r\nNICK " + bot_nick + b"\r\n")
    irc_socket.settimeout(60)
    msg = b""
    while True:
        state_lock.release()
        fails = 0
        while True:
            try:
                newmsg = irc_socket.recv(4096)
            except Exception:
                # On the first timeout, ping the server; on the second, give up.
                if fails < 1:
                    state_lock.acquire()
                    irc_socket.sendall(b"PING e\r\n")
                    state_lock.release()
                else:
                    return
                fails = fails + 1
                continue
            if newmsg == b"":
                return
            oldmsg = msg
            msg = oldmsg + newmsg
            # Stop once a full "\r\n" is buffered, including one split across reads.
            if (oldmsg[-1:] == b"\r" and newmsg[:1] == b"\n") or newmsg.find(b"\r\n") != -1:
                break
        # The last element is a partial line; keep it for the next read.
        split_lines = msg.split(b"\r\n")
        msg = split_lines[-1]
        split_lines = split_lines[:-1]
        state_lock.acquire()
        for line in split_lines:
            # Parse one IRC line: optional ":source", a command, space-separated
            # arguments, and an optional trailing argument after " :".
            source = b""
            lastarg = None
            if line.startswith(b":"):
                source = line.split(b" ", maxsplit=1)[0][1:]
                line = line.split(b" ", maxsplit=1)[1]
            parts = line.split(b" ", maxsplit=1)
            command = parts[0]
            line = parts[1] if len(parts) > 1 else b""
            if line.startswith(b":"):
                line = b" " + line
            if len(line.split(b" :", maxsplit=1)) > 1:
                lastarg = line.split(b" :", maxsplit=1)[1]
                line = line.split(b" :", maxsplit=1)[0]
            args = [arg for arg in line.split(b" ") if arg != b""]
            if lastarg is not None:
                args.append(lastarg)
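            # Illustrative example (not in the original gist): the line
            #   b":nick!user@host PRIVMSG #llm :$hello"
            # parses to source=b"nick!user@host", command=b"PRIVMSG",
            # args=[b"#llm", b"$hello"].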
            if command == b"PING":
                if len(args) == 0:
                    irc_socket.sendall(b"PONG\r\n")
                elif len(args) == 1:
                    irc_socket.sendall(b"PONG :" + args[0] + b"\r\n")
                else:
                    irc_socket.sendall(b"PONG " + args[1] + b" " + args[0] + b"\r\n")
            elif command == b"001":
                # 001 (RPL_WELCOME): registration is done, join the channel.
                irc_ready = True
                irc_socket.sendall(b"JOIN " + channel + b"\r\n")
            elif command == b"PRIVMSG" and irc_ready:
                if len(args) < 2:
                    continue
                if args[0] != channel:
                    continue
                if args[1].startswith(prefix):
                    # Prefixed text goes straight to llama-cli's stdin.
                    privmsg = args[1][len(prefix):]
                    if llm_ready:
                        sendall(llm_pipe, privmsg + b"\n")
                elif args[1].startswith(bot_nick + b": "):
                    # Messages addressed to the bot are admin commands.
                    privmsg = args[1][len(bot_nick + b": "):]
                    restart = False
                    if privmsg == b"prompt":
                        irc_socket.sendall(b"NOTICE " + channel + b" :Prompt is set to " + system_prompt + b"\r\n")
                    elif privmsg.startswith(b"prompt "):
                        system_prompt = privmsg[len(b"prompt "):]
                        restart = True
                    elif privmsg == b"reset":
                        irc_socket.sendall(b"NOTICE " + channel + b" :Context reset\r\n")
                        restart = True
                    elif privmsg == b"model":
                        irc_socket.sendall(b"NOTICE " + channel + b" :Model set to " + model.encode("UTF-8") + b"\r\n")
                    elif privmsg.startswith(b"model "):
                        tmp = privmsg[len(b"model "):]
                        if tmp in allowed_models:
                            model = allowed_models[tmp]["path"]
                            gpu = allowed_models[tmp]["gpu"]
                            irc_socket.sendall(b"NOTICE " + channel + b" :Model set to " + model.encode("UTF-8") + b"\r\n")
                            restart = True
                        else:
                            irc_socket.sendall(b"NOTICE " + channel + b" :The specified model is not known. Model NOT changed\r\n")
                    elif privmsg == b"list" or privmsg == b"models":
                        for model_name in allowed_models:
                            irc_socket.sendall(b"NOTICE " + channel + b" :" + model_name + b"\r\n")
                    if restart:
                        # Closing stdin makes llama-cli exit; llm_side respawns it.
                        if llm_ready:
                            os.close(llm_pipe)
                            llm_ready = False
threading.Thread(target=llm_side).start()
# Main loop: reconnect to IRC, at most once per 60 seconds.
last_time = time.time() - 60
while True:
    current_time = time.time()
    if last_time + 60 > current_time:
        time.sleep(last_time + 60 - current_time)
    last_time = current_time
    irc_side()
    state_lock.acquire()
    irc_ready = False
    irc_socket.close()
    state_lock.release()