Exposes llama-cli over IRC
#!/usr/bin/env python3
# A wrapper to expose llama-cli over IRC by Test_User
# This no longer works on the latest llama.cpp.
# The last known working git commit is: 5d195f17bc60eacc15cfb929f9403cf29ccdf419
# https://github.com/ggml-org/llama.cpp
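#
# How it fits together: two threads share state behind state_lock.
#  - llm_side() spawns llama-cli with the current model and system prompt,
#    reads its stdout through a pipe, and relays each output line to the
#    IRC channel as a PRIVMSG.
#  - irc_side() speaks just enough IRC to register, join the channel, answer
#    PINGs, forward "$"-prefixed messages to llama-cli's stdin, and handle
#    the "llmbot: ..." control commands.
# Closing llama-cli's stdin is the restart signal: the child exits, llm_side
# sees EOF on its stdout and respawns it with the updated settings.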
import os
import socket
import ssl
import subprocess
import threading
import time
from pathlib import Path
bot_nick = b"llmbot"
channel = b"#llm"
prefix = b"$"
thread_count = "22"
root_dir = str(Path.home())
llms_dir = "/media/x/LLM/"
allowed_models = {
    b"GLM-4.6-Q3_K_M": {"path": llms_dir+"GLM-4.6-Q3_K_M.gguf", "gpu": True},
    b"GLM-4.6-Uncensored-Q3_K_M": {"path": llms_dir+"GLM-4.6-Uncensored-Q3_K_M.gguf", "gpu": True},
    b"Qwen3-235B-A22B-Instruct-2507-Q4_K_M": {"path": llms_dir+"Qwen3-235B-A22B-Instruct-2507-Q4_K_M.gguf", "gpu": True},
    b"Qwen3-235B-A22B-Instruct-Uncensored-Q4_K_M": {"path": llms_dir+"Qwen3-Instruct-Uncensored-Q4_K_M.gguf", "gpu": True},
    b"Qwen3-235B-A22B-Thinking-Uncensored-Q4_K_M": {"path": llms_dir+"Qwen3-Thinking-Uncensored-Q4_K_M.gguf", "gpu": True},
    b"Qwen3-Coder-480B-A35B-Instruct-Q2_K": {"path": llms_dir+"Qwen3-Coder-480B-A35B-Instruct-Q2_K.gguf", "gpu": True},
    b"Qwen3-Coder-480B-A35B-Instruct-Q4_K_M": {"path": llms_dir+"Qwen3-Coder-480B-A35B-Instruct-Q4_K_M.gguf", "gpu": True},
    b"Llama-3.3-70B-Instruct-F16": {"path": llms_dir+"Llama-3.3-70B-Instruct-F16.gguf", "gpu": True},
    b"xai-org_grok-2-Q4_K_M": {"path": llms_dir+"xai-org_grok-2-Q4_K_M.gguf", "gpu": True},
    b"Llama-3.1-405B-Instruct-Q2_K": {"path": llms_dir+"Llama-3.1-405B-Instruct-Q2_K.gguf", "gpu": True},
    b"Llama-3.1-405B-Instruct-Q3_K_S": {"path": llms_dir+"Llama-3.1-405B-Instruct-Q3_K_S.gguf", "gpu": True},
    b"Solar-Open-100B-Q8_0": {"path": llms_dir+"Solar-Open-100B-Q8_0.gguf", "gpu": True},
}
model = llms_dir + "Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf"
gpu = True
system_prompt = b"short responses"
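# Channel commands (prefix and nick as configured above):
#   $<text>                 forwarded straight to llama-cli's stdin
#   llmbot: prompt          show the current system prompt
#   llmbot: prompt <text>   set the system prompt and restart llama-cli
#   llmbot: reset           restart llama-cli to clear the context
#   llmbot: model           show the current model path
#   llmbot: model <name>    switch to a model listed in allowed_models
#   llmbot: list            list the allowed model names (also: "models")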
irc_ready = False
llm_ready = False
irc_socket = None
llm_pipe = None
state_lock = threading.Lock()
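# state_lock serialises access to the IRC socket and the flags/pipe above
# between the irc_side and llm_side threads.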

def sendall(fd, msg):
    while len(msg) != 0:
        done = os.write(fd, msg)
        if done < 0:
            return
        msg = msg[done:]

def llm_side():
    global state_lock
    global irc_ready
    global irc_socket
    global llm_ready
    global llm_pipe
    global gpu
    global model
    global system_prompt
    while True:
        # Parent reads llama-cli's stdout from read_pipe and writes prompts to write_pipe.
        read_pipe, tmp_write = os.pipe()
        tmp_read, write_pipe = os.pipe()
        state_lock.acquire()
        args = [root_dir+"/llama.cpp/build/bin/llama-cli", "--offline", "-m", model, "--system-prompt", system_prompt,
                "--simple-io", "--no-display-prompt", "--conversation", "--keep", "-1",
                "--cpu-strict", "1", "--no-kv-offload", "--ctx-size", "8192", "--split-mode", "row"]
        if not gpu:
            args.append("--device")
            args.append("none")
            args.append("--threads")
            args.append(thread_count)
        else:
            args.append("--device")
            args.append("Vulkan1,Vulkan2,Vulkan3,Vulkan4,Vulkan5")
            args.append("--threads")
            args.append(thread_count)
        subprocess.Popen(args, stdin=tmp_read, stdout=tmp_write) #, stderr=subprocess.DEVNULL)
        llm_pipe = write_pipe
        llm_ready = True
        state_lock.release()
        # The child now owns its ends of the pipes.
        os.close(tmp_write)
        os.close(tmp_read)
        msg = b""
        while True:
            error = False
            while True:
                newmsg = os.read(read_pipe, 4096)
                if newmsg == b"":
                    # EOF: llama-cli exited (or its stdin was closed to force a restart).
                    error = True
                    break
                oldmsg = msg
                msg = msg + newmsg
                if (newmsg.find(b"\n")) != -1:
                    break
            if error:
                break
            split_lines = msg.split(b"\n")
            msg = split_lines[-1]
            split_lines = split_lines[:-1]
            state_lock.acquire()
            if not irc_ready:
                state_lock.release()
                continue
            for line in split_lines:
                if line.startswith(b"> "):
                    line = line[2:]
                irc_socket.send(b"PRIVMSG " + channel + b" :" + line.replace(b"\r", b"") + b"\r\n")
            state_lock.release()
        # Clean up, then loop around to respawn llama-cli with the current settings.
        state_lock.acquire()
        if llm_ready:
            os.close(write_pipe)
            llm_ready = False
        os.close(read_pipe)
        state_lock.release()

def irc_side():
    global state_lock
    global irc_ready
    global irc_socket
    global llm_ready
    global llm_pipe
    global gpu
    global model
    global system_prompt
    state_lock.acquire()
    irc_socket = ssl.create_default_context().wrap_socket(socket.socket(socket.AF_INET, socket.SOCK_STREAM), server_hostname="EXAMPLE-IRC-SERVER.org")
    #irc_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        irc_socket.connect(("irc.EXAMPLE-IRC-SERVER.org", 6697))
    except Exception:
        print("connect fail")
        state_lock.release()  # release so the caller can re-acquire after we return
        return
    print("connected")
    irc_socket.sendall(b"USER ai * * :some sus bot\r\nNICK " + bot_nick + b"\r\n")
    irc_socket.settimeout(60)
    msg = b""
    while True:
        state_lock.release()
        fails = 0
        while True:
            try:
                newmsg = irc_socket.recv(4096)
            except Exception as e:
                if fails < 1:
                    state_lock.acquire()
                    irc_socket.sendall(b"PING e\r\n")
                    state_lock.release()
                else:
                    return
                fails = fails + 1
                continue
            if newmsg == b"":
                return
            oldmsg = msg
            msg = oldmsg + newmsg
            # Break once a full line is buffered, including a CRLF split across two reads.
            if (oldmsg[-1:] == b"\r" and newmsg[:1] == b"\n") or newmsg.find(b"\r\n") != -1:
                break
        # last element is a partial line; keep it for the next read
        split_lines = msg.split(b"\r\n")
        msg = split_lines[-1]
        split_lines = split_lines[:-1]
        state_lock.acquire()
        for line in split_lines:
            source = b""
            lastarg = None
            if line.startswith(b":"):
                source = line.split(b" ", maxsplit=1)[0][1:]
                line = line.split(b" ", maxsplit=1)[1]
            command = line.split(b" ", maxsplit=1)[0]
            line = line.split(b" ", maxsplit=1)[1]
            if line.startswith(b":"):
                line = b" " + line
            if len(line.split(b" :", maxsplit=1)) > 1:
                lastarg = line.split(b" :", maxsplit=1)[1]
                line = line.split(b" :")[0]
            args = [arg for arg in line.split(b" ") if arg != b""]
            if lastarg is not None:
                args.append(lastarg)
            if command == b"PING":
                if len(args) == 0:
                    irc_socket.sendall(b"PONG\r\n")
                elif len(args) == 1:
                    irc_socket.sendall(b"PONG :" + args[0] + b"\r\n")
                else:
                    irc_socket.sendall(b"PONG " + args[1] + b" " + args[0] + b"\r\n")
            elif command == b"001":
                irc_ready = True
                irc_socket.sendall(b"JOIN " + channel + b"\r\n")
            elif command == b"PRIVMSG" and irc_ready:
                if len(args) < 2:
                    continue
                if args[0] != channel:
                    continue
                if args[1].startswith(prefix):
                    privmsg = args[1][len(prefix):]
                    if llm_ready:
                        sendall(llm_pipe, privmsg + b"\n")
                elif args[1].startswith(bot_nick + b": "):
                    privmsg = args[1][len(bot_nick + b": "):]
                    restart = False
                    if privmsg == b"prompt":
                        irc_socket.sendall(b"NOTICE " + channel + b" :Prompt is set to " + system_prompt + b"\r\n")
                    elif privmsg.startswith(b"prompt "):
                        system_prompt = privmsg[len(b"prompt "):]
                        restart = True
                    elif privmsg == b"reset":
                        irc_socket.sendall(b"NOTICE " + channel + b" :Context reset\r\n")
                        restart = True
                    elif privmsg == b"model":
                        irc_socket.sendall(b"NOTICE " + channel + b" :Model set to " + model.encode("UTF-8") + b"\r\n")
                    elif privmsg.startswith(b"model "):
                        tmp = privmsg[len(b"model "):]
                        if tmp in allowed_models:
                            model = allowed_models[tmp]["path"]
                            gpu = allowed_models[tmp]["gpu"]
                            irc_socket.sendall(b"NOTICE " + channel + b" :Model set to " + model.encode("UTF-8") + b"\r\n")
                            restart = True
                        else:
                            irc_socket.sendall(b"NOTICE " + channel + b" :The specified model is not known. Model NOT changed\r\n")
                    elif privmsg == b"list" or privmsg == b"models":
                        for model_name in allowed_models:
                            irc_socket.sendall(b"NOTICE " + channel + b" :" + model_name + b"\r\n")
                    if restart:
                        if llm_ready:
                            os.close(llm_pipe)
                            llm_ready = False

# Start the LLM worker once; it respawns llama-cli itself whenever its pipe closes.
threading.Thread(target=llm_side).start()

# Main loop: (re)connect to IRC, waiting at least 60 seconds between attempts.
last_time = time.time() - 60
while True:
    current_time = time.time()
    if last_time + 60 > current_time:
        time.sleep(last_time + 60 - current_time)
    last_time = current_time
    irc_side()
    state_lock.acquire()
    irc_ready = False
    irc_socket.close()
    state_lock.release()
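
# Example channel session (hypothetical transcript, using the defaults above):
#   <user>   llmbot: model
#   -llmbot- Model set to /media/x/LLM/Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf
#   <user>   llmbot: model GLM-4.6-Q3_K_M
#   -llmbot- Model set to /media/x/LLM/GLM-4.6-Q3_K_M.gguf
#   <user>   $Say hello in one line.
#   <llmbot> (llama-cli's reply, relayed line by line as PRIVMSGs)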