Skip to content

Instantly share code, notes, and snippets.

@tsangwpx
Last active February 28, 2026 06:48
Show Gist options
  • Select an option

  • Save tsangwpx/0926192db82c06073116b8a0a12a878f to your computer and use it in GitHub Desktop.

Select an option

Save tsangwpx/0926192db82c06073116b8a0a12a878f to your computer and use it in GitHub Desktop.
Use ioctl to set up KVM_CAP_HALT_POLL in qemu
// Vibe-coded product. Use with CAUTION.
// Unlicense.
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>
// Capability id that main() stores in kvm_enable_cap.cap.
// NOTE(review): presumably mirrors KVM_CAP_HALT_POLL from <linux/kvm.h> so the
// file also builds against older headers — confirm.
#define KVM_CAP_HALT_POLL 182
// Request number that main() loads as the second argument of the injected
// ioctl, paired with a pointer to a struct kvm_enable_cap.
// NOTE(review): looks like the x86-64 encoding of KVM_ENABLE_CAP (write
// direction, 0x68 = 104-byte argument) — confirm against the kernel headers.
#define KVM_ENABLE_CAP_IOCTL 0x4068aea3
/* --- HELPERS --- */
/*
 * Reports whether process `pid` is stopped, judged by the "State:" line of
 * /proc/<pid>/status.
 *
 * Returns 1 when the state letter is 'T' or 't', 0 for any other letter (or
 * when no "State:" line is present), and -1 if the status file cannot be
 * opened.
 */
int is_process_stopped(pid_t pid) {
  static const char key[] = "State:";
  const size_t key_len = sizeof(key) - 1;
  char status_path[256];
  char row[256];
  int result = 0;

  snprintf(status_path, sizeof(status_path), "/proc/%d/status", pid);
  FILE *fp = fopen(status_path, "r");
  if (fp == NULL) {
    return -1;
  }

  while (fgets(row, sizeof(row), fp) != NULL) {
    if (strncmp(row, key, key_len) != 0) {
      continue;
    }
    // Skip the blanks/tabs between the key and the one-letter state code.
    const char *cursor = row + key_len;
    cursor += strspn(cursor, " \t");
    result = (*cursor == 'T' || *cursor == 't');
    break;
  }

  fclose(fp);
  return result;
}
/*
 * Transfers `len` bytes between the local buffer `data` and address `addr` in
 * the memory of process `pid`, going through /proc/<pid>/mem.
 *
 * A non-zero `is_write` copies `data` into the target; zero copies from the
 * target into `data`. Returns 0 only when exactly `len` bytes were moved; an
 * open failure, an I/O error, or a short transfer all yield -1.
 */
int rw_mem(pid_t pid, unsigned long addr, void *data, size_t len,
           int is_write) {
  char mem_path[256];
  snprintf(mem_path, sizeof(mem_path), "/proc/%d/mem", pid);

  int mem_fd;
  if (is_write) {
    mem_fd = open(mem_path, O_WRONLY);
  } else {
    mem_fd = open(mem_path, O_RDONLY);
  }
  if (mem_fd < 0) {
    return -1;
  }

  // The target address doubles as the file offset inside /proc/<pid>/mem.
  ssize_t moved;
  if (is_write) {
    moved = pwrite(mem_fd, data, len, (off_t)addr);
  } else {
    moved = pread(mem_fd, data, len, (off_t)addr);
  }
  close(mem_fd);

  if (moved != (ssize_t)len) {
    return -1;
  }
  return 0;
}
/*
 * Searches the readable+executable mappings listed in /proc/<pid>/maps for the
 * byte pair 0x0f 0x05 (the `syscall` opcode the caller later points RIP at).
 *
 * Mappings are visited in file order and at most the first 32 MiB of each one
 * is read via rw_mem(); mappings that cannot be read or buffered are skipped.
 * Returns the target-process address of the first match, or 0 if the maps
 * file cannot be opened or nothing matches.
 */
unsigned long find_syscall_gadget(pid_t pid) {
  enum { SCAN_LIMIT = 32 * 1024 * 1024 };

  char maps_path[256];
  snprintf(maps_path, sizeof(maps_path), "/proc/%d/maps", pid);
  FILE *maps = fopen(maps_path, "r");
  if (maps == NULL) {
    return 0;
  }

  char row[1024];
  char mode[5];
  unsigned long lo, hi;
  unsigned long found = 0;

  while (found == 0 && fgets(row, sizeof(row), maps) != NULL) {
    if (sscanf(row, "%lx-%lx %4s", &lo, &hi, mode) != 3) {
      continue;
    }
    // Only mappings flagged both readable and executable are candidates.
    if (mode[0] != 'r' || mode[2] != 'x') {
      continue;
    }

    size_t span = hi - lo;
    if (span > SCAN_LIMIT) {
      span = SCAN_LIMIT;
    }

    unsigned char *image = malloc(span);
    if (image == NULL) {
      continue;
    }

    if (rw_mem(pid, lo, image, span, 0) == 0) {
      // Stop one byte early: every probe looks at a two-byte window.
      const unsigned char *last = image + span - 1;
      for (const unsigned char *p = image; p < last; ++p) {
        if (p[0] == 0x0f && p[1] == 0x05) {
          found = lo + (unsigned long)(p - image);
          break;
        }
      }
    }
    free(image);
  }

  fclose(maps);
  return found;
}
/*
 * Looks through /proc/<pid>/fd for a descriptor whose symlink target is
 * exactly "anon_inode:kvm-vm".
 *
 * Returns the descriptor number (as numbered inside process `pid`) of the
 * first such entry in directory order, or -1 if the directory cannot be opened
 * or no entry matches.
 */
int find_kvm_vm_fd(pid_t pid) {
  static const char wanted[] = "anon_inode:kvm-vm";

  char fd_dir[256];
  snprintf(fd_dir, sizeof(fd_dir), "/proc/%d/fd", pid);
  DIR *dp = opendir(fd_dir);
  if (dp == NULL) {
    return -1;
  }

  int result = -1;
  for (struct dirent *ent = readdir(dp); ent != NULL; ent = readdir(dp)) {
    const char *name = ent->d_name;
    // Skips "." and ".." (and any other dot-prefixed entry).
    if (name[0] == '.') {
      continue;
    }

    char entry_path[512];
    char target[512];
    snprintf(entry_path, sizeof(entry_path), "%s/%s", fd_dir, name);

    // readlink() does not terminate the string; one byte is kept in reserve.
    ssize_t n = readlink(entry_path, target, sizeof(target) - 1);
    if (n <= 0) {
      continue;
    }
    target[n] = '\0';

    if (strcmp(target, wanted) != 0) {
      continue;
    }
    result = atoi(name);
    break;
  }

  closedir(dp);
  return result;
}
/* --- MAIN INJECTION LOGIC --- */
/*
 * Waits for the next state change of tracee `pid`, retrying when waitpid() is
 * interrupted by a signal delivered to this tool.
 * Returns 0 with `*status` filled in, or -1 (errno left as set by waitpid).
 */
static int wait_for_tracee(pid_t pid, int *status) {
  for (;;) {
    if (waitpid(pid, status, 0) >= 0) {
      return 0;
    }
    if (errno != EINTR) {
      return -1;
    }
  }
}

/*
 * Usage: <prog> <qemu_pid> <halt_poll_ns>
 *
 * Makes the target process execute
 *   ioctl(vm_fd, KVM_ENABLE_CAP_IOCTL, &cap)   with cap.cap = KVM_CAP_HALT_POLL
 * on its own "anon_inode:kvm-vm" descriptor: the target is seized with ptrace,
 * its registers and a scratch area below its stack pointer are saved, the
 * request is staged, one `syscall` instruction is single-stepped, and the
 * saved state is written back before detaching.
 *
 * Exit status is 0 only when the injected ioctl returned 0; every other
 * outcome (bad arguments, ptrace/memory failures, ioctl error) yields 1.
 */
int main(int argc, char *argv[]) {
  if (argc != 3) {
    fprintf(stderr, "Usage: %s <qemu_pid> <halt_poll_ns>\n", argv[0]);
    return 1;
  }
  long ret_val = -1;

  // Parse both arguments strictly: atoi()/unchecked strtoull() would silently
  // turn garbage into 0 and then operate on the wrong PID or value.
  char *end = NULL;
  errno = 0;
  long pid_arg = strtol(argv[1], &end, 10);
  if (errno != 0 || end == argv[1] || *end != '\0' || pid_arg <= 0 ||
      (long)(pid_t)pid_arg != pid_arg) {
    fprintf(stderr, "[-] Invalid PID: %s\n", argv[1]);
    return 1;
  }
  pid_t target_pid = (pid_t)pid_arg;

  errno = 0;
  unsigned long long halt_poll_ns = strtoull(argv[2], &end, 10);
  // strtoull() accepts a leading '-' and wraps the value; reject it explicitly.
  if (errno != 0 || end == argv[2] || *end != '\0' ||
      strchr(argv[2], '-') != NULL) {
    fprintf(stderr, "[-] Invalid halt_poll_ns: %s\n", argv[2]);
    return 1;
  }

  // 1. Snapshot: Check if QEMU was already suspended by user/libvirt
  int originally_stopped = is_process_stopped(target_pid);
  if (originally_stopped < 0) {
    fprintf(stderr, "[-] Could not read status for PID %d. Does it exist?\n",
            target_pid);
    return 1;
  }
  if (originally_stopped) {
    printf("[*] Target is currently stopped (State: T). Will restore to "
           "stopped state upon exit.\n");
  }

  // 2. Seize: Attach without sending noisy SIGSTOP.
  // ptrace() is variadic, so pointer-typed NULL is passed rather than int 0.
  if (ptrace(PTRACE_SEIZE, target_pid, NULL, NULL) < 0) {
    perror("[-] PTRACE_SEIZE failed (are you root?)");
    return 1;
  }

  // 3. Interrupt: Force the thread to a clean debugger-stop
  if (ptrace(PTRACE_INTERRUPT, target_pid, NULL, NULL) < 0) {
    perror("[-] PTRACE_INTERRUPT failed");
    ptrace(PTRACE_DETACH, target_pid, NULL, NULL);
    return 1;
  }

  // 4. Wait & Drain: Let standard signals pass through until the Event Stop
  // hits. A waitpid()/PTRACE_CONT failure aborts instead of being mistaken for
  // a stabilized target.
  int status = 0;
  for (;;) {
    if (wait_for_tracee(target_pid, &status) < 0) {
      perror("[-] waitpid failed");
      ptrace(PTRACE_DETACH, target_pid, NULL, NULL);
      return 1;
    }
    if (!WIFSTOPPED(status)) {
      fprintf(stderr, "[-] Target process died or exited prematurely.\n");
      return 1;
    }
    // PTRACE_EVENT_STOP signifies our INTERRUPT succeeded
    if ((status >> 16) == PTRACE_EVENT_STOP) {
      break;
    }
    // Otherwise, it stopped due to a real signal (like a QEMU timer). Pass it
    // along!
    int sig = WSTOPSIG(status);
    if (ptrace(PTRACE_CONT, target_pid, NULL, (void *)(long)sig) < 0) {
      perror("[-] PTRACE_CONT failed");
      ptrace(PTRACE_DETACH, target_pid, NULL, (void *)(long)sig);
      return 1;
    }
  }
  printf("[+] Process seized, interrupted, and stabilized safely.\n");

  // 5. Context: Locate the KVM VM descriptor and a `syscall` instruction
  // inside the target while the seized thread is held in its stop.
  int vm_fd = find_kvm_vm_fd(target_pid);
  if (vm_fd < 0) {
    fprintf(stderr, "[-] Could not find anon_inode:kvm-vm. Detaching.\n");
    goto cleanup_detach;
  }
  unsigned long gadget = find_syscall_gadget(target_pid);
  if (!gadget) {
    fprintf(stderr, "[-] Could not find a syscall gadget. Detaching.\n");
    goto cleanup_detach;
  }
  printf("[+] Found KVM VM fd: %d | Syscall Gadget: 0x%lx\n", vm_fd, gadget);

  // 6. Backup State. Every step is checked: continuing after a failed backup
  // would later "restore" uninitialized bytes into the target.
  struct user_regs_struct orig_regs, regs;
  if (ptrace(PTRACE_GETREGS, target_pid, NULL, &orig_regs) < 0) {
    perror("[-] PTRACE_GETREGS failed");
    goto cleanup_detach;
  }
  regs = orig_regs;

  // Safety: Go 8KB deep below stack pointer to evade the x86_64 Red Zone and
  // signal frames
  unsigned long struct_addr = (regs.rsp - 8192) & ~7ULL;

  // Payload for the injected ioctl; its size also sizes the backup area.
  struct kvm_enable_cap cap;
  memset(&cap, 0, sizeof(cap));
  cap.cap = KVM_CAP_HALT_POLL;
  cap.args[0] = halt_poll_ns;

  char orig_stack[sizeof(cap)];
  if (rw_mem(target_pid, struct_addr, orig_stack, sizeof(orig_stack), 0) != 0) {
    fprintf(stderr, "[-] Could not back up target stack memory. Detaching.\n");
    goto cleanup_detach;
  }
  // Write ioctl payload to target memory
  if (rw_mem(target_pid, struct_addr, &cap, sizeof(cap), 1) != 0) {
    fprintf(stderr, "[-] Could not write ioctl payload into target.\n");
    goto restore_state;
  }

  // 7. Setup Registers for: ioctl(vm_fd, KVM_ENABLE_CAP_IOCTL, struct_addr)
  regs.rax = SYS_ioctl;
  regs.rdi = vm_fd;
  regs.rsi = KVM_ENABLE_CAP_IOCTL;
  regs.rdx = struct_addr;
  regs.rip =
      gadget; // Point instruction pointer exactly at the 'syscall' opcode
  // Clear volatile registers to avoid undefined syscall behaviors
  regs.r8 = regs.r9 = regs.r10 = regs.r11 = 0;
  if (ptrace(PTRACE_SETREGS, target_pid, NULL, &regs) < 0) {
    perror("[-] PTRACE_SETREGS failed");
    goto restore_state;
  }

  // 8. Shielded Single Step
  for (;;) {
    // Passing NULL as data suppresses pending signals so they don't hijack the
    // RIP. A signal that stops the step is therefore not delivered to the
    // target.
    if (ptrace(PTRACE_SINGLESTEP, target_pid, NULL, NULL) < 0) {
      perror("[-] PTRACE_SINGLESTEP failed");
      goto restore_state;
    }
    // Without this check a failed waitpid() would leave `status` stale and
    // the loop could spin forever.
    if (wait_for_tracee(target_pid, &status) < 0) {
      perror("[-] waitpid failed");
      goto restore_state;
    }
    if (!WIFSTOPPED(status)) {
      // Nothing left to restore or detach from.
      fprintf(stderr, "[-] Target died during single step.\n");
      return 1;
    }
    if (WSTOPSIG(status) == SIGTRAP) {
      // Trap means the instruction executed!
      if (ptrace(PTRACE_GETREGS, target_pid, NULL, &regs) < 0) {
        perror("[-] PTRACE_GETREGS failed");
      } else {
        ret_val = (long)regs.rax;
      }
      break;
    }
    // If stopped by a timer/signal, we loop and step again (signal is still
    // suppressed)
  }

  if (ret_val == 0) {
    printf("[+] Success! Dynamically set KVM_CAP_HALT_POLL to %llu ns.\n",
           halt_poll_ns);
  } else {
    printf("[-] ioctl failed, kernel returned: %ld\n", ret_val);
  }

restore_state:
  // 9. Restore State (reached only after the backup above succeeded)
  if (rw_mem(target_pid, struct_addr, orig_stack, sizeof(orig_stack), 1) != 0) {
    fprintf(stderr, "[-] WARNING: could not restore target stack memory.\n");
  }
  if (ptrace(PTRACE_SETREGS, target_pid, NULL, &orig_regs) < 0) {
    perror("[-] WARNING: could not restore target registers");
  }

cleanup_detach:
  // 10. Release Process
  // If the process was stopped before we touched it, we leave it stopped using
  // SIGSTOP. Otherwise, pass 0 so it continues running immediately.
  // (The label is followed by a statement, not a declaration, so this is
  // valid C before C23 as well.)
  if (ptrace(PTRACE_DETACH, target_pid, NULL,
             (void *)(long)(originally_stopped ? SIGSTOP : 0)) < 0) {
    perror("[-] PTRACE_DETACH failed");
    return 1;
  }
  printf("[+] Detached and restored target process completely.\n");
  return (ret_val == 0) ? 0 : 1;
}
@tsangwpx
Copy link
Author

tsangwpx commented Feb 28, 2026

apt update
apt install -y build-essential linux-headers-amd64
gcc -Wall -O2 -o set_kvm_cap_halt_poll set_kvm_cap_halt_poll.c

@tsangwpx
Copy link
Author

Example: disable halt poll for a particular libvirt domain

PID=$(cat /run/libvirt/qemu/DOMAIN.pid)
./set_kvm_cap_halt_poll "$PID" 0

However, once a VM-specific halt_poll_ns has been set, it cannot be unset. To restore the default behavior, copy the current system-wide value into the VM-specific one:

PID=$(cat /run/libvirt/qemu/DOMAIN.pid)
VALUE=$(cat /sys/module/kvm/parameters/halt_poll_ns)
./set_kvm_cap_halt_poll "$PID" "$VALUE"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment