Last active
February 28, 2026 06:48
-
-
Save tsangwpx/0926192db82c06073116b8a0a12a878f to your computer and use it in GitHub Desktop.
Use ioctl to set up KVM_CAP_HALT_POLL in qemu
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // vibe code product. Use with CAUTION. | |
| // Unlicense. | |
| #include <dirent.h> | |
| #include <errno.h> | |
| #include <fcntl.h> | |
| #include <linux/kvm.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <sys/ptrace.h> | |
| #include <sys/stat.h> | |
| #include <sys/syscall.h> | |
| #include <sys/types.h> | |
| #include <sys/user.h> | |
| #include <sys/wait.h> | |
| #include <unistd.h> | |
| #define KVM_CAP_HALT_POLL 182 | |
| #define KVM_ENABLE_CAP_IOCTL 0x4068aea3 | |
| /* --- HELPERS --- */ | |
/*
 * Report whether a process is currently in a stopped state.
 *
 * Reads /proc/<pid>/status, locates the "State:" line and inspects the first
 * non-blank character after the label.
 *
 * Returns 1 when that character is 'T' or 't', 0 when it is anything else
 * (or when no "State:" line was found), and -1 when the status file could
 * not be opened.
 */
int is_process_stopped(pid_t pid) {
  static const char label[] = "State:";
  const size_t label_len = sizeof(label) - 1;
  char status_path[256];
  char row[256];
  int result = 0;
  FILE *fp;

  snprintf(status_path, sizeof(status_path), "/proc/%d/status", pid);
  fp = fopen(status_path, "r");
  if (fp == NULL)
    return -1;

  while (fgets(row, sizeof(row), fp) != NULL) {
    const char *state;

    if (strncmp(row, label, label_len) != 0)
      continue;

    /* Skip the blanks/tabs separating the label from the state letter. */
    state = row + label_len;
    state += strspn(state, " \t");

    result = (*state == 'T' || *state == 't') ? 1 : 0;
    break; /* only the first "State:" line matters */
  }

  fclose(fp);
  return result;
}
/*
 * Copy `len` bytes between this process and another process's address space
 * through /proc/<pid>/mem.
 *
 *   pid      - process whose memory is accessed
 *   addr     - address inside that process
 *   data     - local buffer (source when writing, destination when reading)
 *   len      - number of bytes to transfer
 *   is_write - non-zero: write `data` to the target; zero: read into `data`
 *
 * Returns 0 only when exactly `len` bytes were transferred; -1 when the mem
 * file could not be opened or the transfer was short or failed.
 */
int rw_mem(pid_t pid, unsigned long addr, void *data, size_t len,
           int is_write) {
  char mem_path[256];
  ssize_t moved;
  int open_flags;
  int mem_fd;

  snprintf(mem_path, sizeof(mem_path), "/proc/%d/mem", pid);

  open_flags = is_write ? O_WRONLY : O_RDONLY;
  mem_fd = open(mem_path, open_flags);
  if (mem_fd < 0)
    return -1;

  /* The file offset in /proc/<pid>/mem is the target address itself. */
  if (is_write)
    moved = pwrite(mem_fd, data, len, (off_t)addr);
  else
    moved = pread(mem_fd, data, len, (off_t)addr);

  close(mem_fd);

  if (moved != (ssize_t)len)
    return -1;
  return 0;
}
/*
 * Locate the byte pair 0x0f 0x05 (the x86_64 `syscall` opcode) inside a
 * readable+executable mapping of the given process.
 *
 * Walks /proc/<pid>/maps; for each mapping whose permission string has 'r'
 * in position 0 and 'x' in position 2, up to the first 32 MiB of the mapping
 * are read through rw_mem() and scanned. Mappings that cannot be read in
 * full (or for which the scratch buffer cannot be allocated) are skipped.
 *
 * Returns the address of the first match in the target's address space, or
 * 0 when the maps file cannot be opened or no match is found.
 */
unsigned long find_syscall_gadget(pid_t pid) {
  char maps_path[256];
  char entry[1024];
  char mode[5];
  unsigned long lo, hi;
  unsigned long found = 0;
  FILE *maps;

  snprintf(maps_path, sizeof(maps_path), "/proc/%d/maps", pid);
  maps = fopen(maps_path, "r");
  if (maps == NULL)
    return 0;

  while (found == 0 && fgets(entry, sizeof(entry), maps) != NULL) {
    unsigned char *image;
    size_t span;

    if (sscanf(entry, "%lx-%lx %4s", &lo, &hi, mode) != 3)
      continue;
    if (mode[0] != 'r' || mode[2] != 'x')
      continue;

    /* Cap the amount scanned per mapping at 32 MiB. */
    span = hi - lo;
    if (span > 32 * 1024 * 1024)
      span = 32 * 1024 * 1024;

    image = malloc(span);
    if (image == NULL)
      continue;

    if (rw_mem(pid, lo, image, span, 0) == 0) {
      /* Stop one byte early: each probe looks at image[off] and the next. */
      for (size_t off = 0; off < span - 1; off++) {
        if (image[off] != 0x0f || image[off + 1] != 0x05)
          continue;
        found = lo + off;
        break;
      }
    }

    free(image);
  }

  fclose(maps);
  return found;
}
/*
 * Find the file descriptor number that the given process holds for its KVM
 * VM object.
 *
 * Iterates over /proc/<pid>/fd, resolves each symlink and compares the link
 * target with "anon_inode:kvm-vm". Entries whose name starts with '.' and
 * links that cannot be read are ignored.
 *
 * Returns the descriptor number (as seen inside the target process) of the
 * first match, or -1 when the directory cannot be opened or nothing matches.
 */
int find_kvm_vm_fd(pid_t pid) {
  char fd_dir[256];
  int result = -1;
  DIR *dp;

  snprintf(fd_dir, sizeof(fd_dir), "/proc/%d/fd", pid);
  dp = opendir(fd_dir);
  if (dp == NULL)
    return -1;

  for (struct dirent *de = readdir(dp); de != NULL; de = readdir(dp)) {
    char entry_path[512];
    char target[512];
    ssize_t n;

    if (de->d_name[0] == '.')
      continue;

    snprintf(entry_path, sizeof(entry_path), "%s/%s", fd_dir, de->d_name);

    /* readlink() does not NUL-terminate; leave room and do it by hand. */
    n = readlink(entry_path, target, sizeof(target) - 1);
    if (n <= 0)
      continue;
    target[n] = '\0';

    if (strcmp(target, "anon_inode:kvm-vm") != 0)
      continue;

    /* The directory entry name is the descriptor number in decimal. */
    result = atoi(de->d_name);
    break;
  }

  closedir(dp);
  return result;
}
| /* --- MAIN INJECTION LOGIC --- */ | |
| int main(int argc, char *argv[]) { | |
| if (argc != 3) { | |
| fprintf(stderr, "Usage: %s <qemu_pid> <halt_poll_ns>\n", argv[0]); | |
| return 1; | |
| } | |
| long ret_val = -1; | |
| pid_t target_pid = atoi(argv[1]); | |
| unsigned long long halt_poll_ns = strtoull(argv[2], NULL, 10); | |
| // 1. Snapshot: Check if QEMU was already suspended by user/libvirt | |
| int originally_stopped = is_process_stopped(target_pid); | |
| if (originally_stopped < 0) { | |
| fprintf(stderr, "[-] Could not read status for PID %d. Does it exist?\n", | |
| target_pid); | |
| return 1; | |
| } | |
| if (originally_stopped) { | |
| printf("[*] Target is currently stopped (State: T). Will restore to " | |
| "stopped state upon exit.\n"); | |
| } | |
| // 2. Seize: Attach without sending noisy SIGSTOP | |
| if (ptrace(PTRACE_SEIZE, target_pid, NULL, 0) < 0) { | |
| perror("[-] PTRACE_SEIZE failed (are you root?)"); | |
| return 1; | |
| } | |
| // 3. Interrupt: Force the thread to a clean debugger-stop | |
| if (ptrace(PTRACE_INTERRUPT, target_pid, NULL, NULL) < 0) { | |
| perror("[-] PTRACE_INTERRUPT failed"); | |
| ptrace(PTRACE_DETACH, target_pid, NULL, NULL); | |
| return 1; | |
| } | |
| // 4. Wait & Drain: Let standard signals pass through until the Event Stop | |
| // hits | |
| int status; | |
| while (waitpid(target_pid, &status, 0) > 0) { | |
| if (!WIFSTOPPED(status)) { | |
| fprintf(stderr, "[-] Target process died or exited prematurely.\n"); | |
| return 1; | |
| } | |
| // PTRACE_EVENT_STOP signifies our INTERRUPT succeeded | |
| if ((status >> 16) == PTRACE_EVENT_STOP) { | |
| break; | |
| } | |
| // Otherwise, it stopped due to a real signal (like a QEMU timer). Pass it | |
| // along! | |
| int sig = WSTOPSIG(status); | |
| ptrace(PTRACE_CONT, target_pid, NULL, (void *)(long)sig); | |
| } | |
| printf("[+] Process seized, interrupted, and stabilized safely.\n"); | |
| // 5. Context: Scan for FD and Gadget while process is completely frozen | |
| int vm_fd = find_kvm_vm_fd(target_pid); | |
| if (vm_fd < 0) { | |
| fprintf(stderr, "[-] Could not find anon_inode:kvm-vm. Detaching.\n"); | |
| goto cleanup_detach; | |
| } | |
| unsigned long gadget = find_syscall_gadget(target_pid); | |
| if (!gadget) { | |
| fprintf(stderr, "[-] Could not find a syscall gadget. Detaching.\n"); | |
| goto cleanup_detach; | |
| } | |
| printf("[+] Found KVM VM fd: %d | Syscall Gadget: 0x%lx\n", vm_fd, gadget); | |
| // 6. Backup State | |
| struct user_regs_struct orig_regs, regs; | |
| ptrace(PTRACE_GETREGS, target_pid, NULL, &orig_regs); | |
| regs = orig_regs; | |
| // Safety: Go 8KB deep below stack pointer to evade the x86_64 Red Zone and | |
| // signal frames | |
| unsigned long struct_addr = (regs.rsp - 8192) & ~7ULL; | |
| char orig_stack[104]; | |
| rw_mem(target_pid, struct_addr, orig_stack, 104, 0); // Backup memory | |
| // Write ioctl payload to target memory | |
| struct kvm_enable_cap cap; | |
| memset(&cap, 0, sizeof(cap)); | |
| cap.cap = KVM_CAP_HALT_POLL; | |
| cap.args[0] = halt_poll_ns; | |
| rw_mem(target_pid, struct_addr, &cap, 104, 1); | |
| // 7. Setup Registers for: ioctl(vm_fd, KVM_ENABLE_CAP_IOCTL, struct_addr) | |
| regs.rax = SYS_ioctl; | |
| regs.rdi = vm_fd; | |
| regs.rsi = KVM_ENABLE_CAP_IOCTL; | |
| regs.rdx = struct_addr; | |
| regs.rip = | |
| gadget; // Point instruction pointer exactly at the 'syscall' opcode | |
| // Clear volatile registers to avoid undefined syscall behaviors | |
| regs.r8 = regs.r9 = regs.r10 = regs.r11 = 0; | |
| ptrace(PTRACE_SETREGS, target_pid, NULL, ®s); | |
| // 8. Shielded Single Step | |
| while (1) { | |
| // Passing 0 as data suppresses pending signals so they don't hijack the RIP | |
| ptrace(PTRACE_SINGLESTEP, target_pid, NULL, 0); | |
| waitpid(target_pid, &status, 0); | |
| if (!WIFSTOPPED(status)) { | |
| fprintf(stderr, "[-] Target died during single step.\n"); | |
| break; | |
| } | |
| int sig = WSTOPSIG(status); | |
| if (sig == SIGTRAP) { | |
| // Trap means the instruction executed! | |
| ptrace(PTRACE_GETREGS, target_pid, NULL, ®s); | |
| ret_val = (long)regs.rax; | |
| break; | |
| } | |
| // If stopped by a timer/signal, we loop and step again (signal is still | |
| // suppressed) | |
| } | |
| if (ret_val == 0) { | |
| printf("[+] Success! Dynamically set KVM_CAP_HALT_POLL to %llu ns.\n", | |
| halt_poll_ns); | |
| } else { | |
| printf("[-] ioctl failed, kernel returned: %ld\n", ret_val); | |
| } | |
| // 9. Restore State | |
| rw_mem(target_pid, struct_addr, orig_stack, 104, 1); // Restore original stack | |
| ptrace(PTRACE_SETREGS, target_pid, NULL, &orig_regs); // Restore all registers | |
| cleanup_detach: | |
| // 10. Release Process | |
| // If the process was stopped before we touched it, we leave it stopped using | |
| // SIGSTOP. Otherwise, pass 0 so it continues running immediately. | |
| int detach_sig = originally_stopped ? SIGSTOP : 0; | |
| ptrace(PTRACE_DETACH, target_pid, NULL, (void *)(long)detach_sig); | |
| printf("[+] Detached and restored target process completely.\n"); | |
| return (ret_val == 0) ? 0 : 1; | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example: disable halt poll for a particular libvirt domain
However, once a VM-specific halt_poll_ns has been set, it cannot be unset. To restore the default behavior, set the VM-specific value to the current system-wide halt_poll_ns value.