Skip to content

Instantly share code, notes, and snippets.

@stedolan
Last active October 29, 2025 16:41
Show Gist options
  • Select an option

  • Save stedolan/4e740b3bcb15a78a51bc4232c27917e6 to your computer and use it in GitHub Desktop.

Select an option

Save stedolan/4e740b3bcb15a78a51bc4232c27917e6 to your computer and use it in GitHub Desktop.
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <x86intrin.h>
/* Capacity of the per-event arrays (mmap_pages[], rdpmc_info[]) in struct perf_counters. */
enum { MAX_PERF_EVENTS = 20 };
/*
 * Per-event parameters cached from the event's perf_event_mmap_page so that
 * perf_events_sample() can convert a raw rdpmc reading into a count without
 * touching the mapped page.
 */
struct perf_counter_rdpmc_info {
/* Added to the masked rdpmc reading; copied from the page's `offset` field. */
uint64_t offset;
/* Counter number handed to _rdpmc(): the page's `index` field minus one. */
uint32_t index;
/* Number of low bits kept from the rdpmc reading; copied from `pmc_width`. */
uint16_t width;
};
/*
 * A set of perf events opened by perf_events_setup() and sampled with rdpmc
 * by perf_events_sample().
 */
struct perf_counters {
/* Number of entries in use in mmap_pages[] and rdpmc_info[]. */
int ncounters;
/* One mapped perf_event_mmap_page per event, stored as void*. */
void* mmap_pages[MAX_PERF_EVENTS];
/* Address of the `lock` field inside the first event's mapped page. */
uint32_t* leader_seq_lock;
/*
 * Lock value of the first page at the time rdpmc_info[] was last refreshed;
 * perf_events_setup() initialises it to (uint32_t)-1.
 */
uint32_t last_seq;
/* Cached rdpmc parameters, one per event, parallel to mmap_pages[]. */
struct perf_counter_rdpmc_info rdpmc_info[MAX_PERF_EVENTS];
};
/*
 * Compiler-only barrier: an empty asm statement with a "memory" clobber, so
 * the compiler will not cache memory values in registers or move memory
 * accesses across it. No CPU fence instruction is emitted.
 *
 * Spelled with __asm__/__volatile__ because the bare `asm` keyword is a GNU
 * extension that is not recognised in strict ISO modes such as -std=c11.
 */
#define seqlock_barrier() __asm__ __volatile__("" ::: "memory")
/*
 * Unmap every event page recorded in `counters`.
 *
 * Slots that are NULL or MAP_FAILED are skipped. Each unmapped slot is reset
 * to NULL, and leader_seq_lock (which points into the first page) is
 * cleared, so calling this twice is harmless.
 */
static void perf_events_teardown(struct perf_counters* counters)
{
    const size_t page_size = (size_t)sysconf(_SC_PAGESIZE);

    for (int i = 0; i < counters->ncounters; i++) {
        void* page = counters->mmap_pages[i];
        if (page == NULL || page == MAP_FAILED)
            continue;
        /* Unmap the page itself; the original passed the address of the
         * mmap_pages array here, so nothing was ever released. */
        munmap(page, page_size);
        counters->mmap_pages[i] = NULL;
    }
    counters->leader_seq_lock = NULL;
}
/*
 * Refresh counters->rdpmc_info[] from the mapped perf_event_mmap_page of
 * every event.
 *
 * Each page's `lock` is read before and after its fields are copied; if any
 * lock value differs between the two reads the whole snapshot is discarded
 * and taken again (the seqlock-style read loop described for
 * perf_event_mmap_page in perf_event_open(2)). On success, and if there is at
 * least one event, counters->last_seq is set to the lock value observed on
 * the first page.
 *
 * Returns 0 on success. Returns 1, after printing a diagnostic to stderr, as
 * soon as a page reports cap_user_rdpmc clear or index == 0; entries of
 * rdpmc_info[] visited before that point may already have been overwritten.
 */
static int perf_events_setup_rdpmc(struct perf_counters* counters)
{
    uint32_t seq_before[MAX_PERF_EVENTS];

    for (;;) {
        int changed = 0;

        /* Pass 1: remember every page's lock value. */
        for (int k = 0; k < counters->ncounters; k++) {
            const struct perf_event_mmap_page* page = counters->mmap_pages[k];
            seq_before[k] = page->lock;
        }
        seqlock_barrier();

        /* Pass 2: copy out the fields needed to interpret rdpmc readings. */
        for (int k = 0; k < counters->ncounters; k++) {
            const struct perf_event_mmap_page* page = counters->mmap_pages[k];
            struct perf_counter_rdpmc_info* dst = &counters->rdpmc_info[k];

            dst->offset = page->offset;
            if (!page->cap_user_rdpmc || page->index == 0) {
                fprintf(stderr, "bad: %d %u %u\n",
                        (int)page->cap_user_rdpmc,
                        (unsigned)page->index,
                        (unsigned)page->lock);
                return 1;
            }
            /* The page's index is 1-based; rdpmc takes index - 1. */
            dst->index = page->index - 1;
            dst->width = page->pmc_width;
        }
        seqlock_barrier();

        /* Pass 3: the snapshot is valid only if no lock value moved. */
        for (int k = 0; k < counters->ncounters; k++) {
            const struct perf_event_mmap_page* page = counters->mmap_pages[k];
            if (page->lock != seq_before[k]) {
                changed = 1;
                break;
            }
        }
        if (!changed)
            break;
    }

    /* With zero events seq_before[] was never written; leave last_seq alone
     * rather than reading an uninitialised value. */
    if (counters->ncounters > 0)
        counters->last_seq = seq_before[0];
    return 0;
}
static const char* perf_events_setup(const char* format, struct perf_counters* counters)
{
char err[256] = {0};
memset(counters, 0, sizeof(*counters));
int leader_fd = -1;
while (*format != '\0') {
if (format[0] != 'r') {
snprintf(err, sizeof err, "events '%s': cannot parse (only 'rNNN' events supported)", format);
break;
}
format++;
char* endptr;
unsigned long config = strtoul(format, &endptr, 16);
if (*endptr == ',') format = endptr + 1;
else if (*endptr == '\0') format = endptr;
else {
snprintf(err, sizeof err, "events '%s': cannot parse (only 'rNNN' events supported)", format);
break;
}
int i = counters->ncounters++;
struct perf_event_attr attr = {
.size = sizeof(struct perf_event_attr),
.type = PERF_TYPE_RAW,
.config = config,
.sample_type = PERF_SAMPLE_READ,
.exclude_kernel = 0,
.pinned = (i == 0)
};
int fd =
syscall(SYS_perf_event_open,
&attr, 0, -1,
leader_fd,
PERF_FLAG_FD_CLOEXEC);
if (fd < 0) {
snprintf(err, sizeof err, "event %d (r%lx): perf_event_open: %s", i, config, strerror(errno));
break;
}
struct perf_event_mmap_page* page;
page = counters->mmap_pages[i] =
mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
if (page == NULL) {
snprintf(err, sizeof err, "event %d (r%lx): mmap: %s", i, config, strerror(errno));
break;
}
int rdpmc_ok;
uint32_t seq;
do {
seq = page->lock;
seqlock_barrier();
rdpmc_ok = page->cap_user_rdpmc && (page->index > 0);
if (rdpmc_ok) _rdpmc(page->index - 1);
seqlock_barrier();
} while (page->lock != seq);
if (!rdpmc_ok) {
snprintf(err, sizeof err, "event %d (r%lx): rdpmc reads unavailable (too many counters?)", i, config);
break;
}
if (i == 0) {
leader_fd = fd;
counters->leader_seq_lock = &page->lock;
counters->last_seq = (uint32_t)-1;
} else {
close(fd);
}
}
if (leader_fd != -1) close(leader_fd);
if (err[0] == 0 && perf_events_setup_rdpmc(counters) != 0) {
snprintf(err, sizeof err, "events: failed to setup rdpmc info");
}
if (err[0]) {
perf_events_teardown(counters);
return strdup(err);
} else {
return NULL;
}
}
/*
 * Read the current value of every event into samples[0 .. ncounters-1].
 *
 * For each event the raw _rdpmc() reading is reduced to its low `width` bits
 * and the cached `offset` is added. If the first page's lock no longer equals
 * counters->last_seq - before the reads or after them - the cached rdpmc
 * parameters are refreshed with perf_events_setup_rdpmc() and the reads are
 * repeated.
 *
 * `counters` must have been initialised successfully by perf_events_setup();
 * `samples` must have room for counters->ncounters values.
 *
 * NOTE(review): the return value of perf_events_setup_rdpmc() is ignored, as
 * in the original; if it keeps failing, last_seq is never updated and this
 * loop does not terminate.
 * NOTE(review): the read sequence documented in linux/perf_event.h
 * sign-extends the raw value from `width` bits rather than masking it -
 * confirm that masking is intended here.
 */
void perf_events_sample(struct perf_counters* counters, uint64_t* samples)
{
    if (*counters->leader_seq_lock != counters->last_seq)
        perf_events_setup_rdpmc(counters);

    for (;;) {
        const int n = counters->ncounters;

        for (int i = 0; i < n; i++) {
            const struct perf_counter_rdpmc_info info = counters->rdpmc_info[i];
            /* Shifting a 64-bit value by 64 or more is undefined, so only
             * build a narrowing mask for widths 1..63. */
            const uint64_t mask = (info.width > 0 && info.width < 64)
                ? UINT64_MAX >> (64 - info.width)
                : UINT64_MAX;

            samples[i] = ((uint64_t)_rdpmc((int)info.index) & mask) + info.offset;
        }
        seqlock_barrier();

        /* Readings are valid only if the cached parameters are still current. */
        if (*counters->leader_seq_lock == counters->last_seq)
            break;
        perf_events_setup_rdpmc(counters);
    }
}
/* Operands of the measured loop; volatile so every iteration really performs
 * two loads and a store. */
volatile int vol1, vol2, vol3;

/*
 * Demo driver: opens the events listed in the PERF environment variable,
 * samples them before and after a one-million-iteration loop, and prints one
 * difference per event to stdout.
 *
 * Returns 0 on success, 1 if PERF is unset or the events cannot be set up
 * (the reason is written to stderr).
 */
int main(void)
{
    const char* env = getenv("PERF");
    if (!env) {
        fprintf(stderr, "PERF is not set; expected a comma-separated list of raw events such as PERF=r1a2,r3b4\n");
        return 1;
    }

    struct perf_counters counters;
    const char* err = perf_events_setup(env, &counters);
    if (err) {
        fprintf(stderr, "%s\n", err);
        free((void*)err); /* message was allocated with strdup() */
        return 1;
    }

    uint64_t before[MAX_PERF_EVENTS], after[MAX_PERF_EVENTS];
    perf_events_sample(&counters, before);
    for (int i = 0; i < 1000000; i++) vol1 = vol2 + vol3;
    perf_events_sample(&counters, after);

    for (int i = 0; i < counters.ncounters; i++) {
        /* PRIu64 matches uint64_t on every target, unlike %lu. */
        printf("%15" PRIu64 "\n", after[i] - before[i]);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment