Last active
October 29, 2025 16:41
-
-
Save stedolan/4e740b3bcb15a78a51bc4232c27917e6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/mman.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <x86intrin.h>
#include <string.h>
#include <sys/fcntl.h>
/* Capacity of the fixed-size per-event arrays used throughout this file. */
enum {
  MAX_PERF_EVENTS = 20
};
/*
 * Per-event values cached from the event's mmap'd metadata page so that a
 * sample can be taken without touching that page again.
 */
struct perf_counter_rdpmc_info {
  /* Copy of the page's `offset` field; added to the counter value read. */
  uint64_t offset;
  /* Argument passed to _rdpmc(): the page's `index` field minus one. */
  uint32_t index;
  /* Copy of the page's `pmc_width` field (counter width in bits). */
  uint16_t width;
};
| struct perf_counters { | |
| int ncounters; | |
| void* mmap_pages[MAX_PERF_EVENTS]; | |
| uint32_t* leader_seq_lock; | |
| uint32_t last_seq; | |
| struct perf_counter_rdpmc_info rdpmc_info[MAX_PERF_EVENTS]; | |
| }; | |
/*
 * Compiler-only barrier: an empty asm statement with a "memory" clobber, so
 * the compiler may not cache or reorder memory accesses across it. It emits no
 * instruction. Spelled with the __asm__/__volatile__ alternate keywords so it
 * also compiles in strict ISO modes (e.g. -std=c11), where plain `asm` is not
 * a keyword.
 */
#define seqlock_barrier() __asm__ __volatile__("" ::: "memory")
| static void perf_events_teardown(struct perf_counters* counters) | |
| { | |
| for (int i = 0; i < counters->ncounters; i++) { | |
| if (counters->mmap_pages[i] != NULL) | |
| munmap(counters->mmap_pages, sysconf(_SC_PAGESIZE)); | |
| } | |
| } | |
| static int perf_events_setup_rdpmc(struct perf_counters* counters) | |
| { | |
| uint32_t seqs[MAX_PERF_EVENTS]; | |
| struct perf_event_mmap_page* pg; | |
| retry: | |
| for (int i = 0; i < counters->ncounters; i++) { | |
| pg = counters->mmap_pages[i]; | |
| seqs[i] = pg->lock; | |
| } | |
| seqlock_barrier(); | |
| for (int i = 0; i < counters->ncounters; i++) { | |
| pg = counters->mmap_pages[i]; | |
| struct perf_counter_rdpmc_info* info = &counters->rdpmc_info[i]; | |
| info->offset = pg->offset; | |
| if (!(pg->cap_user_rdpmc && pg->index > 0)) { | |
| printf("bad: %d %d %d\n", pg->cap_user_rdpmc, pg->index, pg->lock); | |
| return 1; | |
| } | |
| info->index = pg->index - 1; | |
| info->width = pg->pmc_width; | |
| //printf("%d %d %lx\n", i, info->index, info->offset); | |
| } | |
| seqlock_barrier(); | |
| for (int i = 0; i < counters->ncounters; i++) { | |
| pg = counters->mmap_pages[i]; | |
| if (pg->lock != seqs[i]) | |
| goto retry; | |
| } | |
| counters->last_seq = seqs[0]; | |
| return 0; | |
| } | |
| static const char* perf_events_setup(const char* format, struct perf_counters* counters) | |
| { | |
| char err[256] = {0}; | |
| memset(counters, 0, sizeof(*counters)); | |
| int leader_fd = -1; | |
| while (*format != '\0') { | |
| if (format[0] != 'r') { | |
| snprintf(err, sizeof err, "events '%s': cannot parse (only 'rNNN' events supported)", format); | |
| break; | |
| } | |
| format++; | |
| char* endptr; | |
| unsigned long config = strtoul(format, &endptr, 16); | |
| if (*endptr == ',') format = endptr + 1; | |
| else if (*endptr == '\0') format = endptr; | |
| else { | |
| snprintf(err, sizeof err, "events '%s': cannot parse (only 'rNNN' events supported)", format); | |
| break; | |
| } | |
| int i = counters->ncounters++; | |
| struct perf_event_attr attr = { | |
| .size = sizeof(struct perf_event_attr), | |
| .type = PERF_TYPE_RAW, | |
| .config = config, | |
| .sample_type = PERF_SAMPLE_READ, | |
| .exclude_kernel = 0, | |
| .pinned = (i == 0) | |
| }; | |
| int fd = | |
| syscall(SYS_perf_event_open, | |
| &attr, 0, -1, | |
| leader_fd, | |
| PERF_FLAG_FD_CLOEXEC); | |
| if (fd < 0) { | |
| snprintf(err, sizeof err, "event %d (r%lx): perf_event_open: %s", i, config, strerror(errno)); | |
| break; | |
| } | |
| struct perf_event_mmap_page* page; | |
| page = counters->mmap_pages[i] = | |
| mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0); | |
| if (page == NULL) { | |
| snprintf(err, sizeof err, "event %d (r%lx): mmap: %s", i, config, strerror(errno)); | |
| break; | |
| } | |
| int rdpmc_ok; | |
| uint32_t seq; | |
| do { | |
| seq = page->lock; | |
| seqlock_barrier(); | |
| rdpmc_ok = page->cap_user_rdpmc && (page->index > 0); | |
| if (rdpmc_ok) _rdpmc(page->index - 1); | |
| seqlock_barrier(); | |
| } while (page->lock != seq); | |
| if (!rdpmc_ok) { | |
| snprintf(err, sizeof err, "event %d (r%lx): rdpmc reads unavailable (too many counters?)", i, config); | |
| break; | |
| } | |
| if (i == 0) { | |
| leader_fd = fd; | |
| counters->leader_seq_lock = &page->lock; | |
| counters->last_seq = (uint32_t)-1; | |
| } else { | |
| close(fd); | |
| } | |
| } | |
| if (leader_fd != -1) close(leader_fd); | |
| if (err[0] == 0 && perf_events_setup_rdpmc(counters) != 0) { | |
| snprintf(err, sizeof err, "events: failed to setup rdpmc info"); | |
| } | |
| if (err[0]) { | |
| perf_events_teardown(counters); | |
| return strdup(err); | |
| } else { | |
| return NULL; | |
| } | |
| } | |
| void perf_events_sample(struct perf_counters* counters, uint64_t* samples) | |
| { | |
| if (*counters->leader_seq_lock != counters->last_seq) | |
| perf_events_setup_rdpmc(counters); | |
| retry: | |
| int n = counters->ncounters; | |
| for (int i = 0; i < n; i++) { | |
| struct perf_counter_rdpmc_info info = counters->rdpmc_info[i]; | |
| uint64_t v = _rdpmc(info.index); | |
| v &= ((uint64_t)(-1)) >> (64 - info.width); | |
| v += info.offset; | |
| samples[i] = v; | |
| } | |
| seqlock_barrier(); | |
| if (*counters->leader_seq_lock != counters->last_seq) { | |
| perf_events_setup_rdpmc(counters); | |
| goto retry; | |
| } | |
| } | |
| volatile int vol1, vol2, vol3; | |
| int main() { | |
| const char* env = getenv("PERF"); | |
| if (!env) return 1; | |
| struct perf_counters counters; | |
| const char* err = perf_events_setup(env, &counters); | |
| if (err) { puts(err); return 1; } | |
| uint64_t before[MAX_PERF_EVENTS], after[MAX_PERF_EVENTS], tmp[MAX_PERF_EVENTS]; | |
| perf_events_sample(&counters, before); | |
| for (int i = 0; i < 1000000; i++) vol1 = vol2 + vol3; | |
| perf_events_sample(&counters, after); | |
| for (int i = 0; i < counters.ncounters; i++) { | |
| printf("%15lu\n", after[i] - before[i]); | |
| } | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment