Created
February 3, 2026 11:55
-
-
Save hsqStephenZhang/8afa7159b2a9df683536a7280fc75956 to your computer and use it in GitHub Desktop.
Benchmark comparing `std::time::Instant` and raw RDTSC performance.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /// Benchmark comparing RDTSC vs std::time::Instant for time measurements | |
| /// | |
| /// This benchmark measures the overhead of both time measurement approaches | |
| /// used in Miri's tracing infrastructure. | |
| use std::time::{Duration, Instant}; | |
/// Reads the CPU time-stamp counter via the `RDTSC` instruction.
///
/// Returns the raw cycle count. Only compiled on Linux x86/x86_64, where
/// the intrinsic is available.
#[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
fn rdtsc() -> u64 {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::_rdtsc;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::_rdtsc;
    // SAFETY: `_rdtsc` has no memory-safety preconditions; the cfg gate above
    // guarantees we are on an x86/x86_64 target where the instruction exists.
    unsafe { _rdtsc() }
}
| #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] | |
| fn benchmark_rdtsc(iterations: usize) -> Duration { | |
| let start = Instant::now(); | |
| let mut sum: u64 = 0; | |
| for _ in 0..iterations { | |
| sum = sum.wrapping_add(rdtsc()); | |
| } | |
| // Use sum to prevent optimization | |
| std::hint::black_box(sum); | |
| start.elapsed() | |
| } | |
/// Times `iterations` calls to `Instant::now()` (each followed by a
/// `duration_since`) and returns the total wall-clock time taken.
fn benchmark_instant(iterations: usize) -> Duration {
    let started_at = Instant::now();
    let mut accumulated = Duration::ZERO;
    let mut remaining = iterations;
    while remaining > 0 {
        accumulated += Instant::now().duration_since(started_at);
        remaining -= 1;
    }
    // Keep the accumulator alive so the loop is not optimized away.
    std::hint::black_box(accumulated);
    started_at.elapsed()
}
/// Times `iterations` calls to `Instant::elapsed()` against a fixed reference
/// point and returns the total wall-clock time taken.
fn benchmark_instant_elapsed(iterations: usize) -> Duration {
    let started_at = Instant::now();
    let reference_point = Instant::now();
    let mut accumulated = Duration::ZERO;
    for _ in 0..iterations {
        accumulated += reference_point.elapsed();
    }
    // Observe the accumulator so the measured calls cannot be elided.
    std::hint::black_box(accumulated);
    started_at.elapsed()
}
| fn main() { | |
| const ITERATIONS: usize = 1_000_000; | |
| const WARMUP_ITERATIONS: usize = 100_000; | |
| println!("Benchmarking time measurement overhead"); | |
| println!("======================================\n"); | |
| // Warm up | |
| println!("Warming up..."); | |
| benchmark_instant(WARMUP_ITERATIONS); | |
| benchmark_instant_elapsed(WARMUP_ITERATIONS); | |
| #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] | |
| benchmark_rdtsc(WARMUP_ITERATIONS); | |
| println!("\nRunning benchmarks with {} iterations...\n", ITERATIONS); | |
| // Benchmark Instant::now() | |
| let instant_now_duration = benchmark_instant(ITERATIONS); | |
| let instant_now_ns_per_call = instant_now_duration.as_nanos() / ITERATIONS as u128; | |
| println!("std::time::Instant::now():"); | |
| println!(" Total time: {:?}", instant_now_duration); | |
| println!(" Per call: ~{} ns", instant_now_ns_per_call); | |
| // Benchmark elapsed() | |
| let instant_elapsed_duration = benchmark_instant_elapsed(ITERATIONS); | |
| let instant_elapsed_ns_per_call = instant_elapsed_duration.as_nanos() / ITERATIONS as u128; | |
| println!("\nstd::time::Instant::elapsed():"); | |
| println!(" Total time: {:?}", instant_elapsed_duration); | |
| println!(" Per call: ~{} ns", instant_elapsed_ns_per_call); | |
| #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] | |
| let rdtsc_ns_per_call = { | |
| // Benchmark RDTSC | |
| let rdtsc_duration = benchmark_rdtsc(ITERATIONS); | |
| let rdtsc_ns_per_call = rdtsc_duration.as_nanos() / ITERATIONS as u128; | |
| println!("\nRDTSC (x86/x64 only):"); | |
| println!(" Total time: {:?}", rdtsc_duration); | |
| println!(" Per call: ~{} ns", rdtsc_ns_per_call); | |
| // Calculate speedup | |
| let speedup_vs_now = instant_now_ns_per_call as f64 / rdtsc_ns_per_call as f64; | |
| let speedup_vs_elapsed = instant_elapsed_ns_per_call as f64 / rdtsc_ns_per_call as f64; | |
| println!("\nSpeedup analysis:"); | |
| println!(" RDTSC vs Instant::now(): {:.2}x faster", speedup_vs_now); | |
| println!(" RDTSC vs Instant::elapsed(): {:.2}x faster", speedup_vs_elapsed); | |
| // Analysis from issue #4563 | |
| println!("\n=== Analysis ==="); | |
| println!("According to issue #4563 discussion:"); | |
| println!("- RDTSC overhead: ~5 ns per call"); | |
| println!("- Instant overhead: ~1500 ns (1.5 µs) per call"); | |
| println!("- Expected speedup: ~300x"); | |
| println!("\nActual measurements:"); | |
| println!("- RDTSC: {} ns", rdtsc_ns_per_call); | |
| println!("- Instant::now(): {} ns", instant_now_ns_per_call); | |
| println!("- Instant::elapsed(): {} ns", instant_elapsed_ns_per_call); | |
| println!("\nNote: Modern kernels use vDSO for clock_gettime, which typically"); | |
| println!("uses RDTSC internally when the TSC is stable. The overhead comes from:"); | |
| println!("1. Function call through libc"); | |
| println!("2. VDSO wrapper logic"); | |
| println!("3. Additional safety checks"); | |
| rdtsc_ns_per_call | |
| }; | |
| #[cfg(not(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64"))))] | |
| { | |
| println!("\nRDTSC benchmarks are only available on Linux x86/x86_64"); | |
| } | |
| println!("\n=== Impact on Tracing ==="); | |
| println!("If tracing happens 1,000,000 times during compilation:"); | |
| let instant_total_ms = (instant_elapsed_ns_per_call * 1_000_000) / 1_000_000; | |
| println!("- With Instant: ~{} ms overhead", instant_total_ms); | |
| #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))] | |
| { | |
| let rdtsc_total_ms = (rdtsc_ns_per_call * 1_000_000) / 1_000_000; | |
| println!("- With RDTSC: ~{} ms overhead", rdtsc_total_ms); | |
| println!("- Savings: ~{} ms", instant_total_ms - rdtsc_total_ms); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment