Skip to content

Instantly share code, notes, and snippets.

@hsqStephenZhang
Created February 3, 2026 11:55
Show Gist options
  • Select an option

  • Save hsqStephenZhang/8afa7159b2a9df683536a7280fc75956 to your computer and use it in GitHub Desktop.

Select an option

Save hsqStephenZhang/8afa7159b2a9df683536a7280fc75956 to your computer and use it in GitHub Desktop.
Benchmarks `std::time` vs. RDTSC performance for time measurement.
/// Benchmark comparing RDTSC vs std::time::Instant for time measurements
///
/// This benchmark measures the overhead of both time measurement approaches
/// used in Miri's tracing infrastructure.
use std::time::{Duration, Instant};
/// Reads the CPU time-stamp counter via the `RDTSC` instruction.
///
/// Only compiled on Linux x86/x86_64, where the intrinsic exists.
#[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
fn rdtsc() -> u64 {
    // Alias the matching arch module so the call site is identical on both targets.
    #[cfg(target_arch = "x86")]
    use std::arch::x86 as arch;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64 as arch;
    // SAFETY: RDTSC is available on every x86/x86_64 CPU this cfg targets;
    // the intrinsic has no memory-safety preconditions.
    unsafe { arch::_rdtsc() }
}
/// Times `iterations` back-to-back `rdtsc()` calls and returns the total
/// wall-clock duration of the loop.
#[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
fn benchmark_rdtsc(iterations: usize) -> Duration {
    let start = Instant::now();
    // Fold the readings into a wrapping checksum so every call is observable.
    let checksum = (0..iterations).fold(0u64, |acc, _| acc.wrapping_add(rdtsc()));
    // Keep the checksum alive so the optimizer cannot elide the RDTSC calls.
    std::hint::black_box(checksum);
    start.elapsed()
}
/// Times `iterations` calls to `Instant::now()` (each followed by a
/// `duration_since` against a fixed origin) and returns the loop's total
/// wall-clock duration.
fn benchmark_instant(iterations: usize) -> Duration {
    let start = Instant::now();
    // Accumulate the sampled offsets so the timing calls cannot be optimized out.
    let total = (0..iterations)
        .fold(Duration::ZERO, |acc, _| acc + Instant::now().duration_since(start));
    std::hint::black_box(total);
    start.elapsed()
}
/// Times `iterations` calls to `Instant::elapsed()` against a fixed reference
/// instant and returns the loop's total wall-clock duration.
fn benchmark_instant_elapsed(iterations: usize) -> Duration {
    let start = Instant::now();
    let reference = Instant::now();
    let mut total = Duration::ZERO;
    let mut remaining = iterations;
    while remaining != 0 {
        total += reference.elapsed();
        remaining -= 1;
    }
    // Keep the accumulated sum alive so the elapsed() calls are not elided.
    std::hint::black_box(total);
    start.elapsed()
}
/// Entry point: warms up, runs each timing benchmark, and prints per-call
/// overhead plus a comparison against the figures discussed in issue #4563.
fn main() {
    // Enough iterations to average out scheduler/clock noise.
    const ITERATIONS: usize = 1_000_000;
    // Shorter pass to prime caches and branch predictors before measuring.
    const WARMUP_ITERATIONS: usize = 100_000;
    println!("Benchmarking time measurement overhead");
    println!("======================================\n");
    // Warm up
    println!("Warming up...");
    benchmark_instant(WARMUP_ITERATIONS);
    benchmark_instant_elapsed(WARMUP_ITERATIONS);
    #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
    benchmark_rdtsc(WARMUP_ITERATIONS);
    println!("\nRunning benchmarks with {} iterations...\n", ITERATIONS);
    // Benchmark Instant::now()
    let instant_now_duration = benchmark_instant(ITERATIONS);
    let instant_now_ns_per_call = instant_now_duration.as_nanos() / ITERATIONS as u128;
    println!("std::time::Instant::now():");
    println!(" Total time: {:?}", instant_now_duration);
    println!(" Per call: ~{} ns", instant_now_ns_per_call);
    // Benchmark elapsed()
    let instant_elapsed_duration = benchmark_instant_elapsed(ITERATIONS);
    let instant_elapsed_ns_per_call = instant_elapsed_duration.as_nanos() / ITERATIONS as u128;
    println!("\nstd::time::Instant::elapsed():");
    println!(" Total time: {:?}", instant_elapsed_duration);
    println!(" Per call: ~{} ns", instant_elapsed_ns_per_call);
    // The RDTSC measurement (and the binding used by the impact section below)
    // only exists on Linux x86/x86_64, hence the cfg-gated block expression.
    #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
    let rdtsc_ns_per_call = {
        // Benchmark RDTSC
        let rdtsc_duration = benchmark_rdtsc(ITERATIONS);
        let rdtsc_ns_per_call = rdtsc_duration.as_nanos() / ITERATIONS as u128;
        println!("\nRDTSC (x86/x64 only):");
        println!(" Total time: {:?}", rdtsc_duration);
        println!(" Per call: ~{} ns", rdtsc_ns_per_call);
        // Calculate speedup
        let speedup_vs_now = instant_now_ns_per_call as f64 / rdtsc_ns_per_call as f64;
        let speedup_vs_elapsed = instant_elapsed_ns_per_call as f64 / rdtsc_ns_per_call as f64;
        println!("\nSpeedup analysis:");
        println!(" RDTSC vs Instant::now(): {:.2}x faster", speedup_vs_now);
        println!(" RDTSC vs Instant::elapsed(): {:.2}x faster", speedup_vs_elapsed);
        // Analysis from issue #4563
        println!("\n=== Analysis ===");
        println!("According to issue #4563 discussion:");
        println!("- RDTSC overhead: ~5 ns per call");
        println!("- Instant overhead: ~1500 ns (1.5 µs) per call");
        println!("- Expected speedup: ~300x");
        println!("\nActual measurements:");
        println!("- RDTSC: {} ns", rdtsc_ns_per_call);
        println!("- Instant::now(): {} ns", instant_now_ns_per_call);
        println!("- Instant::elapsed(): {} ns", instant_elapsed_ns_per_call);
        println!("\nNote: Modern kernels use vDSO for clock_gettime, which typically");
        println!("uses RDTSC internally when the TSC is stable. The overhead comes from:");
        println!("1. Function call through libc");
        println!("2. VDSO wrapper logic");
        println!("3. Additional safety checks");
        rdtsc_ns_per_call
    };
    #[cfg(not(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64"))))]
    {
        println!("\nRDTSC benchmarks are only available on Linux x86/x86_64");
    }
    println!("\n=== Impact on Tracing ===");
    println!("If tracing happens 1,000,000 times during compilation:");
    // per-call ns × 1e6 calls = total ns; dividing by 1e6 converts ns -> ms.
    let instant_total_ms = (instant_elapsed_ns_per_call * 1_000_000) / 1_000_000;
    println!("- With Instant: ~{} ms overhead", instant_total_ms);
    #[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
    {
        let rdtsc_total_ms = (rdtsc_ns_per_call * 1_000_000) / 1_000_000;
        println!("- With RDTSC: ~{} ms overhead", rdtsc_total_ms);
        // saturating_sub avoids a u128 overflow panic in the (unlikely) case
        // that RDTSC measured slower than Instant on this machine.
        println!("- Savings: ~{} ms", instant_total_ms.saturating_sub(rdtsc_total_ms));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment