Skip to content

Instantly share code, notes, and snippets.

@hsqStephenZhang
Created February 22, 2026 20:01
Show Gist options
  • Select an option

  • Save hsqStephenZhang/35ae708330ee18edd7da15c0e9b4cef4 to your computer and use it in GitHub Desktop.

Select an option

Save hsqStephenZhang/35ae708330ee18edd7da15c0e9b4cef4 to your computer and use it in GitHub Desktop.
Benchmark for SIMD atoi on NEON (AArch64)
use criterion::{Criterion, criterion_group, criterion_main};
use std::arch::aarch64::*;
use std::hint::black_box;
/// Parses exactly 16 ASCII decimal digits into a `u64` using NEON.
///
/// Each stage separates the even-indexed and odd-indexed lanes with
/// `vuzp1q`/`vuzp2q`, then merges them as `even * weight + odd` through a
/// widening multiply followed by a widening add, halving the lane count and
/// doubling the lane width every time.
///
/// The input is not validated: every byte simply has `b'0'` subtracted and is
/// then treated as a digit value.
///
/// # Safety
///
/// `data_ptr` must be valid for a 16-byte read.
#[inline(always)]
unsafe fn atoi_unzip(data_ptr: *const u8) -> u64 {
    // ASCII bytes -> digit values, one per u8 lane.
    let ascii = vld1q_u8(data_ptr);
    let digits = vsubq_u8(ascii, vdupq_n_u8(b'0'));

    // Stage 1 (u8 -> u16): eight two-digit values, tens * 10 + ones.
    let tens = vget_low_u8(vuzp1q_u8(digits, digits));
    let ones = vget_low_u8(vuzp2q_u8(digits, digits));
    let pairs = vaddw_u8(vmull_u8(tens, vdup_n_u8(10)), ones);

    // Stage 2 (u16 -> u32): four four-digit values, high * 100 + low.
    let pair_hi = vget_low_u16(vuzp1q_u16(pairs, pairs));
    let pair_lo = vget_low_u16(vuzp2q_u16(pairs, pairs));
    let quads = vaddw_u16(vmull_u16(pair_hi, vdup_n_u16(100)), pair_lo);

    // Stage 3 (u32 -> u64): two eight-digit values, high * 10000 + low.
    let quad_hi = vget_low_u32(vuzp1q_u32(quads, quads));
    let quad_lo = vget_low_u32(vuzp2q_u32(quads, quads));
    let halves = vaddw_u32(vmull_u32(quad_hi, vdup_n_u32(10000)), quad_lo);

    // Lane 0 holds the leading eight digits, lane 1 the trailing eight.
    let leading = vgetq_lane_u64::<0>(halves);
    let trailing = vgetq_lane_u64::<1>(halves);
    leading * 100_000_000 + trailing
}
/// Parses exactly 16 ASCII decimal digits into a `u64` using NEON.
///
/// Each stage multiplies the lanes by an alternating `[weight, 1, ...]`
/// pattern and then sums adjacent lanes into lanes of twice the width with
/// `vpaddlq`, so every pair collapses to `left * weight + right`.
///
/// The input is not validated: every byte simply has `b'0'` subtracted and is
/// then treated as a digit value.
///
/// # Safety
///
/// `data_ptr` must be valid for a 16-byte read.
#[inline(always)]
unsafe fn atoi_pairwise(data_ptr: *const u8) -> u64 {
    // Per-stage lane weights: the left lane of each pair is scaled, the right
    // lane is kept as is.
    let tens_weights: [u8; 16] = [10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1];
    let hundreds_weights: [u16; 8] = [100, 1, 100, 1, 100, 1, 100, 1];
    let myriad_weights: [u32; 4] = [10000, 1, 10000, 1];

    // ASCII bytes -> digit values, one per u8 lane.
    let ascii = vld1q_u8(data_ptr);
    let digits = vsubq_u8(ascii, vdupq_n_u8(b'0'));

    // Stage 1 (u8 -> u16): eight two-digit values.
    let scaled = vmulq_u8(digits, vld1q_u8(tens_weights.as_ptr()));
    let pairs = vpaddlq_u8(scaled);

    // Stage 2 (u16 -> u32): four four-digit values.
    let scaled = vmulq_u16(pairs, vld1q_u16(hundreds_weights.as_ptr()));
    let quads = vpaddlq_u16(scaled);

    // Stage 3 (u32 -> u64): two eight-digit values.
    let scaled = vmulq_u32(quads, vld1q_u32(myriad_weights.as_ptr()));
    let halves = vpaddlq_u32(scaled);

    // Lane 0 holds the leading eight digits, lane 1 the trailing eight.
    let leading = vgetq_lane_u64::<0>(halves);
    let trailing = vgetq_lane_u64::<1>(halves);
    leading * 100_000_000 + trailing
}
/// Benchmarks the two hand-written NEON parsers and `atoi_simd::parse_pos`
/// on the same 16-digit input, all within the `SIMD_Atoi_16B` group.
///
/// Inputs and results go through `black_box` so the optimizer cannot fold the
/// parse of the constant input away.
fn bench_atoi(c: &mut Criterion) {
    let input = b"1234567890654321";
    let mut atoi_group = c.benchmark_group("SIMD_Atoi_16B");

    atoi_group.bench_function("unzip_method", |bencher| {
        bencher.iter(|| {
            let ptr = black_box(input.as_ptr());
            // SAFETY: `input` is a 16-byte array, which covers the 16-byte
            // load performed by `atoi_unzip`.
            let parsed = unsafe { atoi_unzip(ptr) };
            black_box(parsed)
        })
    });

    atoi_group.bench_function("pairwise_method", |bencher| {
        bencher.iter(|| {
            let ptr = black_box(input.as_ptr());
            // SAFETY: `input` is a 16-byte array, which covers the 16-byte
            // load performed by `atoi_pairwise`.
            let parsed = unsafe { atoi_pairwise(ptr) };
            black_box(parsed)
        })
    });

    atoi_group.bench_function("atoi_simd_method", |bencher| {
        bencher.iter(|| {
            let parsed = atoi_simd::parse_pos::<u64, false>(black_box(input));
            black_box(parsed.unwrap())
        })
    });

    atoi_group.finish();
}
// Criterion entry points: collect `bench_atoi` into a group named `benches`,
// then hand that group to `criterion_main!` (which, per Criterion's
// documentation, generates the benchmark binary's `main`).
criterion_group!(benches, bench_atoi);
criterion_main!(benches);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment