Created
February 22, 2026 20:01
-
-
Save hsqStephenZhang/35ae708330ee18edd7da15c0e9b4cef4 to your computer and use it in GitHub Desktop.
Benchmark of SIMD atoi implementations on AArch64 NEON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use criterion::{Criterion, criterion_group, criterion_main}; | |
| use std::arch::aarch64::*; | |
| use std::hint::black_box; | |
| #[inline(always)] | |
| unsafe fn atoi_unzip(data_ptr: *const u8) -> u64 { | |
| let zeros = vdupq_n_u8(b'0'); | |
| let reg = vsubq_u8(vld1q_u8(data_ptr), zeros); | |
| // Level 1: u8 -> u16 | |
| let even = vuzp1q_u8(reg, reg); | |
| let odd = vuzp2q_u8(reg, reg); | |
| let res = vaddw_u8(vmull_u8(vget_low_u8(even), vdup_n_u8(10)), vget_low_u8(odd)); | |
| // Level 2: u16 -> u32 | |
| let even = vuzp1q_u16(res, res); | |
| let odd = vuzp2q_u16(res, res); | |
| let res = vaddw_u16( | |
| vmull_u16(vget_low_u16(even), vdup_n_u16(100)), | |
| vget_low_u16(odd), | |
| ); | |
| // Level 3: u32 -> u64 | |
| let even = vuzp1q_u32(res, res); | |
| let odd = vuzp2q_u32(res, res); | |
| let res = vaddw_u32( | |
| vmull_u32(vget_low_u32(even), vdup_n_u32(10000)), | |
| vget_low_u32(odd), | |
| ); | |
| vgetq_lane_u64::<0>(res) * 100000000 + vgetq_lane_u64::<1>(res) | |
| } | |
| #[inline(always)] | |
| unsafe fn atoi_pairwise(data_ptr: *const u8) -> u64 { | |
| let digits = vsubq_u8(vld1q_u8(data_ptr), vdupq_n_u8(b'0')); | |
| // Level 1: u8 -> u16 | |
| let w1 = vld1q_u8([10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1].as_ptr()); | |
| let res = vpaddlq_u8(vmulq_u8(digits, w1)); | |
| // Level 2: u16 -> u32 | |
| let w2 = vld1q_u16([100, 1, 100, 1, 100, 1, 100, 1].as_ptr()); | |
| let res = vpaddlq_u16(vmulq_u16(res, w2)); | |
| // Level 3: u32 -> u64 | |
| let w3 = vld1q_u32([10000, 1, 10000, 1].as_ptr()); | |
| let res = vpaddlq_u32(vmulq_u32(res, w3)); | |
| vgetq_lane_u64::<0>(res) * 100000000 + vgetq_lane_u64::<1>(res) | |
| } | |
| fn bench_atoi(c: &mut Criterion) { | |
| let input = b"1234567890654321"; | |
| let mut group = c.benchmark_group("SIMD_Atoi_16B"); | |
| group.bench_function("unzip_method", |b| { | |
| b.iter(|| unsafe { black_box(atoi_unzip(black_box(input.as_ptr()))) }) | |
| }); | |
| group.bench_function("pairwise_method", |b| { | |
| b.iter(|| unsafe { black_box(atoi_pairwise(black_box(input.as_ptr()))) }) | |
| }); | |
| group.bench_function("atoi_simd_method", |b| { | |
| b.iter(|| black_box(atoi_simd::parse_pos::<u64, false>(black_box(input)).unwrap())) | |
| }); | |
| group.finish(); | |
| } | |
// Criterion harness wiring: `criterion_group!` declares a group named `benches`
// containing `bench_atoi`, and `criterion_main!` generates the benchmark
// binary's `main` function that runs that group.
| criterion_group!(benches, bench_atoi); | |
| criterion_main!(benches); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment