Skip to content

Instantly share code, notes, and snippets.

@illume
Created August 19, 2025 21:27
Show Gist options
  • Select an option

  • Save illume/14c8bdadfba47e68b8378ee68a9e7794 to your computer and use it in GitHub Desktop.

Select an option

Save illume/14c8bdadfba47e68b8378ee68a9e7794 to your computer and use it in GitHub Desktop.
Example of what a portable runtime assembler in Rust could look like.
use crabwalk::{
Assembler, F32x4,
VecReg0, VecReg1, VecReg2, VecReg3,
VecReg4, VecReg5, VecReg6, VecReg7,
GprA, GprB, GprC
};
/// Assembles three 3×3 `f32` matrix kernels at runtime from one shared
/// prologue, runs each on sample data, and prints and checks the results.
///
/// The three kernels are:
/// 1. an identity path that stores the loaded B vectors straight to C,
/// 2. a general path that combines the B vectors using splatted elements of A,
/// 3. a path specialised for one fixed A (a 90° rotation about Z).
///
/// NOTE(review): `crabwalk` is not visible from this file; remarks about what
/// its methods do are read off the call sites and should be confirmed against
/// the crate. Offsets look like byte offsets (float index × 4).
fn main() {
    // One signature for every generated kernel: (a, b, c) pointers to f32 data.
    // The "sysv64" ABI string ties these pointers to the x86-64 System V
    // calling convention.
    type Mm33 = extern "sysv64" fn(*const f32, *const f32, *mut f32);

    // Shared prologue: set up args, then load B as three 3-float vectors
    // starting at float offsets 0, 3 and 6. These are contiguous triples, so
    // with the row-major test data below VecReg1..VecReg3 hold the ROWS of B.
    let mut base = Assembler::new::<F32x4>();
    base.entry_args(GprA, GprB, GprC)
        .load3_at(VecReg1, GprB, 0 * 4)
        .load3_at(VecReg2, GprB, 3 * 4)
        .load3_at(VecReg3, GprB, 6 * 4);

    // --- 1. Identity path ---
    // A is never read: the three loaded B vectors go straight back out to C.
    let mut asm_id = base.clone();
    asm_id
        .store3_at(VecReg1, GprC, 0 * 4)
        .store3_at(VecReg2, GprC, 3 * 4)
        .store3_at(VecReg3, GprC, 6 * 4)
        .ret();
    let mm33_id: Mm33 = asm_id.finalize();

    // --- 2. General multiply ---
    // The loop runs while assembling, so the generated code is unrolled:
    // for each output triple `row`,
    //   C[row] = A[row][0] * B[0] + A[row][1] * B[1] + A[row][2] * B[2].
    let mut asm_mul = base.clone();
    for row in 0..3 {
        asm_mul
            // Broadcast the three scalars of A's triple `row`.
            .ld1_splat(VecReg4, GprA, (row * 3 + 0) * 4)
            .ld1_splat(VecReg5, GprA, (row * 3 + 1) * 4)
            .ld1_splat(VecReg6, GprA, (row * 3 + 2) * 4)
            // VecReg0 accumulates; VecReg7 is scratch for each product.
            .mul(VecReg0, VecReg4, VecReg1)
            .mul(VecReg7, VecReg5, VecReg2)
            .add(VecReg0, VecReg7)
            .mul(VecReg7, VecReg6, VecReg3)
            .add(VecReg0, VecReg7)
            .store3_at(VecReg0, GprC, row * 3 * 4);
    }
    asm_mul.ret();
    let mm33_mul: Mm33 = asm_mul.finalize();

    // --- 3. Specific-matrix path (90° rotation about Z axis) ---
    // A = [[ 0, -1, 0 ],
    //      [ 1,  0, 0 ],
    //      [ 0,  0, 1 ]]
    //
    // Here: multiplication by 0 drops the op, 1 is just copy, -1 is negate.
    // A is known while assembling, so this kernel never reads GprA.
    let mut asm_spec = base;
    // Output 0: 0*B[0] + (-1)*B[1] + 0*B[2] → negate B[1] (VecReg2).
    asm_spec
        .neg(VecReg0, VecReg2)
        .store3_at(VecReg0, GprC, 0 * 4);
    // Output 1: 1*B[0] + 0*B[1] + 0*B[2] → copy B[0] (VecReg1).
    asm_spec.store3_at(VecReg1, GprC, 3 * 4);
    // Output 2: 0*B[0] + 0*B[1] + 1*B[2] → copy B[2] (VecReg3).
    asm_spec.store3_at(VecReg3, GprC, 6 * 4);
    asm_spec.ret();
    let mm33_spec: Mm33 = asm_spec.finalize();

    // --- Test runs ---
    let a_id = [
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0,
    ];
    let a_mul = [
        2.0, 0.0, 0.0,
        0.0, 3.0, 0.0,
        0.0, 0.0, 4.0,
    ];
    let a_rot = [
        0.0, -1.0, 0.0,
        1.0, 0.0, 0.0,
        0.0, 0.0, 1.0,
    ];
    let b = [
        1.0, 2.0, 3.0,
        4.0, 5.0, 6.0,
        7.0, 8.0, 9.0,
    ];

    // Every expected value below is exactly representable in f32, so exact
    // equality is safe here.
    let mut c = [0.0; 9];
    mm33_id(a_id.as_ptr(), b.as_ptr(), c.as_mut_ptr());
    println!("Identity: {c:?}");
    assert_eq!(c, b, "identity kernel should copy B unchanged");

    let mut c2 = [0.0; 9];
    mm33_mul(a_mul.as_ptr(), b.as_ptr(), c2.as_mut_ptr());
    println!("General mul: {c2:?}");
    // diag(2, 3, 4) scales the three triples of B by 2, 3 and 4.
    assert_eq!(
        c2,
        [2.0, 4.0, 6.0, 12.0, 15.0, 18.0, 28.0, 32.0, 36.0],
        "general kernel should scale each triple of B by the diagonal of A"
    );

    let mut c3 = [0.0; 9];
    mm33_spec(a_rot.as_ptr(), b.as_ptr(), c3.as_mut_ptr());
    println!("Specific rotation: {c3:?}");
    // Output 0 = -B[1], output 1 = B[0], output 2 = B[2].
    assert_eq!(
        c3,
        [-4.0, -5.0, -6.0, 1.0, 2.0, 3.0, 7.0, 8.0, 9.0],
        "rotation kernel should emit -B[1], B[0], B[2]"
    );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment