Skip to content

Instantly share code, notes, and snippets.

@estshorter
Last active October 19, 2025 02:46
Show Gist options
  • Select an option

  • Save estshorter/19717de2d18780d6d754cd3efd3c8021 to your computer and use it in GitHub Desktop.

Select an option

Save estshorter/19717de2d18780d6d754cd3efd3c8021 to your computer and use it in GitHub Desktop.
// clang -fomit-frame-pointer -target arm-linux-gnueabihf -march=armv8-a -mcpu=cortex-a53 -mfloat-abi=hard -marm
// --sysroot=/usr/lib/arm-linux-gnueabihf test.c
// Even though -fomit-frame-pointer is specified as above, compiling without an optimization option (-O0) produces the following warning.
// test.c:12:9: warning: inline asm clobber list contains reserved registers: R11 [-Winline-asm]
// 12 | "sub sp, sp, #0x200\n\t"
// | ^
// test.c:12:9: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
// 1 warning generated.
// The warning does not appear when -O1 or higher is specified.
#include <stdio.h>
#include <stdint.h>
typedef int32_t sp_digit;
/*
 * Multiply two 64-word numbers (64 x 32 bits = 2048 bits each) and write the
 * 128-word product to r_p, using 32-bit ARM inline assembly.
 *
 * r_p: destination; 0x200 bytes (128 words) are written, lowest word first.
 * a_p: first operand; read at byte offsets 0x00..0xfc (64 words).
 * b_p: second operand; read at byte offsets 0x00..0xfc (64 words).
 *
 * The words are multiplied with umull, i.e. as unsigned 32-bit values, even
 * though sp_digit is a signed typedef.
 *
 * The product is built one result word ("column") at a time: for result byte
 * offset k, every a[i] * b[k - i] is summed into a three-word accumulator,
 * the low accumulator word is stored, and the accumulator is shifted down by
 * one word. Columns are first written to a 0x200-byte scratch area carved out
 * of the stack and copied to r_p at the end.
 *
 * NOTE: the asm moves sp itself and lists r11 as a clobber; clang reports r11
 * as a reserved register here (see the warning quoted at the top of the file).
 */
static void sp_2048_mul_64(sp_digit* r_p, const sp_digit* a_p, const sp_digit* b_p)
{
/* Bind the three pointers to r0, r1 and r2 so they do not collide with the
 * scratch registers (r3-r11, lr) that the asm body names explicitly. */
register sp_digit* r asm ("r0") = r_p;
register const sp_digit* a asm ("r1") = a_p;
register const sp_digit* b asm ("r2") = b_p;
__asm__ __volatile__ (
/* Reserve 0x200 bytes of stack as scratch space for the result columns. */
"sub sp, sp, #0x200\n\t"
/* r5 = byte offset k of the current result column. */
"mov r5, #0\n\t"
/* r6:r7:r8 = three-word column accumulator (low, middle, high). */
"mov r6, #0\n\t"
"mov r7, #0\n\t"
"mov r8, #0\n\t"
"\n"
"L_sp_2048_mul_64_outer_%=: \n\t"
/* r3 = starting byte offset into a: k - 0xfc, or 0 if that subtraction
 * borrows (k < 0xfc). */
"subs r3, r5, #0xfc\n\t"
/* Conditional-execution marker for the movcc below. */
"it cc\n\t"
"movcc r3, #0\n\t"
/* r4 = matching byte offset into b, so that r3 + r4 == k. */
"sub r4, r5, r3\n\t"
"\n"
"L_sp_2048_mul_64_inner_%=: \n\t"
/* Load one word from each operand. */
"ldr lr, [%[a], r3]\n\t"
"ldr r11, [%[b], r4]\n\t"
/* 32x32 -> 64-bit unsigned product: r9 = low word, r10 = high word. */
"umull r9, r10, lr, r11\n\t"
/* Add the 64-bit product into the accumulator, propagating carries. */
"adds r6, r6, r9\n\t"
"adcs r7, r7, r10\n\t"
"adc r8, r8, #0\n\t"
/* Step to the next pair: a offset up by one word, b offset down by one. */
"add r3, r3, #4\n\t"
"sub r4, r4, #4\n\t"
/* Stop once the a offset has passed the last word (0x100 bytes)... */
"cmp r3, #0x100\n\t"
"beq L_sp_2048_mul_64_inner_done_%=\n\t"
/* ...otherwise continue while the a offset is still <= k. */
"cmp r3, r5\n\t"
"ble L_sp_2048_mul_64_inner_%=\n\t"
"\n"
"L_sp_2048_mul_64_inner_done_%=: \n\t"
/* Store the finished column word in the scratch area at offset k. */
"str r6, [sp, r5]\n\t"
/* Shift the accumulator down one word; the carry-over seeds the next column. */
"mov r6, r7\n\t"
"mov r7, r8\n\t"
"mov r8, #0\n\t"
/* Advance to the next column; loop while k <= 0x1f8. */
"add r5, r5, #4\n\t"
"cmp r5, #0x1f8\n\t"
"ble L_sp_2048_mul_64_outer_%=\n\t"
/* k is now 0x1fc: store the remaining accumulator word as the top word. */
"str r6, [sp, r5]\n\t"
"\n"
"L_sp_2048_mul_64_store_%=: \n\t"
/* Copy the scratch area to r four words at a time. Both pointers are
 * post-incremented, so after 32 iterations (r5 counts down from 0x1fc in
 * steps of 16) sp is back at its value from before the initial sub. */
"ldm sp!, {r6, r7, r8, r9}\n\t"
"stm %[r]!, {r6, r7, r8, r9}\n\t"
"subs r5, r5, #16\n\t"
"bgt L_sp_2048_mul_64_store_%=\n\t"
/* All three pointers are declared read-write operands; r is advanced by the
 * stm writeback above. */
: [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
:
/* Scratch registers used by the body, plus "memory" because the asm reads and
 * writes through the pointers. */
: "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11"
);
}
/*
 * Demo driver: multiplies two 64-word operands filled with the value 2 and
 * prints the operands together with the 128-word product.
 *
 * sp_2048_mul_64 reads 64 words from each input and writes 128 words to the
 * output, so the buffers must be at least that large; smaller arrays are
 * over-read and overflowed by the assembly routine.
 */
int main(void) {
    enum {
        OPERAND_WORDS = 64,                 /* words read from each of a and b */
        RESULT_WORDS = 2 * OPERAND_WORDS    /* words written to r */
    };
    sp_digit r_p[RESULT_WORDS];
    sp_digit a_p[OPERAND_WORDS];
    sp_digit b_p[OPERAND_WORDS];

    for (int i = 0; i < RESULT_WORDS; i++) {
        r_p[i] = 0;
    }
    for (int i = 0; i < OPERAND_WORDS; i++) {
        a_p[i] = 2;
        b_p[i] = 2;
    }

    sp_2048_mul_64(r_p, a_p, b_p);

    /* Low half of the result alongside the operands that produced it. */
    for (int i = 0; i < OPERAND_WORDS; i++) {
        printf("i: %02d, r: %d, a: %d, b: %d\n", i, (int)r_p[i], (int)a_p[i], (int)b_p[i]);
    }
    /* High half of the result has no matching operand words. */
    for (int i = OPERAND_WORDS; i < RESULT_WORDS; i++) {
        printf("i: %02d, r: %d\n", i, (int)r_p[i]);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment