Skip to content

Instantly share code, notes, and snippets.

@lastforkbender
Created January 23, 2026 02:51
Show Gist options
  • Select an option

  • Save lastforkbender/348e645faf0e6ee4d24690a2f74840c0 to your computer and use it in GitHub Desktop.

Select an option

Save lastforkbender/348e645faf0e6ee4d24690a2f74840c0 to your computer and use it in GitHub Desktop.
VGER440RL Complex Shuffle MASM32
; VGER440RL_FCQ8_SHUFFLE_COMPLEX.asm / MASM32
.686
.model flat, stdcall
option casemap:none
include windows.inc
include kernel32.inc
include advapi32.inc
includelib kernel32.lib
includelib advapi32.lib
; ---------------------------------------------------------------------------
; Data, prototypes and equates.
;
; Changes relative to the previous layout:
;   * alpha/temp/seedScale now live inside .data (data definitions must be
;     inside a segment).
;   * tempBuf is declared uninitialised, as everything in .data? has to be.
;   * Equates use MASM's `h` hex suffix and are guarded so they do not
;     collide with definitions that windows.inc may already provide.
;   * CryptGenRandom's third parameter is declared :DWORD like the others
;     (it carries a 32-bit flat pointer).
; ---------------------------------------------------------------------------
.data
alpha REAL8 0.25                        ; multiplier used by applyPropagation
temp REAL8 1.0                          ; divisor used by computeWeights
seedScale REAL8 0.001                   ; multiplier used by buildSeedPerturb
arrCount dd 8                           ; number of 16-byte elements in arr
; arr holds arrCount elements of two REAL8 values each (16 bytes/element).
arr REAL8 1.0, 2.0, -3.0, 1.0, 0.5, -0.25, 4.0, 0.0, 0.0, -1.5, 2.2, 0.8, -0.7, 0.7, 3.1, -2.3
; Work buffers; all are allocated and released by the code at `start`.
scoresPtr dd 0                          ; arrCount REAL8 values
perturbPtr dd 0                         ; arrCount REAL8 values
weightsPtr dd 0                         ; arrCount REAL8 values
indicesPtr dd 0                         ; arrCount 16-byte elements (scratch copy of arr)
pickedMask dd 0                         ; arrCount bytes
permPtr dd 0                            ; arrCount DWORD indices
; The two messages below are not referenced by the code shown in this file.
errAlloc db "MemAlloc failed", 0
errCrypt db "CryptGenRandom failed", 0
.data?
; hProv and tempBuf are not referenced by the code shown in this file.
hProv dd ?
tempBuf dq ?
.code
CryptAcquireContextA proto :DWORD, :DWORD, :DWORD, :DWORD, :DWORD
CryptGenRandom proto :DWORD, :DWORD, :DWORD
CryptReleaseContext proto :DWORD, :DWORD
GlobalAlloc proto :DWORD, :DWORD
GlobalFree proto :DWORD
GetTickCount proto
ExitProcess proto :DWORD
IFNDEF GMEM_ZEROINIT
GMEM_ZEROINIT equ 0040h
ENDIF
IFNDEF GHND
GHND equ 0042h
ENDIF
;-----------------------------------------------------------------------
; secureRand32
; Purpose: return 32 random bits from the Windows CryptoAPI; if the
;          provider cannot be acquired or CryptGenRandom fails, return
;          GetTickCount() XOR 0A5A5A5A5h instead (NOT cryptographically
;          strong).
; In:      nothing
; Out:     eax = 32-bit value
; Saves:   ebx, ecx, edx (callers in this file keep loop state in ecx)
; Notes:   * EAX must not be in the USES list: the generated epilogue
;            would restore it over the return value.
;          * stdcall arguments are pushed right-to-left, so the LAST
;            push is the FIRST parameter.
;          * Call with an empty x87 stack; this routine calls Win32 APIs.
;-----------------------------------------------------------------------
SR_VERIFYCONTEXT equ 0F0000000h         ; CRYPT_VERIFYCONTEXT: no key container needed

secureRand32 proc uses ebx ecx edx
    local   hCtx:DWORD                  ; receives the provider handle
    local   rndVal:DWORD                ; receives the random bytes

    ; CryptAcquireContextA(&hCtx, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)
    push    SR_VERIFYCONTEXT            ; dwFlags
    push    PROV_RSA_FULL               ; dwProvType
    push    0                           ; szProvider  = NULL (default provider)
    push    0                           ; szContainer = NULL
    lea     eax, hCtx
    push    eax                         ; phProv
    call    CryptAcquireContextA
    test    eax, eax                    ; BOOL result; the handle is in hCtx
    jz      sr_fallback

    ; CryptGenRandom(hCtx, 4, &rndVal)
    lea     eax, rndVal
    push    eax                         ; pbBuffer
    push    4                           ; dwLen
    push    hCtx                        ; hProv
    call    CryptGenRandom
    mov     ebx, eax                    ; keep the BOOL across the release call

    ; CryptReleaseContext(hCtx, 0) - always release, success or not
    push    0                           ; dwFlags
    push    hCtx                        ; hProv
    call    CryptReleaseContext

    test    ebx, ebx
    jz      sr_fallback
    mov     eax, rndVal                 ; loaded last so no API call can clobber it
    ret

sr_fallback:
    call    GetTickCount
    xor     eax, 0A5A5A5A5h
    ret
secureRand32 endp
;-----------------------------------------------------------------------
; allocGlobal
; Purpose: allocate a zero-initialised block with GlobalAlloc.
; In:      eax = size in bytes. Callers in this file also push the size
;          and remove it themselves after the call; that stack copy is
;          not read here.
; Out:     eax = value returned by GlobalAlloc (0 on failure)
; Clobb:   ecx, edx, flags (volatile across the stdcall API call)
; Notes:   * EAX is deliberately not in a USES list: restoring it would
;            overwrite the returned pointer with the requested size.
;          * GlobalAlloc(uFlags, dwBytes) is stdcall, so dwBytes is
;            pushed first and uFlags last.
;-----------------------------------------------------------------------
allocGlobal proc
    push    eax                         ; dwBytes
    push    GMEM_ZEROINIT               ; uFlags (fixed memory, zero-filled)
    call    GlobalAlloc
    ret
allocGlobal endp
;-----------------------------------------------------------------------
; freeGlobal
; Purpose: release a block obtained from allocGlobal.
; In:      eax = handle/pointer to free (0 is ignored). Callers in this
;          file also push the value and remove it themselves after the
;          call; that stack copy is not read here.
; Out:     none; eax is preserved by the USES clause
; Clobb:   ecx, edx, flags (volatile across the stdcall API call)
; Notes:   GlobalFree takes exactly ONE argument. Pushing a second value
;          leaves the stack unbalanced, so the epilogue would pop the
;          wrong dword into eax and `ret` would jump to a bogus address.
;-----------------------------------------------------------------------
freeGlobal proc uses eax
    test    eax, eax
    jz      fg_done                     ; nothing to free
    push    eax                         ; hMem
    call    GlobalFree
fg_done:
    ret
freeGlobal endp
;-----------------------------------------------------------------------
; computeBaseScores
; Purpose: scores[i] = 2 * |second REAL8 of arr element i|
;          for i in 0 .. arrCount-1.
; In:      arrCount, arr, scoresPtr (globals)
; Out:     arrCount REAL8 values written through scoresPtr
; Saves:   every general register listed in USES
; Notes:   * Each arr element is 16 bytes; only the REAL8 at offset 8 is
;            read (presumably the imaginary part, going by the file
;            name - confirm with the author).
;          * x87 registers are written as ST(n); MASM does not define
;            the NASM-style names st0/st1.
;-----------------------------------------------------------------------
computeBaseScores proc uses esi edi ebx ecx eax edx
    mov     ecx, dword ptr [arrCount]
    test    ecx, ecx
    jle     cb_done                     ; signed: zero or negative count -> no work

    mov     esi, OFFSET arr             ; esi -> arr[i]
    mov     edi, dword ptr [scoresPtr]  ; edi -> scores[i]
cb_loop:
    fld     qword ptr [esi + 8]         ; second REAL8 of the element
    fabs
    fadd    st(0), st(0)                ; double it
    fstp    qword ptr [edi]
    add     esi, 16                     ; next 16-byte element
    add     edi, 8                      ; next REAL8 score
    dec     ecx
    jnz     cb_loop
cb_done:
    ret
computeBaseScores endp
;-----------------------------------------------------------------------
; buildSeedPerturb
; Purpose: perturb[i] = (signed 32-bit value from secureRand32) * seedScale
;          for i in 0 .. arrCount-1.
; In:      arrCount, perturbPtr, seedScale (globals)
; Out:     arrCount REAL8 values written through perturbPtr
; Saves:   every general register listed in USES
; Notes:   * FILD DWORD interprets the random value as SIGNED, so the
;            result can be negative and as large as about 2^31 * seedScale
;            in magnitude.
;            NOTE(review): confirm that this range is intended.
;          * Loop state is kept in esi/edi, which stdcall callees must
;            preserve, instead of relying on secureRand32 to keep ecx.
;          * The x87 stack is empty at every call to secureRand32.
;-----------------------------------------------------------------------
buildSeedPerturb proc uses esi edi ebx ecx eax
    mov     esi, dword ptr [arrCount]   ; esi = elements still to fill
    test    esi, esi
    jle     bsp_done

    mov     edi, dword ptr [perturbPtr] ; edi -> perturb[i]
bsp_loop:
    call    secureRand32                ; eax = random dword
    push    eax                         ; FILD needs a memory operand
    fild    dword ptr [esp]             ; st0 = (signed) eax
    add     esp, 4
    fmul    qword ptr seedScale         ; st0 *= seedScale
    fstp    qword ptr [edi]
    add     edi, 8
    dec     esi
    jnz     bsp_loop
bsp_done:
    ret
buildSeedPerturb endp
;-----------------------------------------------------------------------
; applyPropagation
; Purpose: for every pair j < i (i ascending, j ascending) update in place
;            perturb[i] += alpha * (1 + |scores[j] - scores[i]|) / perturb[j]
;          Updates are sequential: perturb[j] values already modified in
;          earlier outer iterations are the ones read later.
; In:      arrCount, scoresPtr, perturbPtr, alpha (globals)
; Out:     perturb[] modified in place
; Saves:   every general register listed in USES. An x87 register cannot
;          appear in a USES list (it is expanded to PUSH/POP); the
;          routine leaves the x87 stack balanced instead.
; NOTE(review): the quotient has perturb[j] as the DIVISOR. If a
;          distance-damped contribution perturb[j] / (1 + |diff|) was
;          intended, the FDIV below should be FDIVR. A perturb[j] of
;          exactly 0 also yields an infinity here. Confirm with the
;          author before changing the arithmetic.
;-----------------------------------------------------------------------
applyPropagation proc uses esi edi ebx ecx eax edx
    mov     ecx, dword ptr [arrCount]
    cmp     ecx, 1
    jl      ap_done

    mov     edi, dword ptr [scoresPtr]  ; edi -> scores[0]
    mov     edx, dword ptr [perturbPtr] ; edx -> perturb[0]
    xor     esi, esi                    ; esi = i
ap_outer:
    xor     ebx, ebx                    ; ebx = j
ap_inner:
    cmp     ebx, esi
    jge     ap_next_i                   ; only j < i contribute
    fld     qword ptr [edi + ebx*8]     ; st0 = scores[j]
    fsub    qword ptr [edi + esi*8]     ; st0 = scores[j] - scores[i]
    fabs
    fld1
    faddp   st(1), st(0)                ; st0 = 1 + |diff|
    fdiv    qword ptr [edx + ebx*8]     ; st0 = (1 + |diff|) / perturb[j]
    fmul    qword ptr alpha             ; st0 *= alpha
    fadd    qword ptr [edx + esi*8]     ; st0 += perturb[i]
    fstp    qword ptr [edx + esi*8]     ; perturb[i] = st0 (x87 stack empty again)
    inc     ebx
    jmp     ap_inner
ap_next_i:
    inc     esi
    cmp     esi, ecx
    jl      ap_outer
ap_done:
    ret
applyPropagation endp
;-----------------------------------------------------------------------
; computeWeights
; Purpose: numerically stable softmax of scores[] into weights[]:
;            m         = max(scores)
;            w[i]      = exp((scores[i] - m) / temp)
;            weights[i] = w[i] / sum(w)
;          If the sum compares equal to zero or is a NaN, every weight
;          is set to 1/arrCount instead.
; In:      arrCount, scoresPtr, weightsPtr, temp (globals)
; Out:     arrCount REAL8 values written through weightsPtr
; Saves:   every general register listed in USES (edx added; it is used
;          as scratch by nothing here but was clobbered before)
; x87:     empty on entry -> empty on exit on every path, including the
;          arrCount <= 0 early exit. Peak depth is 5 registers.
; exp(x):  y = x*log2(e); n = round(y); f = y - n (|f| <= 0.5 in the
;          default rounding mode, inside F2XM1's domain);
;          2^y = FSCALE(2^f, n).
;-----------------------------------------------------------------------
computeWeights proc uses esi edi ecx eax ebx edx
    mov     ecx, dword ptr [arrCount]
    test    ecx, ecx
    jle     cw_done                     ; nothing loaded yet, nothing to pop

    mov     ebx, dword ptr [scoresPtr]  ; ebx -> scores[0]
    mov     edi, dword ptr [weightsPtr] ; edi -> weights[0]

    ; ---- pass 1: st0 = max(scores) ------------------------------------
    fld     qword ptr [ebx]             ; st0 = max so far
    mov     esi, 1
cw_findmax:
    cmp     esi, ecx
    jge     cw_found
    fld     qword ptr [ebx + esi*8]     ; st0 = x, st1 = max
    fcomi   st(0), st(1)                ; compare without popping
    jbe     cw_fm_keep                  ; x <= max (or unordered): keep max
    fstp    st(1)                       ; x > max: overwrite max, pop -> st0 = x
    jmp     cw_fm_next
cw_fm_keep:
    fstp    st(0)                       ; discard x -> st0 = max
cw_fm_next:
    inc     esi
    jmp     cw_findmax

    ; ---- pass 2: unnormalised weights and their sum -------------------
cw_found:
    fldz                                ; st0 = sum, st1 = max
    xor     esi, esi
cw_loop:
    cmp     esi, ecx
    jge     cw_norm
    fld     qword ptr [ebx + esi*8]     ; st0 = x, st1 = sum, st2 = max
    fsub    st(0), st(2)                ; x - max  (<= 0)
    fdiv    qword ptr temp              ; (x - max) / temp
    fldl2e
    fmulp   st(1), st(0)                ; y = ((x - max)/temp) * log2(e)
    fld     st(0)
    frndint                             ; st0 = n, st1 = y
    fsub    st(1), st(0)                ; st1 = f = y - n
    fxch    st(1)                       ; st0 = f, st1 = n
    f2xm1                               ; st0 = 2^f - 1
    fld1
    faddp   st(1), st(0)                ; st0 = 2^f
    fscale                              ; st0 = 2^f * 2^n = 2^y
    fstp    st(1)                       ; drop n -> st0 = w, st1 = sum, st2 = max
    fst     qword ptr [edi + esi*8]     ; weights[i] = w (kept on the stack)
    faddp   st(1), st(0)                ; sum += w -> st0 = sum, st1 = max
    inc     esi
    jmp     cw_loop

    ; ---- pass 3: normalise --------------------------------------------
cw_norm:
    fstp    st(1)                       ; max no longer needed -> st0 = sum
    fldz
    fcomip  st(0), st(1)                ; 0 vs sum, pops the 0 -> st0 = sum
    jne     cw_norm_scale               ; ZF=0: sum is a usable non-zero number

    ; sum == 0 or NaN (unordered also sets ZF): fall back to uniform
    fstp    st(0)                       ; drop sum
    fld1
    fidiv   dword ptr [arrCount]        ; st0 = 1 / arrCount
    xor     esi, esi
cw_set_uniform:
    fst     qword ptr [edi + esi*8]
    inc     esi
    cmp     esi, ecx
    jl      cw_set_uniform
    fstp    st(0)                       ; x87 stack empty
    jmp     cw_done

cw_norm_scale:
    xor     esi, esi
cw_norm_loop:
    fld     qword ptr [edi + esi*8]     ; st0 = w, st1 = sum
    fdiv    st(0), st(1)                ; st0 = w / sum (sum itself untouched)
    fstp    qword ptr [edi + esi*8]
    inc     esi
    cmp     esi, ecx
    jl      cw_norm_loop
    fstp    st(0)                       ; drop sum; x87 stack empty
cw_done:
    ret
computeWeights endp
;-----------------------------------------------------------------------
; weightedSample
; Purpose: weighted sampling WITHOUT replacement. For k = 0 .. n-1:
;            total = sum of weights of the not-yet-picked entries
;            u     = (unsigned random dword / 2^32) * total
;            pick the first not-yet-picked i whose running sum exceeds u
;            perm[k] = i
;          If no running sum exceeds u (all remaining weights are 0, or
;          rounding), the last not-yet-picked index is taken, so perm
;          always ends up a complete permutation.
; In:      arrCount, weightsPtr, permPtr (globals); weights must be >= 0
; Out:     arrCount DWORD indices written through permPtr;
;          every picked weight is 0.0 on return
; Saves:   every general register listed in USES
; x87:     empty on entry -> empty on exit; empty at each secureRand32
;          call (that routine calls Win32 APIs)
; Notes:   * A picked entry is temporarily marked by storing -1.0; the
;            sign bit (bit 7 of byte 7) is the "picked" flag. This keeps
;            picked entries distinct from legitimate zero weights.
;          * The random dword is widened to 64 bits before FILD so that
;            it is converted as an UNSIGNED value.
;          * ecx is reloaded after secureRand32 rather than assumed
;            preserved.
;-----------------------------------------------------------------------
weightedSample proc uses esi edi ebx ecx eax edx
    local   rnd64:QWORD                 ; zero-extended random dword
    local   scaleExp:DWORD              ; -32, exponent operand for FSCALE

    mov     ecx, dword ptr [arrCount]
    test    ecx, ecx
    jle     ws_done
    mov     scaleExp, -32
    xor     esi, esi                    ; esi = k, output slot being filled

ws_outer:
    call    secureRand32                ; eax = random dword (x87 stack empty here)
    mov     dword ptr [rnd64], eax
    mov     dword ptr [rnd64 + 4], 0
    mov     ecx, dword ptr [arrCount]   ; ecx = n
    mov     ebx, dword ptr [weightsPtr] ; ebx -> weights[0]

    ; total = sum of weights that are not marked as picked
    fldz                                ; st0 = total
    xor     edi, edi
ws_total_loop:
    test    byte ptr [ebx + edi*8 + 7], 80h
    jnz     ws_total_next               ; sign bit set: already picked
    fadd    qword ptr [ebx + edi*8]
ws_total_next:
    inc     edi
    cmp     edi, ecx
    jl      ws_total_loop

    ; u = (r / 2^32) * total
    fild    dword ptr scaleExp          ; st0 = -32, st1 = total
    fild    qword ptr rnd64             ; st0 = r,   st1 = -32, st2 = total
    fscale                              ; st0 = r * 2^-32, in [0, 1)
    fstp    st(1)                       ; drop -32 -> st0 = r/2^32, st1 = total
    fmulp   st(1), st(0)                ; st0 = u

    ; walk the cumulative sum until it exceeds u
    fldz                                ; st0 = cum, st1 = u
    xor     edi, edi                    ; edi = i
    mov     edx, -1                     ; edx = last not-yet-picked index seen
ws_cum_loop:
    cmp     edi, ecx
    jge     ws_fallback
    test    byte ptr [ebx + edi*8 + 7], 80h
    jnz     ws_cum_next                 ; already picked
    mov     edx, edi
    fadd    qword ptr [ebx + edi*8]     ; cum += weights[i]
    fcomi   st(0), st(1)                ; cum vs u, nothing popped
    ja      ws_pick                     ; cum > u -> choose i
ws_cum_next:
    inc     edi
    jmp     ws_cum_loop

ws_fallback:
    mov     edi, edx                    ; take the last not-yet-picked entry
    test    edi, edi
    js      ws_abort                    ; none at all (e.g. negative/NaN input weights)

ws_pick:
    fstp    st(0)                       ; drop cum
    fstp    st(0)                       ; drop u -> x87 stack empty
    mov     eax, dword ptr [permPtr]
    mov     dword ptr [eax + esi*4], edi            ; perm[k] = i
    mov     dword ptr [ebx + edi*8], 0              ; weights[i] = -1.0 ("picked")
    mov     dword ptr [ebx + edi*8 + 4], 0BFF00000h
    inc     esi
    cmp     esi, ecx
    jl      ws_outer
    jmp     ws_restore

ws_abort:
    fstp    st(0)                       ; drop cum
    fstp    st(0)                       ; drop u -> x87 stack empty

    ; replace every "picked" marker with +0.0
ws_restore:
    xor     edi, edi
ws_restore_loop:
    test    byte ptr [ebx + edi*8 + 7], 80h
    jz      ws_restore_next
    mov     dword ptr [ebx + edi*8], 0
    mov     dword ptr [ebx + edi*8 + 4], 0
ws_restore_next:
    inc     edi
    cmp     edi, ecx
    jl      ws_restore_loop
ws_done:
    ret
weightedSample endp
;-----------------------------------------------------------------------
; start - program entry point
; Flow:    allocate work buffers -> computeBaseScores -> buildSeedPerturb
;          -> applyPropagation -> scores += perturb -> computeWeights
;          -> weightedSample -> permute arr through a scratch buffer
;          -> free buffers -> ExitProcess(0).
;          Any allocation failure ends the process with exit code 1.
; Helper convention used here: the value is passed to allocGlobal /
;          freeGlobal in eax AND pushed; the caller removes the pushed
;          copy with `add esp, 4`.
; Notes:   * ebx holds arrCount across the allocation calls (ebx is
;            callee-saved under stdcall).
;          * Buffers already allocated are not freed on the alloc_fail
;            path; the process exits immediately afterwards.
;-----------------------------------------------------------------------
start:
    mov     ebx, dword ptr [arrCount]   ; ebx = n
    test    ebx, ebx
    jle     exit_ok                     ; nothing to shuffle

    ; scores: n REAL8
    mov     eax, ebx
    shl     eax, 3
    push    eax
    call    allocGlobal
    add     esp, 4
    test    eax, eax
    jz      alloc_fail
    mov     dword ptr [scoresPtr], eax

    ; perturb: n REAL8
    mov     eax, ebx
    shl     eax, 3
    push    eax
    call    allocGlobal
    add     esp, 4
    test    eax, eax
    jz      alloc_fail
    mov     dword ptr [perturbPtr], eax

    ; weights: n REAL8
    mov     eax, ebx
    shl     eax, 3
    push    eax
    call    allocGlobal
    add     esp, 4
    test    eax, eax
    jz      alloc_fail
    mov     dword ptr [weightsPtr], eax

    ; perm: n DWORD
    mov     eax, ebx
    shl     eax, 2
    push    eax
    call    allocGlobal
    add     esp, 4
    test    eax, eax
    jz      alloc_fail
    mov     dword ptr [permPtr], eax

    ; Optional /_/_ allocate pickedMask(n bytes)
    mov     eax, ebx
    push    eax
    call    allocGlobal
    add     esp, 4
    test    eax, eax
    jz      alloc_fail
    mov     dword ptr [pickedMask], eax

    call    computeBaseScores
    call    buildSeedPerturb
    call    applyPropagation

    ; scores[i] += perturb[i]   (n >= 1 is guaranteed by the guard above)
    mov     ecx, dword ptr [arrCount]
    mov     eax, dword ptr [scoresPtr]
    mov     edx, dword ptr [perturbPtr]
fs_loop:
    fld     qword ptr [eax]
    fadd    qword ptr [edx]
    fstp    qword ptr [eax]
    add     eax, 8
    add     edx, 8
    dec     ecx
    jnz     fs_loop

    call    computeWeights
    call    weightedSample

    ; Optional /_/_ produce in-place shuffled output
    ; scratch buffer: n elements of 16 bytes
    mov     eax, dword ptr [arrCount]
    shl     eax, 4
    push    eax
    call    allocGlobal
    add     esp, 4
    test    eax, eax
    jz      alloc_fail
    mov     dword ptr [indicesPtr], eax

    ; scatter: scratch[perm[i]] = arr[i]
    ; NOTE(review): this places element i at slot perm[i]. If the k-th
    ; pick was meant to land in slot k, this should instead gather
    ; scratch[i] = arr[perm[i]] - confirm with the author.
    ; The loop bound lives in ecx and is never used as scratch.
    mov     ecx, dword ptr [arrCount]   ; ecx = elements left
    mov     esi, OFFSET arr             ; esi -> arr[i]
    mov     ebx, dword ptr [permPtr]    ; ebx -> perm[i]
    mov     edx, eax                    ; edx -> scratch[0]
copy_loop:
    mov     edi, dword ptr [ebx]        ; edi = perm[i]
    cmp     edi, dword ptr [arrCount]
    jae     copy_next                   ; unsigned: skip an out-of-range index
    shl     edi, 4
    add     edi, edx                    ; edi -> scratch[perm[i]]
    mov     eax, dword ptr [esi]
    mov     dword ptr [edi], eax
    mov     eax, dword ptr [esi + 4]
    mov     dword ptr [edi + 4], eax
    mov     eax, dword ptr [esi + 8]
    mov     dword ptr [edi + 8], eax
    mov     eax, dword ptr [esi + 12]
    mov     dword ptr [edi + 12], eax
copy_next:
    add     esi, 16
    add     ebx, 4
    dec     ecx
    jnz     copy_loop

    ; copy back: arr[0..n-1] = scratch[0..n-1]
    mov     esi, dword ptr [indicesPtr]
    mov     edi, OFFSET arr
    mov     ecx, dword ptr [arrCount]
    shl     ecx, 2                      ; 4 DWORDs per 16-byte element
    cld
    rep     movsd

    ; release every buffer
    mov     eax, dword ptr [indicesPtr]
    push    eax
    call    freeGlobal
    add     esp, 4
    mov     eax, dword ptr [scoresPtr]
    push    eax
    call    freeGlobal
    add     esp, 4
    mov     eax, dword ptr [perturbPtr]
    push    eax
    call    freeGlobal
    add     esp, 4
    mov     eax, dword ptr [weightsPtr]
    push    eax
    call    freeGlobal
    add     esp, 4
    mov     eax, dword ptr [permPtr]
    push    eax
    call    freeGlobal
    add     esp, 4
    mov     eax, dword ptr [pickedMask]
    push    eax
    call    freeGlobal
    add     esp, 4

exit_ok:
    push    0
    call    ExitProcess
alloc_fail:
    push    1
    call    ExitProcess
end start
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment