Created
February 20, 2026 01:33
-
-
Save Jokeren/d34ab092f427f98b1d1328325728dacf to your computer and use it in GitHub Desktop.
GEMM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| LDC R1, c[0x0][0x37c] | |
| S2R R2, SR_TID.X | |
| S2UR UR4, SR_CgaCtaId | |
| PMTRIG 0x4000 | |
| ISETP.LT.AND P0, PT, R2, 0x80, PT | |
| NOP | |
| BRA !P0, `(.L_x_0) 0x7f46c19b1410 | |
| R2UR UR6, R2 | |
| UPRMT UR7, URZ, 0x4210, UR4 | |
| NOP | |
| USHF.R.U32 UR6, UR6, 0x5, URZ | |
| ULOP3.LUT UR6, UR6, 0x3, URZ, 0xc0, !UPT | |
| UIMAD.U32 UR7, UR6, 0x8, UR7 | |
| UMOV UR10, 0x1ffffe | |
| UMOV UR11, 0xfffff800 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x6, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x34400], UR10 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x2, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x34420], UR10 | |
| UMOV UR10, 0x1ffffe | |
| UMOV UR11, 0x7ffff800 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x6, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x34430], UR10 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x2, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x34450], UR10 | |
| UIMAD.U32 UR7, UR6, 0x8, UR7 | |
| UMOV UR10, 0x1ffffe | |
| UMOV UR11, 0x7ffff800 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x2, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x344a0], UR10 | |
| UMOV UR10, 0x1ffffc | |
| UMOV UR11, 0xfffff000 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x2, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x344a8], UR10 | |
| UIMAD.U32 UR7, UR6, 0x10, UR7 | |
| UMOV UR10, 0x1ffffe | |
| UMOV UR11, 0x7ffff800 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x2, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x344c0], UR10 | |
| UMOV UR10, 0x1ffd40 | |
| UMOV UR11, 0xfff50000 | |
| UISETP.LT.AND UP0, UPT, UR6, 0x2, UPT | |
| @UP0 SYNCS.EXCH.64 URZ, [UR7+0x344c8], UR10 | |
| ISETP.EQ.AND P0, PT, R2, 0x0, PT | |
| UMOV UR6, 0x344d0 | |
| UPRMT UR6, UR6, 0x4210, UR4 | |
| @P0 STS [UR6+0x8], RZ | |
| MEMBAR.ALL.CTA | |
| PMTRIG 0x8000 | |
| NOP | |
| UCGABAR_ARV | |
| S2UR UR5, SR_CTAID.X | |
| USHF.R.U32 UR5, UR5, 0x1, URZ | |
| UMOV UR6, 0x344a0 | |
| UPRMT UR6, UR6, 0x4210, UR4 | |
| UMOV UR7, 0x0 | |
| UISETP.NE.OR UP0, UPT, URZ, URZ, !UPT | |
| MOV R3, 0x0 | |
| UMOV UR8, 0x344c0 | |
| UMOV UR9, 0x0 | |
| MOV R4, 0x0 | |
| UPRMT UR8, UR8, 0x4210, UR4 | |
| UCGABAR_WAIT | |
| ACQBULK | |
| LDC.64 R8, c[0x0][0xa80] | |
| ISETP.NE.AND P0, PT, R8, RZ, PT | |
| ISETP.NE.OR P0, PT, R9, RZ, P0 | |
| NOP | |
| @P0 LDG.E R5, [R8] | |
| @!P0 LDC R5, c[0x0][0x4e8] | |
| MOV R6, RZ | |
| LDCU UR10, c[0x0][0x4c4] | |
| LDCU UR11, c[0x0][0x4c8] | |
| LDCU UR12, c[0x0][0x4c0] | |
| LDCU UR13, c[0x0][0x390] | |
| LDCU UR14, c[0x0][0x394] | |
| LDCU UR15, c[0x0][0x38c] | |
| LDCU UR16, c[0x0][0x3a8] | |
| LDCU UR17, c[0x0][0x3ac] | |
| LDCU UR18, c[0x0][0x3a4] | |
| LDCU UR19, c[0x0][0x39c] | |
| LDCU UR20, c[0x0][0x3a0] | |
| LDCU UR21, c[0x0][0x398] | |
| LDCU UR22, c[0x0][0x388] | |
| LDCU UR23, c[0x0][0x4cc] | |
| BRA `(.L_x_1) 0x7f46c19af4b0 | |
| PLOP3.LUT P0, PT, PT, PT, UP0, 0xa0, 0x0 | |
| @!UP0 UIADD3 UR6, UPT, UPT, UR6, 0x10, URZ | |
| @UP0 UIADD3 UR6, UPT, UPT, UR6, -0x10, URZ | |
| UPLOP3.LUT UP0, UPT, UP0, !UPT, !UPT, 0xf, 0x0 | |
| @P0 LOP3.LUT R3, R3, RZ, RZ, 0xf, !PT | |
| UMOV UR24, 0x0 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P0, [UR8], R4 | |
| @!P0 NANOSLEEP.SYNCS 0xc350 | |
| @!P0 SYNCS.PHASECHK.TRANS64 P0, [UR8], R4 | |
| @P0 BRA `(.L_x_2) 0x7f46c19af510 | |
| BRA `(.L_x_3) 0x7f46c19af4c0 | |
| LDS R8, [UR8+0x18] | |
| LDS R7, [UR8+0x10] | |
| LOP3.LUT P0, R8, R8, 0x1, RZ, 0xc0, !PT | |
| NOP | |
| BRA !P0, `(.L_x_4) 0x7f46c19af620 | |
| R2UR UR24, R7 | |
| UPRMT UR25, UR8, 0x4210, URZ | |
| SYNCS.ARRIVE.TRANS64.RED.A1T0 RZ, [UR25+0x8], RZ | |
| LDCU UR25, c[0x0][0xa90] | |
| UIADD3 UR9, UPT, UPT, UR9, 0x1, URZ | |
| UIADD3 UR8, UPT, UPT, UR8, 0x20, URZ | |
| UISETP.LT.U32.AND UP2, UPT, UR9, UR25, UPT | |
| BRA.U UP2, `(.L_x_4) 0x7f46c19af620 | |
| UMOV UR8, 0x344c0 | |
| UMOV UR9, 0x0 | |
| UPRMT UR8, UR8, 0x4210, UR4 | |
| LOP3.LUT R4, R4, RZ, RZ, 0xf, !PT | |
| LDCU.64 UR26, c[0x0][0x110] | |
| UIADD3.64 UR26, UPT, UPT, UR26, 0x5c0, URZ | |
| UISETP.EQ.AND UP2, UPT, URZ, UR10, UPT | |
| UIMAD.WIDE.U32 UR38, UR5, UR10, URZ | |
| USHF.R.U32 UR35, UR39, UR11, URZ | |
| UIMAD.U32 UR31, UR35, UR12, UR5 | |
| @UP2 UMOV UR35, UR5 | |
| @UP2 UMOV UR31, URZ | |
| UMOV UR28, URZ | |
| UISETP.EQ.AND UP2, UPT, URZ, UR13, UPT | |
| UIMAD.WIDE.U32 UR38, UR31, UR13, URZ | |
| USHF.R.U32 UR29, UR39, UR14, URZ | |
| UIMAD.U32 UR30, UR29, UR15, UR31 | |
| @UP2 UMOV UR29, UR31 | |
| @UP2 UMOV UR30, URZ | |
| UISETP.GE.AND UP2, UPT, UR29, UR22, UPT | |
| BRA.U !UP2, `(.L_x_5) 0x7f46c19af7a0 | |
| UISETP.EQ.AND UP2, UPT, URZ, UR16, UPT | |
| UIMAD.WIDE.U32 UR38, UR30, UR16, URZ | |
| USHF.R.U32 UR33, UR39, UR17, URZ | |
| UIMAD.U32 UR34, UR33, UR18, UR30 | |
| @UP2 UMOV UR33, UR30 | |
| @UP2 UMOV UR34, URZ | |
| BRA `(.L_x_6) 0x7f46c19af800 | |
| UISETP.EQ.AND UP2, UPT, URZ, UR19, UPT | |
| UIMAD.WIDE.U32 UR38, UR30, UR19, URZ | |
| USHF.R.U32 UR33, UR39, UR20, URZ | |
| UIMAD.U32 UR34, UR33, UR21, UR30 | |
| @UP2 UMOV UR33, UR30 | |
| @UP2 UMOV UR34, URZ | |
| ULOP3.LUT UP2, URZ, UR29, 0x1, URZ, 0xc0, !UPT | |
| UIADD3 UR31, UPT, UPT, URZ, 0x0, -UR21 | |
| UIMAD.U32 UR34, UR29, UR31, UR34 | |
| @UP2 UIADD3 UR33, UPT, UPT, -UR33, URZ, UR23 | |
| UIMAD.WIDE.U32 UR30, UR4, -0x80000000, URZ | |
| USHF.R.U32 UR31, UR31, 0x0, URZ | |
| UIMAD.U32 UR30, UR31, -0x2, UR4 | |
| UIMAD.U32 UR33, UR33, 0x2, UR30 | |
| UIMAD.U32 UR34, UR34, 0x1, UR31 | |
| UIMAD.U32 UR33, UR33, 0x80, URZ | |
| UIMAD.U32 UR34, UR34, 0x100, URZ | |
| UMOV UR32, 0x30400 | |
| UPRMT UR32, UR32, 0x4210, UR4 | |
| LOP3.LUT P1, RZ, R2, 0x40, RZ, 0xc0, !PT | |
| LOP3.LUT P0, RZ, R2, 0x3f, RZ, 0xc0, !PT | |
| LOP3.LUT R7, R2, 0x7, RZ, 0xc0, !PT | |
| IMAD.IADD R11, R2, 0x1, R2 | |
| IMAD.SHL R10, R2, 0x40, RZ | |
| IMAD R7, R7, 0x90, RZ | |
| LOP3.LUT R8, R7, 0x50, R11, 0x78, !PT | |
| LOP3.LUT R8, R8, 0x400, R10, 0xf8, !PT | |
| VOTEU.ANY UP2, !P0 | |
| BRA !P1, `(.L_x_7) 0x7f46c19af990 | |
| UIADD3 UR32, UPT, UPT, UR32, 0x1000, URZ | |
| UIADD3 UR33, UPT, UPT, UR33, 0x40, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x100, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P0, [UR6], R3 | |
| @!P0 NANOSLEEP.SYNCS 0xc350 | |
| @!P0 SYNCS.PHASECHK.TRANS64 P0, [UR6], R3 | |
| BRA !P0, `(.L_x_8) 0x7f46c19af9b0 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| UIADD3 UR32, UPT, UPT, UR32, 0x2000, URZ | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| UIADD3 UR34, UPT, UPT, UR34, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| UIADD3 UR32, UPT, UPT, UR32, -0x2000, URZ | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| UIADD3 UR34, UPT, UPT, UR34, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| UIADD3 UR32, UPT, UPT, UR32, 0x2000, URZ | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| UIADD3 UR34, UPT, UPT, UR34, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| UIADD3 UR32, UPT, UPT, UR32, -0x2000, URZ | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| UIADD3 UR34, UPT, UPT, UR34, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| UIADD3 UR32, UPT, UPT, UR32, 0x2000, URZ | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| UIADD3 UR34, UPT, UPT, UR34, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| UIADD3 UR32, UPT, UPT, UR32, -0x2000, URZ | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| UIADD3 UR34, UPT, UPT, UR34, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| LDTM.16dp256bit.x4 R16, tmem[UR7] | |
| LDTM.16dp256bit.x4 R32, tmem[UR7+0x100000] | |
| UIADD3 UR32, UPT, UPT, UR32, 0x2000, URZ | |
| LOP3.LUT R9, R8, 0x20, RZ, 0x3c, !PT | |
| FFMA2 R16, R5.F32, R16.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R18, R5.F32, R18.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R20, R5.F32, R20.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R22, R5.F32, R22.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R16, R17, R16 | |
| FFMA2 R24, R5.F32, R24.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R17, R19, R18 | |
| FFMA2 R26, R5.F32, R26.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R18, R21, R20 | |
| FFMA2 R28, R5.F32, R28.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R19, R23, R22 | |
| FFMA2 R30, R5.F32, R30.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R24, R25, R24 | |
| F2FP.F16.F32.PACK_AB R25, R27, R26 | |
| STSM.16.MT88.4 [R8+UR32], R16 | |
| F2FP.F16.F32.PACK_AB R26, R29, R28 | |
| F2FP.F16.F32.PACK_AB R27, R31, R30 | |
| STSM.16.MT88.4 [R8+UR32+0x800], R24 | |
| FFMA2 R32, R5.F32, R32.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R34, R5.F32, R34.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R36, R5.F32, R36.F32x2.HI_LO, RZ.F32 | |
| FFMA2 R38, R5.F32, R38.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R32, R33, R32 | |
| FFMA2 R40, R5.F32, R40.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R33, R35, R34 | |
| FFMA2 R42, R5.F32, R42.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R34, R37, R36 | |
| FFMA2 R44, R5.F32, R44.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R35, R39, R38 | |
| FFMA2 R46, R5.F32, R46.F32x2.HI_LO, RZ.F32 | |
| F2FP.F16.F32.PACK_AB R40, R41, R40 | |
| F2FP.F16.F32.PACK_AB R41, R43, R42 | |
| STSM.16.MT88.4 [R9+UR32], R32 | |
| F2FP.F16.F32.PACK_AB R42, R45, R44 | |
| F2FP.F16.F32.PACK_AB R43, R47, R46 | |
| LOP3.LUT R8, R9, 0x60, RZ, 0x3c, !PT | |
| STSM.16.MT88.4 [R9+UR32+0x800], R40 | |
| LOP3.LUT R9, R8, 0x0, RZ, 0x3c, !PT | |
| LOP3.LUT R8, R9, 0x40, RZ, 0x3c, !PT | |
| FENCE.VIEW.ASYNC.S | |
| UIADD3 UR34, UPT, UPT, UR34, 0x20, URZ | |
| BAR.SYNC.DEFER_BLOCKING 0x0, 0x80 | |
| @UP2 UTMASTG.3D [UR32], [UR26] | |
| UTMACMDFLUSH | |
| @!UP0 UIADD3 UR7, UPT, UPT, UR7, -0xe0, URZ | |
| @UP0 UIADD3 UR7, UPT, UPT, UR7, -0x1e0, URZ | |
| UISETP.EQ.AND UP2, UPT, UR24, URZ, UPT | |
| BRA.U UP2, `(.L_x_9) 0x7f46c19b13d0 | |
| UMOV UR5, UR24 | |
| USHF.R.U32 UR5, UR5, 0x1, URZ | |
| ISETP.EQ.OR P0, PT, R2, RZ, !PT | |
| ULOP3.LUT UR25, UR6, 0xfeffffff, URZ, 0xc0, !UPT | |
| NOP | |
| @P0 SYNCS.ARRIVE.TRANS64.RED RZ, [UR25+0x8], RZ | |
| BRA `(.L_x_10) 0x7f46c19af460 | |
| UTCATOMSWS.AND URZ, URZ | |
| UPRMT UR25, UR8, 0x4210, URZ | |
| SYNCS.ARRIVE.TRANS64.RED.A1T0 RZ, [UR25+0x8], RZ | |
| BRA `(.L_x_11) 0x7f46c19b3580 | |
| NOP | |
| UCGABAR_ARV | |
| ISETP.LT.AND P0, PT, R2, 0xa0, PT | |
| NOP | |
| BRA P0, `(.L_x_12) 0x7f46c19b14f0 | |
| ISETP.LT.AND P0, PT, R2, 0xc0, PT | |
| NOP | |
| BRA P0, `(.L_x_13) 0x7f46c19b2670 | |
| MOV R3, 0x1 | |
| ISETP.LT.AND P0, PT, R2, 0xe0, PT | |
| LOP3.LUT.PAND P0, RZ, R3, UR4, RZ, 0x30, P0 | |
| NOP | |
| BRA P0, `(.L_x_14) 0x7f46c19b2a00 | |
| BRA `(.L_x_11) 0x7f46c19b3580 | |
| UMOV UR30, 0x34400 | |
| UMOV UR31, 0x34430 | |
| UMOV UR32, 0x34458 | |
| UMOV UR33, 0x344c0 | |
| UMOV UR34, 0x0 | |
| MOV R3, 0x0 | |
| MOV R4, 0x0 | |
| MOV R7, 0x0 | |
| UPRMT UR30, UR30, 0x4210, UR4 | |
| UPRMT UR31, UR31, 0x4210, UR4 | |
| UPRMT UR32, UR32, 0x4210, UR4 | |
| UPRMT UR33, UR33, 0x4210, UR4 | |
| ULOP3.LUT UR9, UR31, 0xfeffffff, URZ, 0xc0, !UPT | |
| ULOP3.LUT UR17, UR31, 0xfeffffff, URZ, 0xc0, !UPT | |
| UMOV UR25, UR31 | |
| UIMAD.WIDE.U32 UR50, UR4, -0x80000000, URZ | |
| USHF.R.U32 UR51, UR51, 0x0, URZ | |
| UIMAD.U32 UR50, UR51, -0x2, UR4 | |
| UMOV UR22, UR50 | |
| UMOV UR23, UR51 | |
| LDCU UR15, c[0x0][0x4d0] | |
| S2UR UR5, SR_CTAID.X | |
| USHF.R.U32 UR5, UR5, 0x1, URZ | |
| LDCU UR35, c[0x0][0x4c4] | |
| LDCU UR36, c[0x0][0x4c8] | |
| LDCU UR37, c[0x0][0x4c0] | |
| LDCU UR38, c[0x0][0x390] | |
| LDCU UR39, c[0x0][0x394] | |
| LDCU UR40, c[0x0][0x38c] | |
| LDCU UR41, c[0x0][0x3a8] | |
| LDCU UR42, c[0x0][0x3ac] | |
| LDCU UR43, c[0x0][0x3a4] | |
| LDCU UR44, c[0x0][0x39c] | |
| LDCU UR45, c[0x0][0x3a0] | |
| LDCU UR46, c[0x0][0x398] | |
| LDCU UR47, c[0x0][0x388] | |
| LDCU UR48, c[0x0][0x4cc] | |
| UCGABAR_WAIT | |
| ACQBULK | |
| LOP3.LUT R8, R2, 0x1f, RZ, 0xc0, !PT | |
| ISETP.LT.AND P1, PT, R8, 0x1, PT | |
| ISETP.LT.AND P0, PT, R8, 0x1, PT | |
| MOV R9, 0x1 | |
| LOP3.LUT.PAND P0, RZ, R9, UR4, RZ, 0x30, P0 | |
| ISETP.GE.AND P2, PT, R2, 0xa0, PT | |
| NOP | |
| BRA P2, `(.L_x_15) 0x7f46c19b2660 | |
| LDCU.64 UR50, c[0x0][0x110] | |
| UISETP.EQ.AND UP1, UPT, URZ, UR35, UPT | |
| UIMAD.WIDE.U32 UR54, UR5, UR35, URZ | |
| USHF.R.U32 UR12, UR55, UR36, URZ | |
| UIMAD.U32 UR53, UR12, UR37, UR5 | |
| @UP1 UMOV UR12, UR5 | |
| @UP1 UMOV UR53, URZ | |
| UMOV UR14, URZ | |
| UISETP.EQ.AND UP1, UPT, URZ, UR38, UPT | |
| UIMAD.WIDE.U32 UR54, UR53, UR38, URZ | |
| USHF.R.U32 UR49, UR55, UR39, URZ | |
| UIMAD.U32 UR52, UR49, UR40, UR53 | |
| @UP1 UMOV UR49, UR53 | |
| @UP1 UMOV UR52, URZ | |
| UISETP.GE.AND UP1, UPT, UR49, UR47, UPT | |
| BRA.U !UP1, `(.L_x_16) 0x7f46c19b1950 | |
| UISETP.EQ.AND UP1, UPT, URZ, UR41, UPT | |
| UIMAD.WIDE.U32 UR54, UR52, UR41, URZ | |
| USHF.R.U32 UR6, UR55, UR42, URZ | |
| UIMAD.U32 UR7, UR6, UR43, UR52 | |
| @UP1 UMOV UR6, UR52 | |
| @UP1 UMOV UR7, URZ | |
| BRA `(.L_x_17) 0x7f46c19b19b0 | |
| UISETP.EQ.AND UP1, UPT, URZ, UR44, UPT | |
| UIMAD.WIDE.U32 UR54, UR52, UR44, URZ | |
| USHF.R.U32 UR6, UR55, UR45, URZ | |
| UIMAD.U32 UR7, UR6, UR46, UR52 | |
| @UP1 UMOV UR6, UR52 | |
| @UP1 UMOV UR7, URZ | |
| ULOP3.LUT UP1, URZ, UR49, 0x1, URZ, 0xc0, !UPT | |
| UIADD3 UR53, UPT, UPT, URZ, 0x0, -UR46 | |
| UIMAD.U32 UR7, UR49, UR53, UR7 | |
| @UP1 UIADD3 UR6, UPT, UPT, -UR6, URZ, UR48 | |
| UIMAD.WIDE.U32 UR52, UR4, -0x80000000, URZ | |
| USHF.R.U32 UR53, UR53, 0x0, URZ | |
| UIMAD.U32 UR52, UR53, -0x2, UR4 | |
| UIMAD.U32 UR6, UR6, 0x2, UR52 | |
| UIMAD.U32 UR7, UR7, 0x1, UR53 | |
| UISETP.NE.OR UP1, UPT, URZ, URZ, !UPT | |
| UMOV UR20, UR12 | |
| UMOV UR11, 0xffffffc0 | |
| UMOV UR18, 0xffffffc0 | |
| UISETP.EQ.AND UP0, UPT, URZ, URZ, UPT | |
| UMOV UR8, 0x400 | |
| UPRMT UR8, UR8, 0x4210, UR4 | |
| UMOV UR16, 0x4400 | |
| UPRMT UR16, UR16, 0x4210, UR4 | |
| BRA.U !UP0, `(.L_x_11) 0x7f46c19b3580 | |
| UIADD3.64 UR38, UPT, UPT, UR50, 0x480, URZ | |
| UIADD3.64 UR40, UPT, UPT, UR50, 0x580, URZ | |
| UIADD3.64 UR36, UPT, UPT, UR50, 0x540, URZ | |
| UIADD3.64 UR50, UPT, UPT, UR50, 0x440, URZ | |
| MOV R5, 0x10000 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [UR30], R3 | |
| BRA `(.L_x_18) 0x7f46c19b1c00 | |
| IADD3 R4, PT, PT, R4, 0x1, RZ | |
| UIADD3 UR8, UPT, UPT, UR8, 0x8000, URZ | |
| UIADD3 UR16, UPT, UPT, UR16, 0x8000, URZ | |
| UIADD3 UR30, UPT, UPT, UR30, 0x8, URZ | |
| UISETP.EQ.AND UP3, UPT, UR31, UR32, UPT | |
| @!UP3 UIADD3 UR31, UPT, UPT, UR31, 0x8, URZ | |
| @UP3 UIADD3 UR31, UPT, UPT, UR31, -0x28, URZ | |
| @!UP3 UIADD3 UR17, UPT, UPT, UR17, 0x8, URZ | |
| @UP3 UIADD3 UR17, UPT, UPT, UR17, -0x28, URZ | |
| @!UP3 UIADD3 UR9, UPT, UPT, UR9, 0x8, URZ | |
| @UP3 UIADD3 UR9, UPT, UPT, UR9, -0x28, URZ | |
| ISETP.EQ.AND P2, PT, R4, 0x5, PT | |
| MOV R8, 0x8 | |
| UIMAD.U32 UR10, UR6, 0x80, URZ | |
| UIMAD.U32 UR19, UR7, 0x100, URZ | |
| UIMAD.U32 UR19, UR22, 0x80, UR19 | |
| UIADD3 UR11, UPT, UPT, UR11, 0x40, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x40, URZ | |
| BRA !P3, `(.L_x_19) 0x7f46c19b25c0 | |
| @P2 MOV R8, 0xffffffd8 | |
| @P2 LOP3.LUT R3, R3, RZ, RZ, 0xf, !PT | |
| UTMALDG.3D.2CTA [UR8], [UR50] | |
| UIADD3 UR8, UPT, UPT, UR8, 0x2000, URZ | |
| UIADD3 UR10, UPT, UPT, UR10, 0x40, URZ | |
| UTMALDG.3D.2CTA [UR8], [UR50] | |
| UIADD3 UR8, UPT, UPT, UR8, -0x2000, URZ | |
| UTMALDG.3D.2CTA [UR16], [UR38] | |
| @P0 SYNCS.ARRIVE.TRANS64.RED RZ, [UR31], R5 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [R8+UR30], R3 | |
| UIADD3 UR43, UPT, UPT, UR11, 0x40, URZ | |
| UISETP.LT.U32.AND UP2, UPT, UR43, UR15, UPT | |
| @!P2 BRA.U UP2, `(.L_x_20) 0x7f46c19b1b50 | |
| BRA.U UP2, `(.L_x_21) 0x7f46c19b23b0 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P4, [UR33], R7 | |
| @!P4 NANOSLEEP.SYNCS 0xc350 | |
| @!P4 SYNCS.PHASECHK.TRANS64 P4, [UR33], R7 | |
| @P4 BRA `(.L_x_22) 0x7f46c19b1db0 | |
| BRA `(.L_x_23) 0x7f46c19b1d60 | |
| LDS R10, [UR33+0x18] | |
| LDS R9, [UR33+0x10] | |
| LOP3.LUT P4, R10, R10, 0x1, RZ, 0xc0, !PT | |
| NOP | |
| BRA !P4, `(.L_x_24) 0x7f46c19b2280 | |
| R2UR UR5, R9 | |
| NOP | |
| USHF.R.U32 UR5, UR5, 0x1, URZ | |
| UPRMT UR43, UR33, 0x4210, URZ | |
| SYNCS.ARRIVE.TRANS64.RED.A1T0 RZ, [UR43+0x8], RZ | |
| LDCU UR43, c[0x0][0xa90] | |
| UIADD3 UR34, UPT, UPT, UR34, 0x1, URZ | |
| UIADD3 UR33, UPT, UPT, UR33, 0x20, URZ | |
| UISETP.LT.U32.AND UP3, UPT, UR34, UR43, UPT | |
| BRA.U UP3, `(.L_x_25) 0x7f46c19b1ee0 | |
| UMOV UR33, 0x344c0 | |
| UMOV UR34, 0x0 | |
| UPRMT UR33, UR33, 0x4210, UR4 | |
| LOP3.LUT R7, R7, RZ, RZ, 0xf, !PT | |
| LDCU UR48, c[0x0][0x4c4] | |
| UISETP.EQ.AND UP3, UPT, URZ, UR48, UPT | |
| UIMAD.WIDE.U32 UR46, UR5, UR48, URZ | |
| LDCU UR48, c[0x0][0x4c8] | |
| USHF.R.U32 UR12, UR47, UR48, URZ | |
| LDCU UR46, c[0x0][0x4c0] | |
| UIMAD.U32 UR45, UR12, UR46, UR5 | |
| @UP3 UMOV UR12, UR5 | |
| @UP3 UMOV UR45, URZ | |
| UMOV UR14, URZ | |
| LDCU UR48, c[0x0][0x390] | |
| UISETP.EQ.AND UP3, UPT, URZ, UR48, UPT | |
| UIMAD.WIDE.U32 UR46, UR45, UR48, URZ | |
| LDCU UR48, c[0x0][0x394] | |
| USHF.R.U32 UR43, UR47, UR48, URZ | |
| LDCU UR46, c[0x0][0x38c] | |
| UIMAD.U32 UR44, UR43, UR46, UR45 | |
| @UP3 UMOV UR43, UR45 | |
| @UP3 UMOV UR44, URZ | |
| LDCU UR45, c[0x0][0x388] | |
| UISETP.GE.AND UP3, UPT, UR43, UR45, UPT | |
| BRA.U !UP3, `(.L_x_26) 0x7f46c19b20e0 | |
| LDCU UR45, c[0x0][0x3a8] | |
| UISETP.EQ.AND UP3, UPT, URZ, UR45, UPT | |
| UIMAD.WIDE.U32 UR46, UR44, UR45, URZ | |
| LDCU UR45, c[0x0][0x3ac] | |
| USHF.R.U32 UR6, UR47, UR45, URZ | |
| LDCU UR45, c[0x0][0x3a4] | |
| UIMAD.U32 UR7, UR6, UR45, UR44 | |
| @UP3 UMOV UR6, UR44 | |
| @UP3 UMOV UR7, URZ | |
| BRA `(.L_x_27) 0x7f46c19b2170 | |
| LDCU UR45, c[0x0][0x39c] | |
| UISETP.EQ.AND UP3, UPT, URZ, UR45, UPT | |
| UIMAD.WIDE.U32 UR46, UR44, UR45, URZ | |
| LDCU UR45, c[0x0][0x3a0] | |
| USHF.R.U32 UR6, UR47, UR45, URZ | |
| LDCU UR45, c[0x0][0x398] | |
| UIMAD.U32 UR7, UR6, UR45, UR44 | |
| @UP3 UMOV UR6, UR44 | |
| @UP3 UMOV UR7, URZ | |
| ULOP3.LUT UP3, URZ, UR43, 0x1, URZ, 0xc0, !UPT | |
| LDCU UR46, c[0x0][0x398] | |
| UIADD3 UR45, UPT, UPT, URZ, 0x0, -UR46 | |
| UIMAD.U32 UR7, UR43, UR45, UR7 | |
| LDCU UR47, c[0x0][0x4cc] | |
| @UP3 UIADD3 UR6, UPT, UPT, -UR6, URZ, UR47 | |
| UIMAD.WIDE.U32 UR44, UR4, -0x80000000, URZ | |
| USHF.R.U32 UR45, UR45, 0x0, URZ | |
| UIMAD.U32 UR44, UR45, -0x2, UR4 | |
| UIMAD.U32 UR6, UR6, 0x2, UR44 | |
| UIMAD.U32 UR7, UR7, 0x1, UR45 | |
| UMOV UR20, UR12 | |
| UIMAD.U32 UR11, UR15, -0x1, UR11 | |
| UIMAD.U32 UR18, UR15, -0x1, UR18 | |
| UISETP.NE.OR UP1, UPT, URZ, URZ, !UP1 | |
| @!P2 BRA `(.L_x_20) 0x7f46c19b1b50 | |
| BRA `(.L_x_21) 0x7f46c19b23b0 | |
| PREEXIT | |
| UPRMT UR43, UR33, 0x4210, URZ | |
| SYNCS.ARRIVE.TRANS64.RED.A1T0 RZ, [UR43+0x8], RZ | |
| @!P2 LOP3.LUT R3, R3, RZ, RZ, 0xf, !PT | |
| UMOV UR30, 0x34400 | |
| UPRMT UR30, UR30, 0x4210, UR4 | |
| LOP3.LUT R9, R2, 0x1f, RZ, 0xc0, !PT | |
| ISETP.GT.AND P4, PT, R9, 0x5, PT | |
| NOP | |
| @P4 BRA `(.L_x_28) 0x7f46c19b23a0 | |
| ISETP.GT.AND P4, PT, R9, R4, PT | |
| NOP | |
| @P4 LOP3.LUT R3, R3, RZ, RZ, 0xf, !PT | |
| IMAD.SHL R9, R9, 0x8, RZ | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [R9+UR30], R3 | |
| @!P3 NANOSLEEP.SYNCS 0xc350 | |
| @!P3 SYNCS.PHASECHK.TRANS64 P3, [R9+UR30], R3 | |
| @!P3 BRA `(.L_x_29) 0x7f46c19b2360 | |
| BRA `(.L_x_11) 0x7f46c19b3580 | |
| MOV R4, 0x0 | |
| UIADD3 UR8, UPT, UPT, UR8, -0x28000, URZ | |
| UIADD3 UR16, UPT, UPT, UR16, -0x28000, URZ | |
| UIADD3 UR30, UPT, UPT, UR30, -0x28, URZ | |
| UISETP.EQ.AND UP3, UPT, UR31, UR32, UPT | |
| @!UP3 UIADD3 UR31, UPT, UPT, UR31, 0x8, URZ | |
| @UP3 UIADD3 UR31, UPT, UPT, UR31, -0x28, URZ | |
| @!UP3 UIADD3 UR17, UPT, UPT, UR17, 0x8, URZ | |
| @UP3 UIADD3 UR17, UPT, UPT, UR17, -0x28, URZ | |
| @!UP3 UIADD3 UR9, UPT, UPT, UR9, 0x8, URZ | |
| @UP3 UIADD3 UR9, UPT, UPT, UR9, -0x28, URZ | |
| ISETP.EQ.AND P2, PT, R4, 0x5, PT | |
| MOV R8, 0x8 | |
| UIMAD.U32 UR10, UR6, 0x80, URZ | |
| UIMAD.U32 UR19, UR7, 0x100, URZ | |
| UIMAD.U32 UR19, UR22, 0x80, UR19 | |
| UIADD3 UR11, UPT, UPT, UR11, 0x40, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x40, URZ | |
| BRA !P3, `(.L_x_30) 0x7f46c19b2610 | |
| @P2 MOV R8, 0xffffffd8 | |
| @P2 LOP3.LUT R3, R3, RZ, RZ, 0xf, !PT | |
| UTMALDG.3D.2CTA [UR8], [UR50] | |
| UIADD3 UR8, UPT, UPT, UR8, 0x2000, URZ | |
| UIADD3 UR10, UPT, UPT, UR10, 0x40, URZ | |
| UTMALDG.3D.2CTA [UR8], [UR50] | |
| UIADD3 UR8, UPT, UPT, UR8, -0x2000, URZ | |
| UTMALDG.3D.2CTA [UR16], [UR38] | |
| @P0 SYNCS.ARRIVE.TRANS64.RED RZ, [UR31], R5 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [R8+UR30], R3 | |
| UIADD3 UR43, UPT, UPT, UR11, 0x40, URZ | |
| UISETP.LT.U32.AND UP2, UPT, UR43, UR15, UPT | |
| @!P2 BRA.U UP2, `(.L_x_20) 0x7f46c19b1b50 | |
| BRA `(.L_x_23) 0x7f46c19b1d60 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [UR30], R3 | |
| @!P3 NANOSLEEP.SYNCS 0xc350 | |
| @!P3 SYNCS.PHASECHK.TRANS64 P3, [UR30], R3 | |
| @P3 BRA `(.L_x_31) 0x7f46c19b1c80 | |
| BRA `(.L_x_19) 0x7f46c19b25c0 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [UR30], R3 | |
| @!P3 NANOSLEEP.SYNCS 0xc350 | |
| @!P3 SYNCS.PHASECHK.TRANS64 P3, [UR30], R3 | |
| @P3 BRA `(.L_x_32) 0x7f46c19b24e0 | |
| BRA `(.L_x_30) 0x7f46c19b2610 | |
| BRA `(.L_x_11) 0x7f46c19b3580 | |
| UISETP.NE.AND UP0, UPT, UR4, URZ, UPT | |
| BRA.U UP0, `(.L_x_11) 0x7f46c19b3580 | |
| MOV R3, 0x0 | |
| UMOV UR6, 0x0 | |
| UMOV UR9, 0x344c0 | |
| MOV R5, 0x0 | |
| UCGABAR_WAIT | |
| PMTRIG 0x4000 | |
| LDCU UR7, c[0x0][0xa90] | |
| UISETP.EQ.U32.AND UP0, UPT, UR7, URZ, UPT | |
| IADD3 R5, PT, PT, R5, 0x1, RZ | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P0, [UR9+0x8], R3 | |
| @!P0 NANOSLEEP.SYNCS 0xc350 | |
| @!P0 SYNCS.PHASECHK.TRANS64 P0, [UR9+0x8], R3 | |
| @P0 BRA `(.L_x_33) 0x7f46c19b2770 | |
| BRA `(.L_x_34) 0x7f46c19b2720 | |
| LOP3.LUT R6, R2, 0x1f, RZ, 0xc0, !PT | |
| ISETP.LT.AND P0, PT, R6, 0x2, PT | |
| PRMT R6, RZ, 0x4210, R6 | |
| MOV R7, 0x10 | |
| @P0 SYNCS.ARRIVE.TRANS64.RED RZ, [R6+UR9], R7 | |
| WARPSYNC.ALL | |
| UIADD3 UR8, UPT, UPT, UR9, 0x10, URZ | |
| @!UP0 UGETNEXTWORKID.BROADCAST [UR8], [UR9] | |
| BRA.U !UP0, `(.L_x_35) 0x7f46c19b2860 | |
| LOP3.LUT R6, R2, 0x1f, RZ, 0xc0, !PT | |
| ISETP.LT.AND P0, PT, R6, 0x2, PT | |
| PRMT R6, RZ, 0x4210, R6 | |
| MOV R7, 0x10 | |
| @P0 SYNCS.ARRIVE.TRANS64.RED.A0TX RZ, [R6+UR9], R7 | |
| WARPSYNC.ALL | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P0, [UR9], R3 | |
| @!P0 NANOSLEEP.SYNCS 0xc350 | |
| @!P0 SYNCS.PHASECHK.TRANS64 P0, [UR9], R3 | |
| @P0 BRA `(.L_x_36) 0x7f46c19b28b0 | |
| BRA `(.L_x_35) 0x7f46c19b2860 | |
| LDS R4, [UR8+0x8] | |
| LOP3.LUT P0, R4, R4, 0x1, RZ, 0xc0, !PT | |
| NOP | |
| BRA P0, `(.L_x_37) 0x7f46c19b2960 | |
| LOP3.LUT R3, R3, RZ, RZ, 0xf, !PT | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P1, [UR9+0x8], R3 | |
| @!P1 NANOSLEEP.SYNCS 0xc350 | |
| @!P1 SYNCS.PHASECHK.TRANS64 P1, [UR9+0x8], R3 | |
| @P1 BRA `(.L_x_38) 0x7f46c19b2950 | |
| BRA `(.L_x_39) 0x7f46c19b2900 | |
| BRA `(.L_x_11) 0x7f46c19b3580 | |
| LDCU UR7, c[0x0][0xa90] | |
| UIADD3 UR6, UPT, UPT, UR6, 0x1, URZ | |
| UIADD3 UR9, UPT, UPT, UR9, 0x20, URZ | |
| UISETP.LT.U32.AND UP1, UPT, UR6, UR7, UPT | |
| BRA.U UP1, `(.L_x_40) 0x7f46c19b29f0 | |
| UMOV UR9, 0x344c0 | |
| UMOV UR6, 0x0 | |
| UPRMT UR9, UR9, 0x4210, UR4 | |
| LOP3.LUT R3, R3, RZ, RZ, 0xf, !PT | |
| BRA `(.L_x_41) 0x7f46c19b2710 | |
| UMOV UR6, URZ | |
| UMOV UR7, 0x1 | |
| S2UR UR5, SR_CTAID.X | |
| USHF.R.U32 UR5, UR5, 0x1, URZ | |
| UMOV UR14, 0x10 | |
| NOP | |
| UTCATOMSWS.2CTA.FIND_AND_SET UP0, URZ, UR14 | |
| BRA.U UP0, `(.L_x_42) 0x7f46c19b2aa0 | |
| NANOSLEEP 0x100 | |
| BRA `(.L_x_43) 0x7f46c19b2a60 | |
| UMOV UR9, 0x34430 | |
| UMOV UR10, 0x343f8 | |
| UMOV UR11, 0x344a0 | |
| UPRMT UR9, UR9, 0x4210, UR4 | |
| UPRMT UR10, UR10, 0x4210, UR4 | |
| UPRMT UR11, UR11, 0x4210, UR4 | |
| UMOV UR14, 0x0 | |
| UMOV UR15, 0x10408010 | |
| UMOV UR16, 0x2000040 | |
| UMOV UR17, 0x40004040 | |
| UMOV UR18, 0x2000440 | |
| UMOV UR19, 0x40004040 | |
| UMOV UR20, 0x0 | |
| UISETP.NE.OR UP0, UPT, URZ, URZ, !UPT | |
| MOV R5, 0x0 | |
| MOV R6, 0x0 | |
| MOV R3, 0x40 | |
| LDCU UR21, c[0x0][0x4d0] | |
| UMOV UR12, 0x344c0 | |
| UMOV UR13, 0x0 | |
| MOV R4, 0x0 | |
| UPRMT UR12, UR12, 0x4210, UR4 | |
| UIMAD.WIDE.U32 UR24, UR4, -0x80000000, URZ | |
| USHF.R.U32 UR25, UR25, 0x0, URZ | |
| UIMAD.U32 UR24, UR25, -0x2, UR4 | |
| UIMAD UR25, UR25, 0x2, URZ | |
| USHF.L.U64.HI UR22, URZ, UR25, 0x3 | |
| USHF.L.U64.HI UR25, URZ, UR24, 0x3 | |
| ULOP3.LUT UR22, UR22, UR25, URZ, 0xfc, !UPT | |
| UISETP.NE.OR UP1, UPT, URZ, URZ, UPT | |
| UISETP.NE.OR UP2, UPT, URZ, URZ, !UPT | |
| ISETP.GE.AND P0, PT, R3, UR21, PT | |
| UCGABAR_WAIT | |
| ISETP.NE.AND P2, PT, RZ, RZ, PT | |
| UIADD3 UR8, UPT, UPT, UR9, 0x8, URZ | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [UR12], R4 | |
| @!P3 NANOSLEEP.SYNCS 0xc350 | |
| @!P3 SYNCS.PHASECHK.TRANS64 P3, [UR12], R4 | |
| @P3 BRA `(.L_x_44) 0x7f46c19b2d20 | |
| BRA `(.L_x_45) 0x7f46c19b2cd0 | |
| LDS R7, [UR12+0x18] | |
| LOP3.LUT P3, R7, R7, 0x1, RZ, 0xc0, !PT | |
| UPRMT UR23, UR12, 0x4210, URZ | |
| SYNCS.ARRIVE.TRANS64.RED.A1T0 RZ, [UR23+0x8], RZ | |
| BRA !P3, `(.L_x_46) 0x7f46c19b2e00 | |
| LDCU UR23, c[0x0][0xa90] | |
| UIADD3 UR13, UPT, UPT, UR13, 0x1, URZ | |
| UIADD3 UR12, UPT, UPT, UR12, 0x20, URZ | |
| UISETP.LT.U32.AND UP3, UPT, UR13, UR23, UPT | |
| BRA.U UP3, `(.L_x_46) 0x7f46c19b2e00 | |
| UMOV UR12, 0x344c0 | |
| UMOV UR13, 0x0 | |
| UPRMT UR12, UR12, 0x4210, UR4 | |
| LOP3.LUT R4, R4, RZ, RZ, 0xf, !PT | |
| BRA `(.L_x_47) 0x7f46c19b34e0 | |
| IADD3 R3, PT, PT, R3, 0x40, RZ | |
| UISETP.EQ.AND UP3, UPT, UR7, 0x5, UPT | |
| UISETP.EQ.AND UP2, UPT, UR6, 0x4, UPT | |
| UIADD3 UR6, UPT, UPT, UR6, 0x1, URZ | |
| UIADD3 UR16, UPT, UPT, UR16, 0x680, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x7fa, URZ | |
| UMOV UR15, 0x10408010 | |
| @!UP0 UIADD3 UR20, UPT, UPT, UR20, 0x0, URZ | |
| @UP0 UIADD3 UR20, UPT, UPT, UR20, 0x0, URZ | |
| ISETP.GE.AND P0, PT, R3, UR21, PT | |
| PLOP3.LUT P2, PT, PT, PT, UP3, 0xa0, 0x0 | |
| @!UP3 UIADD3 UR8, UPT, UPT, UR9, 0x8, URZ | |
| @UP3 UIADD3 UR8, UPT, UPT, UR9, -0x28, URZ | |
| @UP3 UMOV UR7, URZ | |
| @!UP3 UIADD3 UR7, UPT, UPT, UR7, 0x1, URZ | |
| BRA !P1, `(.L_x_47) 0x7f46c19b34e0 | |
| @P2 LOP3.LUT R5, R5, RZ, RZ, 0xf, !PT | |
| @!P0 SYNCS.PHASECHK.TRANS64.TRYWAIT P1, [UR8], R5 | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], !UP1 | |
| UIADD3 UR16, UPT, UPT, UR16, 0x80, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x2, URZ | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], UPT | |
| UIADD3 UR16, UPT, UPT, UR16, 0x80, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x2, URZ | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], UPT | |
| UIADD3 UR16, UPT, UPT, UR16, 0x80, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x2, URZ | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], UPT | |
| UMOV UR9, UR8 | |
| UIADD3 UR10, UPT, UPT, UR10, 0x8, URZ | |
| UTCBAR.2CTA.MULTICAST [UR10], URZ, UR22 | |
| UISETP.NE.AND UP1, UPT, URZ, URZ, UPT | |
| BRA.U !P0, !UP2, `(.L_x_48) 0x7f46c19b2e10 | |
| BRA !P0, `(.L_x_49) 0x7f46c19b32b0 | |
| USHF.L.U64.HI UR23, URZ, UR4, 0x3 | |
| UTCBAR.2CTA.MULTICAST [UR11], URZ, UR23 | |
| PLOP3.LUT P2, PT, PT, PT, UP0, 0xa0, 0x0 | |
| @!UP0 UIADD3 UR11, UPT, UPT, UR11, 0x10, URZ | |
| @UP0 UIADD3 UR11, UPT, UPT, UR11, -0x10, URZ | |
| @!UP0 UIADD3 UR20, UPT, UPT, UR20, 0x100, URZ | |
| @UP0 UIADD3 UR20, UPT, UPT, UR20, -0x100, URZ | |
| UPLOP3.LUT UP0, UPT, UP0, !UPT, !UPT, 0xf, 0x0 | |
| @P2 LOP3.LUT R6, R6, RZ, RZ, 0xf, !PT | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P2, [UR11+0x8], R6 | |
| @!P2 NANOSLEEP.SYNCS 0xc350 | |
| @!P2 SYNCS.PHASECHK.TRANS64 P2, [UR11+0x8], R6 | |
| BRA !P2, `(.L_x_50) 0x7f46c19b30c0 | |
| LOP3.LUT P2, R7, R7, 0x1, RZ, 0xc0, !PT | |
| NOP | |
| BRA !P2, `(.L_x_11) 0x7f46c19b3580 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P3, [UR12], R4 | |
| @!P3 NANOSLEEP.SYNCS 0xc350 | |
| @!P3 SYNCS.PHASECHK.TRANS64 P3, [UR12], R4 | |
| @P3 BRA `(.L_x_51) 0x7f46c19b3180 | |
| BRA `(.L_x_52) 0x7f46c19b3130 | |
| LDS R7, [UR12+0x18] | |
| LOP3.LUT P3, R7, R7, 0x1, RZ, 0xc0, !PT | |
| UPRMT UR23, UR12, 0x4210, URZ | |
| SYNCS.ARRIVE.TRANS64.RED.A1T0 RZ, [UR23+0x8], RZ | |
| BRA !P3, `(.L_x_53) 0x7f46c19b3260 | |
| LDCU UR23, c[0x0][0xa90] | |
| UIADD3 UR13, UPT, UPT, UR13, 0x1, URZ | |
| UIADD3 UR12, UPT, UPT, UR12, 0x20, URZ | |
| UISETP.LT.U32.AND UP3, UPT, UR13, UR23, UPT | |
| BRA.U UP3, `(.L_x_53) 0x7f46c19b3260 | |
| UMOV UR12, 0x344c0 | |
| UMOV UR13, 0x0 | |
| UPRMT UR12, UR12, 0x4210, UR4 | |
| LOP3.LUT R4, R4, RZ, RZ, 0xf, !PT | |
| MOV R3, RZ | |
| UISETP.EQ.AND UP1, UPT, URZ, URZ, UPT | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P1, [UR8], R5 | |
| BRA.U !UP2, `(.L_x_48) 0x7f46c19b2e10 | |
| BRA `(.L_x_49) 0x7f46c19b32b0 | |
| IADD3 R3, PT, PT, R3, 0x40, RZ | |
| UISETP.EQ.AND UP3, UPT, UR7, 0x5, UPT | |
| UISETP.EQ.AND UP2, UPT, UR6, 0x4, UPT | |
| UMOV UR6, 0x0 | |
| UIADD3 UR16, UPT, UPT, UR16, -0x2980, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, -0x2806, URZ | |
| UMOV UR15, 0x10408010 | |
| @!UP0 UIADD3 UR20, UPT, UPT, UR20, 0x0, URZ | |
| @UP0 UIADD3 UR20, UPT, UPT, UR20, 0x0, URZ | |
| ISETP.GE.AND P0, PT, R3, UR21, PT | |
| PLOP3.LUT P2, PT, PT, PT, UP3, 0xa0, 0x0 | |
| @!UP3 UIADD3 UR8, UPT, UPT, UR9, 0x8, URZ | |
| @UP3 UIADD3 UR8, UPT, UPT, UR9, -0x28, URZ | |
| @UP3 UMOV UR7, URZ | |
| @!UP3 UIADD3 UR7, UPT, UPT, UR7, 0x1, URZ | |
| BRA !P1, `(.L_x_54) 0x7f46c19b3530 | |
| @P2 LOP3.LUT R5, R5, RZ, RZ, 0xf, !PT | |
| @!P0 SYNCS.PHASECHK.TRANS64.TRYWAIT P1, [UR8], R5 | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], !UP1 | |
| UIADD3 UR16, UPT, UPT, UR16, 0x80, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x2, URZ | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], UPT | |
| UIADD3 UR16, UPT, UPT, UR16, 0x80, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x2, URZ | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], UPT | |
| UIADD3 UR16, UPT, UPT, UR16, 0x80, URZ | |
| UIADD3 UR18, UPT, UPT, UR18, 0x2, URZ | |
| UTCHMMA.2CTA gdesc[UR16], gdesc[UR18], tmem[UR20], tmem[UR14], idesc[UR15], UPT | |
| UMOV UR9, UR8 | |
| UIADD3 UR10, UPT, UPT, UR10, -0x28, URZ | |
| UTCBAR.2CTA.MULTICAST [UR10], URZ, UR22 | |
| UISETP.NE.AND UP1, UPT, URZ, URZ, UPT | |
| BRA.U !P0, !UP2, `(.L_x_48) 0x7f46c19b2e10 | |
| BRA !P0, `(.L_x_49) 0x7f46c19b32b0 | |
| BRA `(.L_x_55) 0x7f46c19b3030 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P1, [UR9], R5 | |
| @!P1 NANOSLEEP.SYNCS 0xc350 | |
| @!P1 SYNCS.PHASECHK.TRANS64 P1, [UR9], R5 | |
| BRA P1, `(.L_x_56) 0x7f46c19b2f10 | |
| BRA `(.L_x_47) 0x7f46c19b34e0 | |
| SYNCS.PHASECHK.TRANS64.TRYWAIT P1, [UR9], R5 | |
| @!P1 NANOSLEEP.SYNCS 0xc350 | |
| @!P1 SYNCS.PHASECHK.TRANS64 P1, [UR9], R5 | |
| BRA P1, `(.L_x_57) 0x7f46c19b33b0 | |
| BRA `(.L_x_54) 0x7f46c19b3530 | |
| EXIT |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment