Last active
November 11, 2024 19:28
-
-
Save stillwwater/cdcebf7b7e42008052c71bf4ec4ff670 to your computer and use it in GitHub Desktop.
Simple rasterizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Syntax: NASM (Intel operand order), 64-bit mode, RIP-relative addressing.
bits 64
default rel

; Framebuffer geometry. One pixel is one 32-bit float.
; R_PITCH must equal (1 << R_PITCH_SHIFT); the accompanying C harness
; declares R_WIDTH = 256 and indexes the framebuffer with that row length,
; so the row pitch is 256 * 4 = 1024 bytes.
%define R_WIDTH         256
%define R_PITCH         (R_WIDTH * 4)
%define R_PITCH_SHIFT   10

section .data
float_1         dd 1.0
align 32
float_1x8       dd 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0

section .bss
; ALIGNB pads with reserved space; plain ALIGN would try to emit NOP bytes,
; which cannot be stored in an uninitialised section.
alignb 32
w0_row          resd 8                  ; 8 lanes of edge(B, C, V)
w1_row          resd 8                  ; 8 lanes of edge(C, A, V)
w2_row          resd 8                  ; 8 lanes of edge(A, B, V)
extern r_framebuffer

section .text
extern puts
extern r_vertexbuffer
extern r_lanes
;-----------------------------------------------------------------------
; edge dst, a_off, b_off, step
;
;   %1 dst    label of an 8-dword buffer that receives the result
;   %2 a_off  byte offset of vertex a inside r_vertexbuffer (x at +0, y at +4)
;   %3 b_off  byte offset of vertex b inside r_vertexbuffer
;   %4 step   xmm register whose low dword is the per-pixel X increment
;
; Evaluates the edge function for the sample point V = (ecx, esi):
;
;   w = (b.x - a.x) * (V.y - a.y) - (b.y - a.y) * (V.x - a.x)
;
; and stores w, w + step, ..., w + 7 * step to dst[0..7], i.e. the values
; for eight horizontally adjacent pixels.
;
; In:    ecx = V.x, esi = V.y (both preserved)
; Clobb: rax, r8, r10, r11, flags (volatile registers only)
;-----------------------------------------------------------------------
%macro edge 4
        mov     r10d, [r_vertexbuffer + %3 + 0]
        sub     r10d, [r_vertexbuffer + %2 + 0] ; r10 = b.x - a.x
        mov     r11d, esi
        sub     r11d, [r_vertexbuffer + %2 + 4] ; r11 = V.y - a.y
        imul    r10d, r11d                      ; r10 = (b.x - a.x) * (V.y - a.y)

        mov     r11d, [r_vertexbuffer + %3 + 4]
        sub     r11d, [r_vertexbuffer + %2 + 4] ; r11 = b.y - a.y
        mov     r8d, ecx
        sub     r8d, [r_vertexbuffer + %2 + 0]  ; r8  = V.x - a.x
        imul    r11d, r8d                       ; r11 = (b.y - a.y) * (V.x - a.x)

        sub     r10d, r11d                      ; r10 = edge(a, b, V)
        vmovd   eax, %4                         ; eax = X step for this edge

        ; Scalar fill of the eight lanes: lane i = w + i * step.
        mov     [%1 + 0], r10d
        add     r10d, eax
        mov     [%1 + 4], r10d
        add     r10d, eax
        mov     [%1 + 8], r10d
        add     r10d, eax
        mov     [%1 + 12], r10d
        add     r10d, eax
        mov     [%1 + 16], r10d
        add     r10d, eax
        mov     [%1 + 20], r10d
        add     r10d, eax
        mov     [%1 + 24], r10d
        add     r10d, eax
        mov     [%1 + 28], r10d
%endmacro
;-----------------------------------------------------------------------
; tri_sub dst, off_p, off_q
;
;   %1 dst    ymm register that receives the result
;   %2 off_p  byte offset of the minuend dword inside r_vertexbuffer
;   %3 off_q  byte offset of the subtrahend dword inside r_vertexbuffer
;
; Sets every 32-bit lane of dst to r_vertexbuffer[off_p] - r_vertexbuffer[off_q].
;
; Clobb: ymm0 (broadcast of the off_p dword), ymm1 (broadcast of the off_q dword)
;-----------------------------------------------------------------------
%macro tri_sub 3
        vpbroadcastd ymm1, dword [r_vertexbuffer + %3]
        vpbroadcastd ymm0, dword [r_vertexbuffer + %2]
        vpsubd  %1, ymm0, ymm1
%endmacro
;-----------------------------------------------------------------------
; mul8 reg
;
;   %1 reg    ymm register, updated in place
;
; Multiplies each 32-bit lane by 8 (wrapping modulo 2^32), turning a
; one-pixel step into an eight-pixel step.
;-----------------------------------------------------------------------
%macro mul8 1
        vpslld  %1, %1, 3
%endmacro
;-----------------------------------------------------------------------
; int r_fill_tri(void)
;
; Rasterizes the triangle stored in r_vertexbuffer into r_framebuffer,
; eight pixels per step, and repeats the whole fill 1024 times.
;
; Vertex layout: A at byte offset 0, B at 12, C at 24; each vertex has a
; 32-bit integer x at +0 and y at +4.
;
; For every 8-pixel group inside the bounding box the three edge functions
; are OR-ed together; lanes where the result is > 0 receive 1.0f, all other
; lanes receive 0.0f. Stores are unmasked, so the last group of a row may
; cover up to 7 pixels past max X.
;
; ABI:   assumes System V AMD64 (rdi/rsi treated as scratch) - confirm if
;        this is ever built for another target.
; Out:   rax = max Y of the triangle's bounding box
; Clobb: rcx, rdx, rsi, rdi, r8-r11, ymm0-ymm15, flags.
;        rbx, rbp and r12-r15 are saved and restored here because this
;        routine and the macros it expands may use them as scratch.
; Note:  no clipping is performed; coordinates must be non-negative and
;        inside the framebuffer.
;-----------------------------------------------------------------------
global r_fill_tri
r_fill_tri:
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

        mov     ebp, 1024               ; number of times the fill is repeated

.loop_tri:
        ; Bounding box: lane 0 = X, lane 1 = Y of each vertex.
        vmovups xmm0, [r_vertexbuffer + 0]      ; vertex A
        vmovups xmm1, [r_vertexbuffer + 12]     ; vertex B
        vmovups xmm2, [r_vertexbuffer + 24]     ; vertex C
        vpminsd xmm3, xmm0, xmm1
        vpminsd xmm3, xmm3, xmm2                ; xmm3 = { min X, min Y, ... }
        vpmaxsd xmm4, xmm0, xmm1
        vpmaxsd xmm4, xmm4, xmm2                ; xmm4 = { max X, max Y, ... }

        vmovq   rcx, xmm3
        mov     rsi, rcx
        shr     rsi, 32                 ; esi = min Y (current row)
        mov     ecx, ecx                ; rcx = min X, upper half cleared
        vmovq   rdx, xmm4               ; edx = max X (only edx is compared)
        mov     rdi, rdx
        shr     rdi, 32                 ; edi = max Y
        mov     ebx, ecx                ; ebx = min X, reloaded at each row start

        ; r9 = address of the first row of the bounding box. The column is
        ; applied later through the [r9 + 4 * rcx] store index.
        imul    rax, rsi, R_PITCH
        lea     r9, [r_framebuffer]
        add     r9, rax

        ; Per-pixel steps of the edge functions (all eight lanes equal).
        tri_sub ymm5, 4, 16             ; ymm5  = A01 = A.y - B.y  (w2 X step)
        tri_sub ymm6, 12, 0             ; ymm6  = B01 = B.x - A.x  (w2 Y step)
        tri_sub ymm7, 16, 28            ; ymm7  = A12 = B.y - C.y  (w0 X step)
        tri_sub ymm8, 24, 12            ; ymm8  = B12 = C.x - B.x  (w0 Y step)
        tri_sub ymm9, 28, 4             ; ymm9  = A20 = C.y - A.y  (w1 X step)
        tri_sub ymm10, 0, 24            ; ymm10 = B20 = A.x - C.x  (w1 Y step)

        ; Edge values for the eight pixels starting at (min X, min Y).
        edge    w0_row, 12, 24, xmm7    ; edge(B, C, V)
        vmovaps ymm2, [w0_row]
        edge    w1_row, 24, 0, xmm9     ; edge(C, A, V)
        vmovaps ymm3, [w1_row]
        edge    w2_row, 0, 12, xmm5     ; edge(A, B, V)
        vmovaps ymm4, [w2_row]

        ; The inner loop advances eight columns at a time.
        mul8    ymm7
        mul8    ymm9
        mul8    ymm5

        vmovaps ymm15, [float_1x8]      ; value written to covered pixels
        vpxor   ymm1, ymm1, ymm1        ; zero, compared against below

.loop_y:
        mov     ecx, ebx                ; restart at min X
        vmovaps ymm12, ymm2             ; w0 for this row
        vmovaps ymm13, ymm3             ; w1 for this row
        vmovaps ymm14, ymm4             ; w2 for this row

.loop_x:
        vpor    ymm0, ymm12, ymm13
        vpor    ymm0, ymm0, ymm14       ; sign bit set if any edge value is negative
        vpcmpgtd ymm0, ymm0, ymm1       ; all-ones where (w0 | w1 | w2) > 0
        vpand   ymm0, ymm0, ymm15       ; 1.0f where covered, 0.0f elsewhere
        vmovups [r9 + 4 * rcx], ymm0

        vpaddd  ymm12, ymm12, ymm7      ; step eight columns
        vpaddd  ymm13, ymm13, ymm9
        vpaddd  ymm14, ymm14, ymm5
        add     ecx, 8
        cmp     ecx, edx
        jle     .loop_x                 ; while x <= max X

        vpaddd  ymm2, ymm2, ymm8        ; step one row
        vpaddd  ymm3, ymm3, ymm10
        vpaddd  ymm4, ymm4, ymm6
        add     r9, R_PITCH             ; next scanline
        inc     esi
        cmp     esi, edi
        jle     .loop_y                 ; while y <= max Y

        dec     ebp
        jnz     .loop_tri

        vzeroupper                      ; leave AVX state clean for the caller
        mov     rax, rdi                ; return max Y

        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        ret
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "rasterizer.h" | |
| #include <assert.h> | |
| #include <stdlib.h> | |
| #include <stdio.h> | |
/* Framebuffer geometry: one float per pixel, R_PITCH is bytes per row. */
enum {
    R_WIDTH  = 256,
    R_HEIGHT = 128,
    R_PITCH  = R_WIDTH * sizeof(float),
};

/* Pixel storage; sized at 100 times R_WIDTH x R_HEIGHT floats. */
float r_framebuffer[100 * R_WIDTH * R_HEIGHT];

/* Vertex data read by r_fill_tri. */
int r_vertexbuffer[256];

/* Rasterizer entry point, defined outside this translation unit. */
extern int r_fill_tri(void);
| void | |
| image_write_tga(void) | |
| { | |
| FILE *file = fopen("framebuffer.tga", "wb+"); | |
| assert(file); | |
| unsigned char header[18] = { | |
| 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
| R_WIDTH & 0xFF, (R_WIDTH >> 8) & 0xFF, R_HEIGHT & 0xFF, (R_HEIGHT >> 8) & 0xFF, 32, 8, | |
| }; | |
| fwrite(header, 1, sizeof(header), file); | |
| for (unsigned i = 0; i < R_WIDTH * R_HEIGHT; ++i) { | |
| unsigned depth = r_framebuffer[i] * 255; | |
| fwrite(&depth, 1, 4, file); | |
| } | |
| fclose(file); | |
| } | |
| int | |
| main() | |
| { | |
| r_vertexbuffer[0] = 0; | |
| r_vertexbuffer[1] = 0; | |
| r_vertexbuffer[2] = 0; | |
| r_vertexbuffer[3] = 256; | |
| r_vertexbuffer[4] = 0; | |
| r_vertexbuffer[5] = 0; | |
| r_vertexbuffer[6] = 128; | |
| r_vertexbuffer[7] = 128; | |
| r_vertexbuffer[8] = 0; | |
| r_fill_tri(); | |
| image_write_tga(); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment