-
-
Save futureengine2/7c8fbc6fefce1818ff1edcd4d7e7bfcf to your computer and use it in GitHub Desktop.
| static void gi_on_gpu(u8* in_bitmap, int w, int h) { | |
| #define num_cascades 7 | |
| static bool initialized; | |
| static gpu_bindgroup_t texture_bindgroup[2]; | |
| static gpu_bindgroup_t cascade_uniform_bindgroup[num_cascades]; | |
| static gpu_bindgroup_t render_uniform_bindgroup; | |
| static gpu_buffer_t vertex_buffer; | |
| static gpu_buffer_t uniform_buffer; | |
| static gpu_pipeline_t pipeline; | |
| static gpu_bindgroup_layout_t uniform_bindgroup_layout; | |
| static gpu_bindgroup_layout_t texture_bindgroup_layout; | |
| static lifetime_t texture_lifetime; | |
| static gpu_texture_t textures[2]; | |
| static gpu_texture_t input_texture; | |
| lifetime_t* lifetime = g_platform->lifetime; | |
| f32 d0 = 1.f; // distance between probes in cascade 0 | |
| int r0 = 4; // number of rays in cascade 0 | |
| int n0 = (int)floorf(2*w/d0); // number of probes in cascade 0 per dimension | |
| int cn = num_cascades; | |
| typedef struct { | |
| f32 d0; | |
| int r0; | |
| int n0; | |
| int ci; | |
| int cn; | |
| int do_render; | |
| int add_sky_light; | |
| int padding; | |
| v2 resolution; | |
| v2 padding2; | |
| } uniform_t; | |
| if (!initialized) { | |
| lifetime_t temp_lifetime = {0}; | |
| initialized = true; | |
| // create bindgroup layouts | |
| uniform_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){ | |
| .name = "gi uniform bgl", | |
| .entries = { | |
| { | |
| .visibility = gpu_visibility_fragment, | |
| .type = gpu_binding_type_buffer, | |
| .buffer.type = gpu_buffer_binding_type_uniform, | |
| }, | |
| }, | |
| }); | |
| texture_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){ | |
| .name = "gi texture bgl", | |
| .entries = { | |
| { | |
| .visibility = gpu_visibility_fragment, | |
| .type = gpu_binding_type_sampler, | |
| }, | |
| { | |
| .visibility = gpu_visibility_fragment, | |
| .type = gpu_binding_type_sampler, | |
| }, | |
| }, | |
| }); | |
| // create pipeline | |
| pipeline = gpu_pipeline_make(lifetime, &(gpu_pipeline_desc_t){ | |
| .name = "gi render shader", | |
| .code = file_read("shaders/gi.glsl", &temp_lifetime).bytes, | |
| .bgls = { | |
| uniform_bindgroup_layout, | |
| texture_bindgroup_layout, | |
| }, | |
| }); | |
| // create uniform buffer (we pack all our different uniforms in one buffer), one per cascade and one for rendering | |
| { | |
| gpu_uniform_packer_t p = gpu_uniform_packer_begin(sizeof(uniform_t), num_cascades+1, lifetime); | |
| uniform_buffer = p.handle; | |
| // set cascade uniforms | |
| for (int i = 0; i < num_cascades; ++i) { | |
| *(uniform_t*)p.data = (uniform_t){ | |
| .d0 = d0, | |
| .r0 = r0, | |
| .n0 = n0, | |
| .ci = i, | |
| .cn = num_cascades, | |
| .add_sky_light = 1, | |
| .resolution = {(f32)w,(f32)h}, | |
| }; | |
| cascade_uniform_bindgroup[i] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){ | |
| .name = "gi", | |
| .layout = uniform_bindgroup_layout, | |
| .entries = {gpu_uniform_packer_bindgroup_entry(&p)}, | |
| }); | |
| gpu_uniform_packer_next(&p); | |
| } | |
| // set render uniform | |
| *(uniform_t*)p.data = (uniform_t){ | |
| .d0 = d0, | |
| .r0 = r0, | |
| .n0 = n0, | |
| .ci = 0, | |
| .cn = num_cascades, | |
| .do_render = 1, | |
| .resolution = {(f32)w,(f32)h}, | |
| }; | |
| render_uniform_bindgroup = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){ | |
| .name = "gi", | |
| .layout = uniform_bindgroup_layout, | |
| .entries = {gpu_uniform_packer_bindgroup_entry(&p)}, | |
| }); | |
| gpu_uniform_packer_end(&p); | |
| } | |
| // create textures | |
| input_texture = gpu_texture_make(w, h, gpu_texture_format_rgb8, filter_type_nearest, false, lifetime); | |
| gpu_texture_set_border(input_texture, (color_t){1,1,1,1}); | |
| textures[0] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime); | |
| textures[1] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime); | |
| texture_bindgroup[0] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){ | |
| .name = "gi", | |
| .layout = texture_bindgroup_layout, | |
| .entries = { | |
| {.sampler = {input_texture}}, | |
| {.sampler = {textures[0]}}, | |
| }, | |
| }); | |
| texture_bindgroup[1] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){ | |
| .name = "gi", | |
| .layout = texture_bindgroup_layout, | |
| .entries = { | |
| {.sampler = {input_texture}}, | |
| {.sampler = {textures[1]}}, | |
| }, | |
| }); | |
| lifetime_destroy(&temp_lifetime); | |
| } | |
| // update input texture | |
| gpu_texture_set_data(input_texture, in_bitmap); | |
| // clear texture for pingponging | |
| gpu_texture_clear(textures[(cn-1)%2], (color_t){0}); | |
| // build cascades | |
| for (int i = cn-1; i >= 0; --i) { | |
| drawcall_render(&(drawcall_t){ | |
| .pipeline = pipeline, | |
| .last_vertex = 6, | |
| .bindgroups = {cascade_uniform_bindgroup[i], texture_bindgroup[i%2]}, | |
| .outputs = {textures[(i+1)%2]}, | |
| }); | |
| } | |
| // render | |
| drawcall_render(&(drawcall_t){ | |
| .pipeline = pipeline, | |
| .last_vertex = 6, | |
| .bindgroups = {render_uniform_bindgroup, texture_bindgroup[cn%2]}, | |
| }); | |
| #undef num_cascades | |
| } |
#ifdef VERTEX_SHADER
out vec2 fuv;

// Vertex stage: produces a screen-covering quad (two triangles) purely from
// gl_VertexID, so no vertex buffer is read here.
// Draw with vertex count = 6.
void main(void) {
    vec2 corners[6] = vec2[6](
        vec2(-1,-1), vec2(1,-1), vec2(1,1),   // first triangle
        vec2(-1,-1), vec2(1,1),  vec2(-1,1)   // second triangle
    );
    vec2 ndc = corners[gl_VertexID%6];
    gl_Position = vec4(ndc, 0, 1);
    // map [-1,1] to [0,1] and flip the vertical axis
    vec2 uv = ndc*0.5+0.5;
    fuv = vec2(uv.x, 1 - uv.y);
}
#endif /* VERTEX_SHADER */
#ifdef FRAGMENT_SHADER
// Per-pass parameters. The block is std140, so the member order and the two
// padding members are part of the layout and must not be rearranged.
layout(std140, binding = 0) uniform Uniform {
    float d0;               // distance between probes in cascade 0
    int   r0;               // number of rays in cascade 0
    int   n0;               // number of probes in cascade 0 (per dimension)
    int   ci;               // index of the cascade being built
    int   cn;               // total number of cascades
    int   should_do_render; // 1: render from cascade 0 instead of building a cascade
    int   add_sky_light;    // non-zero: add sky lighting to the uppermost cascade
    int   padding;
    vec2  u_resolution;     // resolution of the input texture
    vec2  padding4;
};

layout(binding = 1) uniform sampler2D u_input; // world data that the rays are marched through
layout(binding = 2) uniform sampler2D u_prev;  // previous cascade (ping-ponged with the output texture)

in vec2 fuv;                          // uv written by the vertex stage
layout(location = 0) out vec4 ocolor; // fragment output

const float PI = 3.1415927;
// raymarch2d: Implementation of Amanatides & Woo voxel marching algo.
// This struct is the traversal state; raymarch2d_make() fills it and
// raymarch2d_next() advances it by one cell.
struct raymarch2d_t {
    int x, y;       // current cell
    int sx, sy;     // step per axis: -1, 0 or 1
    int ex, ey;     // cell coordinate at which the traversal stops
    float tmx, tmy; // per-axis ray parameter; the smaller one decides which axis steps next
    float tdx, tdy; // amount added to tmx / tmy each time that axis steps
};
// Sets up a grid traversal from (x0,y0) towards (x1,y1), in cell units.
//
// The returned state starts in the cell containing (x0,y0). The stop
// coordinates ex/ey lie two cells past the cell containing (x1,y1) in the
// step direction, so the march runs slightly beyond the end point.
// A zero-length ray yields a state for which raymarch2d_next() returns
// false on the first call.
raymarch2d_t raymarch2d_make(float x0, float y0, float x1, float y1) {
    raymarch2d_t res;
    res.x = int(floor(x0));
    res.y = int(floor(y0));
    res.sx = x0 < x1 ? 1 : x1 < x0 ? -1 : 0;
    res.sy = y0 < y1 ? 1 : y1 < y0 ? -1 : 0;
    res.ex = int(floor(x1)) + 2*res.sx;
    res.ey = int(floor(y1)) + 2*res.sy;

    // unit direction; guard the zero-length case so we never divide by zero
    float dx = x1 - x0;
    float dy = y1 - y0;
    float len = sqrt(dx*dx + dy*dy);
    float inv_len = len > 0.0 ? 1.0/len : 0.0;
    dx *= inv_len;
    dy *= inv_len;

    // tmx/tmy: distance along the ray to the FIRST cell boundary crossed on
    // that axis. Going in +x that is the right edge (res.x + 1), going in -x
    // the left edge (res.x). Both quotients are >= 0. The previous form,
    // (x0 - res.x)/dx, was negative for dx < 0 and measured the wrong edge
    // for dx > 0, so the march did not follow the ray.
    res.tmx = dx > 0.0 ? (float(res.x) + 1.0 - x0)/dx
            : dx < 0.0 ? (float(res.x) - x0)/dx
            : 10000000.0;
    res.tmy = dy > 0.0 ? (float(res.y) + 1.0 - y0)/dy
            : dy < 0.0 ? (float(res.y) - y0)/dy
            : 10000000.0;

    // tdx/tdy: distance along the ray between two successive boundaries
    res.tdx = dx == 0.0 ? 0.0 : float(res.sx)/dx;
    res.tdy = dy == 0.0 ? 0.0 : float(res.sy)/dy;
    return res;
}
// Advances the traversal by one cell along whichever axis has the smaller
// tm value (y on a tie). Returns false once the stepped axis has reached its
// stop coordinate (ex / ey), true otherwise.
bool raymarch2d_next(inout raymarch2d_t r) {
    bool step_along_x = r.tmx < r.tmy;
    if (!step_along_x) {
        r.tmy += r.tdy;
        r.y += r.sy;
        return r.y != r.ey;
    }
    r.tmx += r.tdx;
    r.x += r.sx;
    return r.x != r.ex;
}
// Tonemaps `color` with a rational curve x*(a*x+b) / (x*(c*x+d)+e), clamped
// to [0,1]. The same curve is applied to a weighted sum of the channels
// (0.299, 0.587, 0.114), and the result is blended from the per-channel
// value towards that grey value by t = s*s/(slope+s), where s is the sum.
vec3 tonemap_aces(vec3 color) {
    const float slope = 12.0;
    const float a = 2.51f;
    const float b = 0.03f;
    const float c = 2.43f;
    const float d = 0.59f;
    const float e = 0.14f;

    float weighted = (color.r * 0.299) + (color.g * 0.587) + (color.b * 0.114);
    // run rgb and the weighted sum through the curve in one vec4
    vec4 x = vec4(color.r, color.g, color.b, weighted);
    vec4 numerator = x * (a * x + b);
    vec4 denominator = x * (c * x + d) + e;
    vec4 mapped = clamp(numerator / denominator, 0.0, 1.0);

    float t = weighted * weighted / (slope + weighted);
    return mix(mapped.rgb, vec3(mapped.a), t);
}
// Sky radiance integrated over the direction interval [angle[0], angle[1]]
// (radians); the interval must not wrap, see sky() for that.
// Sky integral formula taken from
// Analytic Direct Illumination - Mathis
// https://www.shadertoy.com/view/NttSW7
vec3 sky_(vec2 angle) {
    const vec3 sky_color = vec3(0.2,0.5,1.);
    const vec3 sun_color = vec3(1.,0.7,0.1)*10.;
    const float sun_a = 2.0;
    const float sun_s = 64.0;
    const float sqrt_sun_s = sqrt(sun_s);
    const float inv_sqrt_sun_s = 1./sqrt_sun_s;

    float from = angle[0];
    float to = angle[1];

    // sky term
    vec3 integral = sky_color*(to-from-0.5*(cos(to)-cos(from)));
    // sun term
    integral += sun_color*(atan(sqrt_sun_s*(sun_a-from))-atan(sqrt_sun_s*(sun_a-to)))*inv_sqrt_sun_s;
    return integral / 6.0;
}
// Integrate the radiance from the sky over an interval of directions.
// When the interval's upper end is not below 2*PI it is split at 2*PI and
// the part past it is evaluated from angle 0.
vec3 sky(vec2 angle) {
    const float full_turn = 2.0 * PI;
    return angle[1] < full_turn
        ? sky_(angle)
        : sky_(vec2(angle[0], full_turn)) + sky_(vec2(0.0, angle[1] - full_turn));
}
// Fragment stage. Depending on should_do_render it either
//  - renders: averages the r0 rays of the cascade-0 probe under this
//    fragment (read from u_prev) and tonemaps the result, or
//  - builds cascade ci: each output pixel is one ray of one probe; the ray is
//    marched through u_input and, if nothing is hit, filled in from the
//    cascade above (u_prev) or from the sky for the uppermost cascade.
// Texture layout of a cascade: probe (xi, yi) occupies the rn pixels
// [xi*rn, xi*rn + rn) of row yi, one pixel per ray.
void main(void) {
    vec2 prev_size = vec2(textureSize(u_prev, 0));

    if (should_do_render == 1) {
        // sample probe in cascade 0
        float x = fuv.x * u_resolution.x;
        float y = fuv.y * u_resolution.y;
        // Cascade-0 probe i sits at (i + 0.5)*d0, so the probe nearest to a
        // position is floor(pos/d0). round() is wrong here: it selects by
        // nearest cell edge, and its result at exactly .5 (where pixel
        // centres land when d0 == 1) is implementation-defined in GLSL.
        float xi = floor(x/d0);
        float yi = floor(y/d0);
        vec3 c = vec3(0,0,0);
        for (int r = 0; r < r0; ++r) {
            vec2 pixelcoord = floor(vec2(xi*r0 + r, yi)) + 0.5;
            c += texture(u_prev, pixelcoord / prev_size).rgb;
        }
        ocolor = vec4(tonemap_aces(c/r0),1);
        return;
    }

    // build cascade
    const int lm = 2; // ray distance branching factor. ray distance = 2^(lm*ci)
    const int rm = 1; // ray count branching factor. Num rays for cascade ci = r0*2^(rm*ci) = r0*(1 << rm*ci). NOTE: increasing this removes the property that total size of all cascades converges to 2x size of cascade 0, and instead leads to linear size increase

    int u = int(gl_FragCoord.x);
    int v = int(gl_FragCoord.y);
    int n = n0 >> ci;        // number of probes in one dimension
    int rn = r0 << (rm*ci);  // number of pixels/rays per probe
    int xi = u/rn;           // probe index
    int yi = v;              // probe index
    int r = u - xi*rn;       // ray index

    // pixels outside this cascade's probe grid carry no data
    if (xi >= n || xi < 0 || yi >= n || yi < 0) {
        ocolor = vec4(0,0,0,0);
        return;
    }

    float d = d0*(1 << ci);           // distance between probes
    float half_d = d0*0.5f*(1 << ci); // probes sit in the middle of their cell
    float x = xi * d + half_d;        // probe pos
    float y = yi * d + half_d;        // probe pos
    float l = 0.5 * d0;               // length of ray in cascade 0

    float ra = ci == 0 ? 0.0 : l*(1 << ((ci-1)*lm)); // start of ray length interval
    float rb = l*(1 << (ci*lm));                     // end of ray length interval
    float alpha = 2*PI*(float(r)+0.5)/rn;            // ray direction
    vec2 rot = vec2(cos(alpha), sin(alpha));
    vec2 a = vec2(x,y) + rot*ra; // start of ray
    vec2 b = vec2(x,y) + rot*rb; // end of ray

    // march the ray; any texel that is not exactly (1,1,1) counts as a hit
    raymarch2d_t raym = raymarch2d_make(a.x, a.y, b.x, b.y);
    vec4 col = vec4(0,0,0,0);
    while (raymarch2d_next(raym)) {
        vec3 texel = texture(u_input, vec2((raym.x+0.5)/u_resolution.x, (raym.y+0.5)/u_resolution.y)).rgb;
        if (texel != vec3(1,1,1)) {
            col = vec4(texel,1);
            break;
        }
    }

    // if no hit, get from upper cascade
    // TODO: do proper alpha blending to support transparent materials. Since we're only dealing with opaque materials for now it's fine
    if (col.a == 0) {
        if (ci == cn-1) {
            // uppermost cascade: nothing above it, optionally use the sky
            if (add_sky_light != 0) {
                col = vec4(sky(vec2(alpha, alpha + 2*PI/rn)) / (2*PI/rn), 1);
            }
            else {
                col = vec4(0,0,0,0);
            }
        }
        else {
            int xi2 = (xi+1)/2; // probe index in upper
            int yi2 = (yi+1)/2; // probe index in upper
            int r2 = r << rm;   // ray index in upper
            int rn2 = rn << rm; // num rays in upper
            int n2 = n >> 1;    // num probes in upper
            // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
            float tx = 0.75 - 0.5*float(xi%2);
            float ty = 0.75 - 0.5*float(yi%2);

            // the four surrounding upper probes, clamped to the upper grid
            int px_lo = clamp(xi2-1, 0, n2-1);
            int px_hi = clamp(xi2,   0, n2-1);
            int py_lo = clamp(yi2-1, 0, n2-1);
            int py_hi = clamp(yi2,   0, n2-1);

            // loop through all the nearby rays in the upper cascade
            // TODO: in the case where there are >2 rays in the upper cascade for each ray in this cascade (i.e. rm > 1),
            // we should choose a better weighting than just treating them all equally
            vec4 upper = vec4(0,0,0,0);
            float frac = 1.0 / (1 << rm);
            for (int ri = 0; ri < (1 << rm); ++ri) {
                // pixel coordinates (texel centres) of ray r2+ri in each of the four upper probes
                vec2 pc1 = vec2(px_lo*rn2 + r2 + ri, py_lo) + 0.5;
                vec2 pc2 = vec2(px_hi*rn2 + r2 + ri, py_lo) + 0.5;
                vec2 pc3 = vec2(px_lo*rn2 + r2 + ri, py_hi) + 0.5;
                vec2 pc4 = vec2(px_hi*rn2 + r2 + ri, py_hi) + 0.5;
                vec4 c = mix(
                    mix(texture(u_prev, pc1 / prev_size), texture(u_prev, pc2 / prev_size), tx),
                    mix(texture(u_prev, pc3 / prev_size), texture(u_prev, pc4 / prev_size), tx),
                    ty
                );
                upper += c*frac;
            }
            col = upper;
        }
    }
    ocolor = vec4(col.rgb, 1);
}
#endif /* FRAGMENT_SHADER */
Looks like he's got two 3D videos, one in screen-space and one in world-space.
Here's an example of the world space, you can see that he gets light from the models to the left and right outside of the view frustum:
https://youtu.be/5Ua-h1pg6yM?si=c6wdsT-LzlQTPC_l&t=37
There are some other artifacts going on that are probably coming from things like the number of cascades being low, or a low ray multiplication factor; it probably could've used more parameter tweaking.
This is a neat website tmpvar made that lets you play with some of the parameters (screenspace only though) https://tmpvar.com/poc/radiance-cascades/#flatland-2d
Btw this method is literally just a cleverer way of laying out and combining the results of your probes. How you calculate the value of your rays is entirely up to you.
awesome thanks for chatting
I saw it, but to me it looks like it can't produce data on its own outside the camera view (turning the camera away from the light loses the data on the wall). That's why I am trying to see if it can be merged with HDDAGI to help it with world space, like AMD's Brixelizer caching idea does.