Last active
January 24, 2024 16:34
-
-
Save IgorBaratta/da8b9ce3b2b11ecfad3d4333bcbb023a to your computer and use it in GitHub Desktop.
CUDA python demonstration
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from mpi4py import MPI | |
| import basix | |
| from dolfinx import fem | |
| import dolfinx | |
| import ufl | |
| import numpy as np | |
| from numba import cuda | |
| def volume(mesh): | |
| # Function to compute the volume of each cell in the mesh | |
| dg0 = basix.ufl.element("DG", cell_type, 0) | |
| W = fem.functionspace(mesh, dg0) | |
| w = fem.Function(W) | |
| v = ufl.TestFunction(W) | |
| L = ufl.inner(1, v) * ufl.dx | |
| L = fem.form(L) | |
| fem.assemble_vector(w.x.array, L) | |
| return w.x.array | |
| # Create a unit cube mesh | |
| comm = MPI.COMM_WORLD | |
| mesh = dolfinx.mesh.create_unit_cube(comm, 10, 10, 10, dolfinx.mesh.CellType.hexahedron) | |
| cell_type = mesh.basix_cell() | |
| degree = 1 | |
| # Define finite element and function space | |
| e = basix.ufl.element("Lagrange", cell_type, degree) | |
| V = fem.functionspace(mesh, e) | |
| ndofs = V.dofmap.list.shape[1] | |
| # Compute quadrature points and weights | |
| quad_degree = e.degree * 2 + 1 | |
| pts, wts = basix.make_quadrature(cell_type, quad_degree) | |
| # Compute reference mass matrix | |
| phi = e.tabulate(0, pts)[0, :, :] | |
| M = np.einsum("qi,q,qj->ij", phi, wts, phi) | |
| # Initialize functions and interpolate a function for the source term | |
| u = fem.Function(V) | |
| f = fem.Function(V) | |
| f.interpolate(lambda x: x[0]) | |
| @cuda.jit | |
| def assemble_vector(u_d, f_d, dofmap_d, M_d, vol_d): | |
| # CUDA kernel to assemble the vector | |
| tid = cuda.threadIdx.x | |
| bid = cuda.blockIdx.x | |
| sA = cuda.shared.array(shape=(ndofs,), dtype=np.float64) | |
| dof = dofmap_d[bid, tid] | |
| sA[tid] = f_d[dof] | |
| cuda.syncthreads() | |
| # Each thread computes the dot product of row i of M_d with sA | |
| val = 0.0 | |
| for i in range(M_d.shape[1]): | |
| val += sA[i] * M_d[tid, i] * vol_d[bid] | |
| # u_d[dof] += val | |
| cuda.atomic.add(u_d, dof, val) | |
| # CUDA variables | |
| dofmap_d = cuda.to_device(V.dofmap.list) | |
| u_d = cuda.to_device(u.x.array) | |
| f_d = cuda.to_device(f.x.array) | |
| M_d = cuda.to_device(M) | |
| vol_d = cuda.to_device(volume(mesh)) | |
| # Launch CUDA kernel | |
| num_blocks = V.dofmap.list.shape[0] | |
| threads_per_block = V.dofmap.list.shape[1] | |
| assemble_vector[num_blocks, threads_per_block, 0](u_d, f_d, dofmap_d, M_d, vol_d) | |
| cuda.synchronize() | |
| # Reference solution with dolfinx | |
| v = ufl.TestFunction(V) | |
| L = ufl.inner(f, v) * ufl.dx | |
| L = fem.form(L) | |
| u_ref = fem.Function(V) | |
| fem.assemble_vector(u_ref.x.array, L) | |
| # Compare with reference solution | |
| u_d.copy_to_host(u.x.array) | |
| assert np.allclose(u_ref.x.array, u.x.array) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment