Created
August 14, 2023 14:45
-
-
Save getianao/f856d6c6364f5ae334c5620f04583c3d to your computer and use it in GitHub Desktop.
Test the L2 cache for 64-byte promotion of HBM reads.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #define BLOCK_SIZE 1024 | |
| #define STRIDE 16 | |
// Strided-read probe kernel.
//
// Thread `idx` copies A[idx * STRIDE] into B[idx], provided the source index
// lies below BLOCK_SIZE; every other thread does nothing. Neighbouring
// threads therefore load floats that sit STRIDE elements
// (STRIDE * sizeof(float) bytes) apart, while the stores to B are contiguous.
//
// A: device-accessible buffer, read only at indices below BLOCK_SIZE.
// B: device-accessible buffer, written at index `idx` by each thread that
//    passes the guard.
__global__ void kernel(float *A, float *B) {
  const int tid = threadIdx.x + blockDim.x * blockIdx.x;
  const int src = tid * STRIDE;  // element offset of this thread's load
  if (src >= BLOCK_SIZE)
    return;  // source element would fall outside the probed range
  B[tid] = A[src];
}
// Host driver: allocates two BLOCK_SIZE-float device buffers, launches
// `kernel` once with a single block of BLOCK_SIZE threads, waits for it to
// finish, and releases the buffers.
//
// Returns 0 when every CUDA call succeeded and 1 otherwise, so a failed
// allocation or launch is not mistaken for a valid run.
int main() {
  float *A = nullptr;
  float *B = nullptr;
  const size_t bytes = BLOCK_SIZE * sizeof(float);

  cudaError_t err = cudaMalloc(&A, bytes);
  if (err == cudaSuccess)
    err = cudaMalloc(&B, bytes);

  // NOTE(review): A is left unwritten, as in the original. Presumably this
  // keeps the kernel's loads as the first touch of that memory; confirm
  // before adding an initialising memset/memcpy here.
  if (err == cudaSuccess) {
    kernel<<<1, BLOCK_SIZE>>>(A, B);
    // A launch does not return a status; configuration errors surface here.
    err = cudaGetLastError();
  }

  // Faults raised while the kernel runs are reported by the next sync.
  if (err == cudaSuccess)
    err = cudaDeviceSynchronize();

  // cudaFree on a null pointer performs no operation, so this is safe on
  // every path above.
  cudaFree(A);
  cudaFree(B);

  return err == cudaSuccess ? 0 : 1;
}
| // nvcc test_l2.cu -O3 -o test_l2 | |
| // BLOCK_SIZE = 1024 floats; 1024/16 = 64 active threads, each touching its own 32-byte sector; 64/32 = 2 warp-level requests; 64 sectors * 32 bytes = 2048 bytes (from L1) | |
| // https://forums.developer.nvidia.com/t/pascal-l1-cache/49571/20?u=cache | |
| // https://stackoverflow.com/questions/63497910/cache-behaviour-in-compute-capability-7-5 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment