// vector_add.cu -- CUDA vector addition example.
// Source: GitHub gist yuhanz/e9b082c04bbafc07f822c1e60e03832b by @yuhanz,
// created September 12, 2024.
#define N 10000000
#include "stdio.h"
#include <stdlib.h>
// To compile: nvcc vector_add.cu -o vector_add -ccbin "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.41.34120\bin\Hostx64\x64"
// CPU reference implementation of the same operation, kept for comparison:
// void vector_add(float *out, float *a, float *b, int n) {
//     for(int i = 0; i < n; i++){
//         out[i] = a[i] + b[i];
//     }
// }
/**
 * Element-wise vector addition on the device: out[i] = a[i] + b[i]
 * for every i in [0, n).
 *
 * Written as a grid-stride loop: each thread starts at its flat global index
 * and advances by the total number of launched threads, so any 1-D launch
 * configuration (including <<<1,1>>>) processes every element exactly once.
 *
 * @param out  device pointer that receives the n sums
 * @param a    device pointer to the first n input values
 * @param b    device pointer to the second n input values
 * @param n    number of elements; nothing is written when n <= 0
 */
__global__ void vector_add(float *out, float *a, float *b, int n) {
    // 64-bit index math so blockIdx.x * blockDim.x cannot overflow an int.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < n; i += stride) {
        out[i] = a[i] + b[i];
    }
}

// Evaluate a CUDA runtime call and abort with file/line and the runtime's
// error string if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cuda_check_err_ = (call);                              \
        if (cuda_check_err_ != cudaSuccess) {                              \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(cuda_check_err_));                  \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/**
 * Fills two host arrays of N floats with 1.0f and 2.0f, adds them on the GPU
 * with vector_add, copies the result back and prints its first element.
 *
 * @return 0 on success; EXIT_FAILURE if a host allocation or any CUDA call fails.
 */
int main(){
    const size_t bytes = sizeof(float) * N;

    // Host buffers.
    float *a   = (float*)malloc(bytes);
    float *b   = (float*)malloc(bytes);
    float *out = (float*)malloc(bytes);
    if (a == NULL || b == NULL || out == NULL) {
        fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);
        free(a);    // free(NULL) is a no-op, so this is safe for any subset
        free(b);
        free(out);
        return EXIT_FAILURE;
    }

    // Device buffers.
    float *d_a = NULL, *d_b = NULL, *d_out = NULL;
    CUDA_CHECK(cudaMalloc((void**)&d_a,   bytes));
    CUDA_CHECK(cudaMalloc((void**)&d_b,   bytes));
    CUDA_CHECK(cudaMalloc((void**)&d_out, bytes));

    // Initialize the inputs on the host, then upload them.
    for (int i = 0; i < N; i++) {
        a[i] = 1.0f;
        b[i] = 2.0f;
    }
    CUDA_CHECK(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice));

    // One thread per element (ceil-div so the tail is covered); the kernel's
    // loop bound keeps surplus threads in the last block from writing.
    // Only device pointers may be passed to the kernel.
    const int threads = 256;
    const int blocks  = (N + threads - 1) / threads;
    vector_add<<<blocks, threads>>>(d_out, d_a, d_b, N);
    CUDA_CHECK(cudaGetLastError());   // reports an invalid launch configuration

    // Blocking copy: waits for the kernel to finish and reports any error
    // raised while it was executing.
    CUDA_CHECK(cudaMemcpy(out, d_out, bytes, cudaMemcpyDeviceToHost));

    printf("result %f", out[0]);

    free(a);
    free(b);
    free(out);
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_out));
    return 0;
}
// (End of gist. The GitHub sign-in / comment footer is not part of the program.)