#include __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) { int i = blockDim.x * blockIdx.x + threadIdx.x; if (i < numElements) { C[i] = A[i] + B[i]; } } __device__ int get_thread_id() { return threadIdx.x; } __host__ void init_vectors(float *A, float *B, int n) { for (int i = 0; i < n; ++i) { A[i] = rand() / (float)RAND_MAX; B[i] = rand() / (float)RAND_MAX; } } int main(void) { int numElements = 50000; size_t size = numElements * sizeof(float); printf("[Vector addition of %d elements]\n", numElements); return 0; }