Unexpected result of Cuda program

asked 2018-09-14 09:37:10 -0600

Amber gravatar image

updated 2018-09-14 11:20:12 -0600

fcomida gravatar image

I am new to CUDA programming. I wrote a CUDA C program that computes the cube of each array element and displays the output. However, the result comes out as 0 for every element, which is wrong. The following is my code:

/*
 * Element-wise cube: d_out[i] = d_in[i] * d_in[i] * d_in[i].
 *
 * d_in  - input array, dereferenced in device code.
 * d_out - output array, dereferenced in device code.
 *
 * Launch contract: the element index is taken from threadIdx.x alone and
 * there is no bounds guard, so launch exactly one block whose thread count
 * does not exceed the length of either array (one thread per element).
 */
__global__ void cube(float *d_in, float *d_out)
{
    // blockIdx is deliberately not consulted; with more than one block every
    // block would recompute the same first blockDim.x elements.
    int index = threadIdx.x;

    // Read the input once, then multiply in the same left-to-right order.
    float value = d_in[index];
    d_out[index] = value * value * value;
}

/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * status - value returned by the runtime call being checked.
 * what   - short label for the call, included in the message.
 *
 * Returns true when status is cudaSuccess, false otherwise.
 */
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Fills a host array with 0..ARRAY_SIZE-1, cubes it with the `cube` kernel
 * and prints one result per line.
 *
 * Returns 0 on success and 1 if any CUDA call fails. Results are printed only
 * when every step succeeded, so a failure is never shown as garbage output
 * from the uninitialised host buffer.
 */
int main()
{
    const int ARRAY_SIZE = 96;
    const int ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    // Host input: h_in[i] = i.
    float h_in[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++)
    {
        h_in[i] = float(i);
    }
    float h_out[ARRAY_SIZE];

    // Start as null so the frees at the end are safe on every path.
    float *d_in = NULL;
    float *d_out = NULL;

    // && short-circuits: the first failing step skips all later ones.
    bool ok = cudaSucceeded(cudaMalloc((void **)&d_in, ARRAY_BYTES), "cudaMalloc(d_in)")
           && cudaSucceeded(cudaMalloc((void **)&d_out, ARRAY_BYTES), "cudaMalloc(d_out)")
           && cudaSucceeded(cudaMemcpy(d_in, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice),
                            "cudaMemcpy(host to device)");

    if (ok)
    {
        // One block, one thread per element, as the kernel indexes by threadIdx.x.
        cube<<<1, ARRAY_SIZE>>>(d_in, d_out);

        // A launch returns no status itself; launch errors are read back with
        // cudaGetLastError(), and the copy back to the host is also checked.
        ok = cudaSucceeded(cudaGetLastError(), "cube kernel launch")
          && cudaSucceeded(cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(device to host)");
    }

    if (ok)
    {
        for (int i = 0; i < ARRAY_SIZE; i++)
        {
            printf("%f", h_out[i]);
            printf("\n");
        }
    }

    // Freeing a null pointer is a no-op, so partially completed setups are fine.
    cudaFree(d_in);
    cudaFree(d_out);
    return ok ? 0 : 1;
}

Please help me with this; I would be very grateful.

edit retag flag offensive close merge delete