I have a kernel to calculate different elements of a matrix, based on their position (diagonal or off-diagonal). The kernel works as expected when calculating matrices of si
I tried to reproduce your problem with the following complete example. The code compiles and runs with no errors.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// NOTE(review): the header name on the next line is missing from this listing
// (it looks like an angle-bracketed name was dropped). printf is called further
// down, so this was presumably <stdio.h> -- confirm against the original.
#include
#include "cuComplex.h"
// Kernel that derives a 2-D (row, column) position for each thread and the
// matching flat offset into a matrix with numberOfBuses elements per row.
// The statements that actually read R, X, B, fromBus, toBus or write y are not
// visible in this listing (see the note on the truncated `if` below), so what
// is stored in y cannot be documented from here.
__global__ void createYBus(float *R, float *X, float *B, int numberOfBuses, int numberOfBranches, int *fromBus, int *toBus, cuComplex *y)
{
// Row comes from the y dimension of the launch grid, column from the x dimension.
int rowIdx = blockIdx.y*blockDim.y + threadIdx.y;
int colIdx = blockIdx.x*blockDim.x + threadIdx.x;
// Flat offset of element (rowIdx, colIdx), using numberOfBuses as the row length.
int index = rowIdx*numberOfBuses + colIdx;
// NOTE(review): the next line is not valid code. It appears to be two
// statements fused together: the beginning of a condition on rowIdx inside the
// kernel, and the argument list of a kernel launch in host code. Everything in
// between (the rest of the kernel, the start of the host function, and the
// setup of the dev_* pointers) is absent from this listing -- presumably lost
// together with the text between a '<' and a '>' character. Restore it from
// the original source before attempting to compile.
if (rowIdx>>(dev_R, dev_X, dev_B, numberOfBuses, numLines, dev_fromBus, dev_toBus, dev_y);
// Host-side buffer sized for numberOfBuses * numberOfBuses complex elements.
// NOTE(review): no matching delete[] (or cudaFree for the dev_* pointers) is
// visible in this fragment.
cuComplex* y_bus = new cuComplex[numberOfBuses*numberOfBuses] ;
//copy results back to CPU
// NOTE(review): no error check is visible between the launch and this copy, so
// a failure reported here may originate from the kernel execution rather than
// from the copy itself.
cudaError_t cudaStat6 = cudaMemcpy(y_bus, dev_y, numberOfBuses*numberOfBuses*sizeof(cuComplex), cudaMemcpyDeviceToHost);
if (cudaStat6 != cudaSuccess) {
// Print the numeric error code and its description, then exit with status 1.
printf ("failure : (%d) - %s\n", cudaStat6, ::cudaGetErrorString(cudaStat6)) ;
return 1;
}
return 0 ;
}
Your error seems to be somewhere else.
You may want to run your code in Nsight debug mode with the CUDA memory checker enabled. If the code is compiled with debug information, the tool should point out the location of the error.
EDIT: The problem appears to be caused by WDDM TDR (Timeout Detection and Recovery), as discussed in the comments.