I am very new to openacc and have just high-level knowledge so any help and explanation of what I am doing wrong would be appreciated.
I am trying to accelerate(para
Jeff is correct that the OpenACC committee is still working on how to standardize support for aggregate data types with dynamic data members. However with PGI version 14.9 or later, we have added better support for structs as well as C++ classes so in this case you can simplify the code by just adding create(p[0:1]). What will happen is that the compiler will create a device copy of p with memory allocated for just the data members. Then when you do the copy of p->distance, memory will be allocated for "distance" and then attach it to p. (i.e. the run time will fill in the device pointer in the struct).
There are caveats. First is that this behavior has not been standardized so other compilers such as Cray, Pathscale, GCC, and others may have different behavior. Second, order matters. p needs to be created before distance can be attached. Third, more complex data structures become very difficult to manage. As Jeff suggests, using CUDA Unified Memory is a good alternative for managing complex data structures.
If you're interested, much of my GTC2015 presentation discuss this topic (link). The focus of the talk is on C++ Class data management, but is applicable to C structs as well.
Hope this helps, Mat
% cat test1.c
#include
#include
#define min(a,b) (a > b) ? b : a
#define max(a,b) (a < b) ? b : a
#define NX 10
#define NY 10
#define NZ 10
struct phiType {
double dx, dy, dz;
double * distance;
};
typedef struct phiType Phi;
#pragma acc routine seq
double solve(Phi *p, int index) {
// for simplicity just returning a value
return 2;
}
void fast_sweep(Phi *p) {
// removing boundaries
int x = NX - 2;
int y = NY - 2;
int z = NZ - 2;
int startLevel = 3;
int endLevel = x + y + z;
#pragma acc data create(p[0:1]) copy(p->distance[0:NX*NY*NZ])
for(int level = startLevel; level <= endLevel; level++){
int ks = max(1, level-(y + z));
int ke = min(x, level-2);
int js = max(1, level-(x + z));
int je = min(y, level-2);
#pragma acc region
{
#pragma acc loop independent
for(int k = ks; k <= ke; k++){
#pragma acc loop independent
for(int j = js; j <= je; j++){
int i = level - (k + j);
if(i > 0 && i <= z){
int index = i * NX * NY + j * NX + k;
p->distance[index] = solve(p, index);
}
}
}
}
}
}
void create_phi(Phi *p){
p->dx = 1;
p->dy = 1;
p->dz = 1;
p->distance = (double *) malloc(sizeof(double) * NX * NY * NZ);
for(int i = 0; i < NZ; i++){
for(int j = 0; j < NY; j++){
for(int k = 0; k < NX; k++){
int index = i * NX * NY + j * NX + k;
p->distance[index] = (i*j*k == 0) ? 0 : 1;
}
}
}
}
int main()
{
printf("start \n");
Phi *p = (Phi *) malloc(sizeof(Phi));
create_phi(p);
printf("calling fast sweep \n");
fast_sweep(p);
printf(" print the results \n");
for(int i = 0; i < NZ; i++){
for(int j = 0; j < NY; j++){
for(int k = 0; k < NX; k++){
int index = i * NX * NY + j * NX + k;
printf("%f ", p->distance[index]);
}
printf("\n");
}
printf("\n");
}
return 0;
}
% pgcc -acc -ta=tesla:cc35 -Minfo=accel test1.c -V15.7 ; a.out
solve:
19, Generating acc routine seq
fast_sweep:
34, Generating create(p[:1])
Generating copy(p->distance[:1000])
45, Loop is parallelizable
47, Loop is parallelizable
Accelerator kernel generated
Generating Tesla code
45, #pragma acc loop gang /* blockIdx.y */
47, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
start
calling fast sweep
print the results
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000