/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting aspects of that functionality. */ #include #include #include #define NUM_WORKERS 16 #ifdef ACC_DEVICE_TYPE_radeon /* AMD GCN uses the autovectorizer for the vector dimension: the use of a function call in vector-partitioned code in this test is not currently supported. */ #define NUM_VECTORS 1 #else #define NUM_VECTORS 32 #endif #define WIDTH 64 #define HEIGHT 32 #define WORK_ID(I,N) \ (acc_on_device (acc_device_not_host) \ ? __builtin_goacc_parlevel_id (GOMP_DIM_WORKER) \ : (I % N)) #define VEC_ID(I,N) \ (acc_on_device (acc_device_not_host) \ ? __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR) \ : (I % N)) #pragma acc routine worker void __attribute__ ((noinline)) WorkVec (int *ptr, int w, int h, int nw, int nv) { #pragma acc loop worker for (int i = 0; i < h; i++) #pragma acc loop vector for (int j = 0; j < w; j++) ptr[i*w + j] = (WORK_ID (i, nw) << 8) | VEC_ID(j, nv); } int DoWorkVec (int nw) { int ary[HEIGHT][WIDTH]; int err = 0; for (int ix = 0; ix != HEIGHT; ix++) for (int jx = 0; jx != WIDTH; jx++) ary[ix][jx] = 0xdeadbeef; printf ("spawning %d ...", nw); fflush (stdout); #pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary) /* { dg-warning "region contains vector partitioned code but is not vector partitioned" "" { target openacc_radeon_accel_selected } .-1 } */ { WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS); } for (int ix = 0; ix != HEIGHT; ix++) for (int jx = 0; jx != WIDTH; jx++) { int exp = ((ix % nw) << 8) | (jx % NUM_VECTORS); if (ary[ix][jx] != exp) { printf ("\nary[%d][%d] = %#x expected %#x", ix, jx, ary[ix][jx], exp); err = 1; } } printf (err ? " failed\n" : " ok\n"); return err; } int main () { int err = 0; for (int W = 1; W <= NUM_WORKERS; W <<= 1) err |= DoWorkVec (W); return err; }