// GPU thresholding ("seuillage") of an 8-bit, 3-channel image: CUDA kernel and host wrapper.
#include <cstdio>

#include "mylib.h"
#include "mylib.cuh"
|
|
|
|
|
|
/**
 * Per-pixel thresholding on an interleaved, 3-bytes-per-pixel image.
 *
 * Each thread handles the pixel at
 *   row    i = blockIdx.x * BLOCK_SIZE + threadIdx.x
 *   column j = blockIdx.y * BLOCK_SIZE + threadIdx.y
 * whose three channel bytes (c0, c1, c2) start at byte offset 3 * (i * size_j + j).
 *
 * It computes nr = c2 / sqrt(c0^2 + c1^2 + c2^2) and writes
 *   out = (c0, nr > 0.7 ? c2 : c1, c2)
 * i.e. channels 0 and 2 are copied through and channel 1 is replaced by
 * channel 2 when channel 2 dominates the pixel's norm. An all-zero pixel is
 * copied through unchanged.
 *
 * Launch layout: BLOCK_SIZE x BLOCK_SIZE threads per block; grid.x spans
 * rows, grid.y spans columns. No shared memory is used.
 *
 * Precondition: the kernel does not receive the row count, so the launch must
 * not cover rows beyond the allocated buffers. Threads whose column falls
 * outside [0, size_j) do nothing.
 *
 * @param d_image_in   device pointer to the input image bytes
 * @param d_image_out  device pointer to the output image bytes (same layout)
 * @param size_j       number of pixels per row (row stride in pixels)
 */
__global__ void kernel_seuillageGPU(unsigned char *d_image_in, unsigned char *d_image_out, int size_j)
{
    // Global pixel coordinates: block origin in the grid plus the thread's offset in the block.
    const int i = blockIdx.x * BLOCK_SIZE + threadIdx.x;
    const int j = blockIdx.y * BLOCK_SIZE + threadIdx.y;

    // A column past the end of the row would alias into the next row
    // (and run off the buffer on the last row), so skip it.
    if (j >= size_j)
        return;

    // Byte offset of this pixel's first channel.
    const int base = 3 * (i * size_j + j);

    // Read each channel once instead of re-indexing global memory for every use.
    const unsigned char c0 = d_image_in[base + 0];
    const unsigned char c1 = d_image_in[base + 1];
    const unsigned char c2 = d_image_in[base + 2];

    // Squared norm in integer arithmetic (at most 3 * 255^2, no overflow).
    const int sq = c0 * c0 + c1 * c1 + c2 * c2;

    // Share of channel 2 in the pixel's norm. A black pixel would give 0/0;
    // use 0 so it takes the "below threshold" branch, as the NaN comparison did.
    const float nr = (sq > 0) ? c2 / sqrtf((float)sq) : 0.0f;

    d_image_out[base + 0] = c0;
    d_image_out[base + 1] = (nr > 0.7f) ? c2 : c1;
    d_image_out[base + 2] = c2;
}
|
|
|
|
|
|
// Reports a failed CUDA runtime call on stderr; returns true when err is cudaSuccess.
static bool seuillageCudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "seuillageGPU: %s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

/**
 * Runs kernel_seuillageGPU on an 8-bit, 3-channel image and returns the result.
 *
 * The image is uploaded into device buffers whose dimensions are rounded up to
 * a multiple of BLOCK_SIZE, so the kernel (which is launched in whole
 * BLOCK_SIZE x BLOCK_SIZE tiles) covers every pixel, including the right and
 * bottom edges of images whose size is not a multiple of BLOCK_SIZE. Only the
 * real rows x cols region is copied back.
 *
 * @param in  input image; must be non-empty and of type CV_8UC3. It may be
 *            non-continuous (its row step is honoured).
 * @return    a new rows x cols CV_8UC3 image, or an empty Mat if the input is
 *            invalid or any CUDA call fails (the failure is reported on stderr).
 */
Mat seuillageGPU(Mat in)
{
    // The kernel indexes 3 bytes per pixel; anything else would over-read host memory.
    if (in.empty() || in.type() != CV_8UC3) {
        fprintf(stderr, "seuillageGPU: input must be a non-empty CV_8UC3 image\n");
        return Mat();
    }

    const int rows = in.rows;
    const int cols = in.cols;

    // Device-side dimensions, rounded up to whole tiles.
    const int paddedRows = ((rows + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;
    const int paddedCols = ((cols + BLOCK_SIZE - 1) / BLOCK_SIZE) * BLOCK_SIZE;

    const size_t rowBytes = (size_t)cols * 3;           // payload bytes per image row
    const size_t pitch    = (size_t)paddedCols * 3;     // bytes per device row
    const size_t mem_size = pitch * (size_t)paddedRows; // bytes per device buffer

    Mat out;
    out.create(rows, cols, CV_8UC3);

    // Images in device memory.
    unsigned char *d_image_in_GPU  = NULL;
    unsigned char *d_image_out_GPU = NULL;

    // Allocate the input and output images on the GPU.
    bool ok = seuillageCudaOk(cudaMalloc((void **)&d_image_in_GPU, mem_size), "cudaMalloc(input)");
    ok = ok && seuillageCudaOk(cudaMalloc((void **)&d_image_out_GPU, mem_size), "cudaMalloc(output)");

    // Zero the input so the padding pixels read by edge tiles are defined.
    ok = ok && seuillageCudaOk(cudaMemset(d_image_in_GPU, 0, mem_size), "cudaMemset(input)");

    // Copy host memory to device, row by row, from the Mat's step to the padded pitch.
    ok = ok && seuillageCudaOk(cudaMemcpy2D(d_image_in_GPU, pitch, in.data, in.step,
                                            rowBytes, (size_t)rows, cudaMemcpyHostToDevice),
                               "cudaMemcpy2D(host to device)");

    if (ok) {
        // Execution parameters: split the padded image into BLOCK_SIZE x BLOCK_SIZE tiles,
        // grid.x over rows and grid.y over columns, as the kernel expects.
        dim3 threads(BLOCK_SIZE, BLOCK_SIZE);
        dim3 grid(paddedRows / BLOCK_SIZE, paddedCols / BLOCK_SIZE);

        // Launch on the GPU; the row stride seen by the kernel is the padded width.
        kernel_seuillageGPU<<<grid, threads>>>(d_image_in_GPU, d_image_out_GPU, paddedCols);
        ok = seuillageCudaOk(cudaGetLastError(), "kernel launch");

        // Copy the result from device to host. This blocking copy also surfaces
        // any error raised while the kernel was executing.
        ok = ok && seuillageCudaOk(cudaMemcpy2D(out.data, out.step, d_image_out_GPU, pitch,
                                                rowBytes, (size_t)rows, cudaMemcpyDeviceToHost),
                                   "cudaMemcpy2D(device to host)");
    }

    // cudaFree accepts a null pointer, so this is safe on every failure path.
    cudaFree(d_image_in_GPU);
    cudaFree(d_image_out_GPU);

    return ok ? out : Mat();
}
|
|
|