M2_SETI/A4/TP_GPU-master/TP5_computer-vision/mylib.cu

94 lines
2.7 KiB
Text
Raw Normal View History

2022-12-09 09:03:22 +01:00
#include "mylib.h"
#include "mylib.cuh"
__global__ void kernel_seuillageGPU(unsigned char *d_image_in, unsigned char *d_image_out,int size_j)
{
float Csum;
int i, j, k, iFirst, jFirst;
iFirst = blockIdx.x*BLOCK_SIZE; // num de block dans la grille de block
jFirst = blockIdx.y*BLOCK_SIZE;
i = iFirst + threadIdx.x;// recuperer l'identifiant d'un thread dans les blocs
j = jFirst + threadIdx.y;
float nr = 0;
nr=d_image_in[2+j*3+i*3*size_j]/sqrtf(d_image_in[0+j*3+i*3*size_j]*d_image_in[0+j*3+i*3*size_j]+d_image_in[1+j*3+i*3*size_j]*d_image_in[1+j*3+i*3*size_j]+d_image_in[2+j*3+i*3*size_j]*d_image_in[2+j*3+i*3*size_j]);
if(nr > 0.7)
d_image_out[1+j*3+i*3*size_j] = d_image_in[2+j*3+i*3*size_j];
else
d_image_out[1+j*3+i*3*size_j] = d_image_in[1+j*3+i*3*size_j];
d_image_out[0+j*3+i*3*size_j] = d_image_in[0+j*3+i*3*size_j];
d_image_out[2+j*3+i*3*size_j] = d_image_in[2+j*3+i*3*size_j];
}
Mat seuillageGPU( Mat in)
{
cudaError_t error;
Mat out;
out.create(in.rows,in.cols,CV_8UC3);
// allocate host memory
unsigned char *h_image_in_GPU ;
h_image_in_GPU=in.data;
/*cudaEvent_t start,stop,start_mem,stop_mem;
error = cudaEventCreate(&start_mem);
error = cudaEventCreate(&stop_mem);
error = cudaEventRecord(start, NULL);
error = cudaEventSynchronize(start);*/
// images on device memoryÍÍÍ
unsigned char *d_image_in_GPU;
unsigned char *d_image_out_GPU;
const unsigned long int mem_size=in.cols*in.rows*3*sizeof(unsigned char);
// Alocation mémoire de d_image_in et d_image_out sur la carte GPU
cudaMalloc((void**) &d_image_in_GPU,mem_size );
cudaMalloc((void**) &d_image_out_GPU, mem_size);
// copy host memory to device
cudaMemcpy(d_image_in_GPU, h_image_in_GPU,mem_size ,cudaMemcpyHostToDevice);
//error = cudaEventRecord(stop_mem, NULL);
// Wait for the stop event to complete
//error = cudaEventSynchronize(stop_mem);
//float msecMem = 0.0f;
//error = cudaEventElapsedTime(&msecMem, start, stop_mem);
// setup execution parameters -> découpage en threads
dim3 threads(BLOCK_SIZE,BLOCK_SIZE);
dim3 grid(in.rows/BLOCK_SIZE,in.cols/BLOCK_SIZE);
// lancement des threads executé sur la carte GPU
kernel_seuillageGPU<<< grid, threads >>>(d_image_in_GPU, d_image_out_GPU,in.cols);
// Record the start event
//error = cudaEventRecord(start_mem, NULL);
//error = cudaEventSynchronize(start_mem);
// copy result from device to host
cudaMemcpy(out.data, d_image_out_GPU, mem_size,cudaMemcpyDeviceToHost);
cudaFree(d_image_in_GPU);
cudaFree(d_image_out_GPU);
/*
float msecTotal,msecMem2;
error = cudaEventRecord(stop, NULL);
error = cudaEventSynchronize(stop);
error = cudaEventElapsedTime(&msecTotal, start, stop);
error = cudaEventElapsedTime(&msecMem2, start_mem, stop);
*/
return out;
}