// Source: M2_SETI/A4/TP_GPU-master/SAXPY/saxpy_main_student.cu
// (web file-viewer listing: 196 lines, 4.6 KiB, text; last change 2022-12-09 09:03:22 +01:00)
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* Template project demonstrating the basics of how to set up an example
* application.
* Host code.
*/
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <iostream>
using namespace std;
// includes CUDA
#include <cuda_runtime.h>
#include "saxpy.h"
// SAXPY kernel: vector_SAXPY[i] = A * vector_X[i] + vector_Y[i] for 0 <= i < N.
//
// Parameters (all pointers are dereferenced on the device):
//   vector_SAXPY  output vector, at least N floats
//   A             scalar multiplier applied to vector_X
//   vector_X      first input vector, at least N floats
//   vector_Y      second input vector, at least N floats
//   N             number of elements to process; nothing is written when N <= 0
//
// Launch layout: 1D grid of 1D blocks. The grid-stride loop makes the result
// independent of the launch configuration: any grid/block size covers all N
// elements, and threads whose index is >= N do no work. No shared memory used.
__global__ void saxpy_kernel(float *vector_SAXPY, float A, float *vector_X, float *vector_Y, int N)
{
    // 64-bit index arithmetic so blockIdx.x * blockDim.x cannot overflow.
    const long long stride = (long long) gridDim.x * blockDim.x;
    for (long long i = (long long) blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride) {
        vector_SAXPY[i] = A * vector_X[i] + vector_Y[i];
    }
}
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest( int argc, char** argv);
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Process entry point: hands the command-line arguments straight to runTest
// and reports success to the caller.
int main(int argc, char** argv)
{
    runTest(argc, argv);
    return 0;
}
// Returns a / b, plus one whenever a is not an exact multiple of b.
// For positive operands this is the quotient rounded up, e.g. the number of
// blocks of size b needed to cover a elements. b must be non-zero.
__host__ static int iDivUp(int a, int b) {
    const bool hasRemainder = (a % b) != 0;
    const int quotient = a / b;
    if (hasRemainder) {
        return quotient + 1;
    }
    return quotient;
}
// Aborts the program with a readable message when a CUDA runtime call fails.
// `what` names the operation being attempted, for the error message only.
static void checkCudaStatus(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Allocates `count` floats on the host heap, aborting on allocation failure.
static float* allocHostFloats(size_t count, const char* what)
{
    float* buffer = (float*) malloc(count * sizeof(float));
    if (buffer == NULL) {
        fprintf(stderr, "Host allocation failed (%s)\n", what);
        exit(EXIT_FAILURE);
    }
    return buffer;
}

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
//!
//! Computes SAXPY (A * X + Y) on 256*1024 random floats twice: once with a
//! plain loop on the CPU and once by launching saxpy_kernel on the GPU. Both
//! runs are timed with CUDA events; the GPU report also gives the share of the
//! time spent in device allocation, host<->device copies and device frees.
//! Finally the sum of absolute differences between the two results is printed.
//!
//! argc / argv are accepted for interface compatibility and are not used.
//! Any CUDA or allocation failure aborts the program with a message on stderr.
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    (void) argc;
    (void) argv;

    const int N = 256*1024;
    const size_t mem_size = (size_t) N * sizeof(float);

    // allocate host memory
    float* h_vector_X = allocHostFloats(N, "h_vector_X");
    float* h_vector_Y = allocHostFloats(N, "h_vector_Y");

    // Initialise the input data
    const float A = 1.0f;
    for (int i = 0; i < N; i++) {
        h_vector_X[i] = (float) rand();
        h_vector_Y[i] = (float) rand();
    }

    ////////////////////////////////////////////////////////////////////////////
    // CPU EXECUTION
    ////////////////////////////////////////////////////////////////////////////
    // Reference result computed on the CPU
    float* h_vector_SAXPY_CPU = allocHostFloats(N, "h_vector_SAXPY_CPU");
    printf("SAXPY CPU\n");

    cudaEvent_t start, stop;
    checkCudaStatus(cudaEventCreate(&start), "create start event");
    checkCudaStatus(cudaEventCreate(&stop), "create stop event");

    // Record the start event
    checkCudaStatus(cudaEventRecord(start, NULL), "record start event");
    checkCudaStatus(cudaEventSynchronize(start), "synchronize start event");

    // SAXPY on the CPU
    for (int i = 0; i < N; i++) {
        h_vector_SAXPY_CPU[i] = A * h_vector_X[i] + h_vector_Y[i];
    }

    // Record the stop event
    checkCudaStatus(cudaEventRecord(stop, NULL), "record stop event");
    // Wait for the stop event to complete
    checkCudaStatus(cudaEventSynchronize(stop), "synchronize stop event");

    float msecTotal = 0.0f;
    checkCudaStatus(cudaEventElapsedTime(&msecTotal, start, stop), "CPU elapsed time");
    printf("CPU execution time %f\n",msecTotal);

    ////////////////////////////////////////////////////////////////////////////
    // GPU EXECUTION
    ////////////////////////////////////////////////////////////////////////////
    printf("SAXPY GPU\n");
    float* h_vector_SAXPY_GPU = allocHostFloats(N, "h_vector_SAXPY_GPU");

    // vectors in device memory
    float* d_vector_X = NULL;
    float* d_vector_Y = NULL;
    float* d_vector_SAXPY = NULL;

    cudaEvent_t start_mem, stop_mem;
    checkCudaStatus(cudaEventCreate(&start_mem), "create start_mem event");
    checkCudaStatus(cudaEventCreate(&stop_mem), "create stop_mem event");

    // `start` is reused as the beginning of the whole GPU section.
    checkCudaStatus(cudaEventRecord(start, NULL), "record start event");
    checkCudaStatus(cudaEventSynchronize(start), "synchronize start event");

    // Allocate d_vector_X, d_vector_Y and d_vector_SAXPY on the GPU
    checkCudaStatus(cudaMalloc((void**) &d_vector_X, mem_size), "cudaMalloc d_vector_X");
    checkCudaStatus(cudaMalloc((void**) &d_vector_Y, mem_size), "cudaMalloc d_vector_Y");
    checkCudaStatus(cudaMalloc((void**) &d_vector_SAXPY, mem_size), "cudaMalloc d_vector_SAXPY");

    // copy host memory to device
    checkCudaStatus(cudaMemcpy(d_vector_X, h_vector_X, mem_size, cudaMemcpyHostToDevice),
                    "copy X to device");
    checkCudaStatus(cudaMemcpy(d_vector_Y, h_vector_Y, mem_size, cudaMemcpyHostToDevice),
                    "copy Y to device");

    checkCudaStatus(cudaEventRecord(stop_mem, NULL), "record stop_mem event");
    // Wait for the stop event to complete
    checkCudaStatus(cudaEventSynchronize(stop_mem), "synchronize stop_mem event");

    // First part of the memory-management time: allocation + host-to-device copies.
    float msecMem = 0.0f;
    checkCudaStatus(cudaEventElapsedTime(&msecMem, start, stop_mem), "memory elapsed time");

    // setup execution parameters -> one thread per element, 256 threads per block
    const int threadsPerBlock = 256;
    const int blocksPerGrid = iDivUp(N, threadsPerBlock);

    // launch the threads on the GPU
    saxpy_kernel<<<blocksPerGrid, threadsPerBlock>>>(d_vector_SAXPY, A, d_vector_X, d_vector_Y, N);
    // A launch does not return a status itself; query it explicitly.
    checkCudaStatus(cudaGetLastError(), "saxpy_kernel launch");

    // start_mem is recorded in the same stream after the kernel, so waiting on
    // it also waits for the kernel and surfaces any execution error.
    checkCudaStatus(cudaEventRecord(start_mem, NULL), "record start_mem event");
    checkCudaStatus(cudaEventSynchronize(start_mem), "synchronize start_mem event");

    // copy result from device to host
    checkCudaStatus(cudaMemcpy(h_vector_SAXPY_GPU, d_vector_SAXPY, mem_size, cudaMemcpyDeviceToHost),
                    "copy result to host");

    // cleanup device memory
    checkCudaStatus(cudaFree(d_vector_X), "cudaFree d_vector_X");
    checkCudaStatus(cudaFree(d_vector_Y), "cudaFree d_vector_Y");
    checkCudaStatus(cudaFree(d_vector_SAXPY), "cudaFree d_vector_SAXPY");

    checkCudaStatus(cudaEventRecord(stop, NULL), "record stop event");
    // Wait for the stop event to complete
    checkCudaStatus(cudaEventSynchronize(stop), "synchronize stop event");

    msecTotal = 0.0f;
    checkCudaStatus(cudaEventElapsedTime(&msecTotal, start, stop), "GPU elapsed time");

    // Second part of the memory-management time: device-to-host copy + frees.
    float msecMem2 = 0.0f;
    checkCudaStatus(cudaEventElapsedTime(&msecMem2, start_mem, stop), "memory elapsed time");
    msecMem += msecMem2;

    // "%%" prints a literal percent sign.
    printf("GPU execution time %f ms (memory management %2.2f %%)\n",msecTotal,(msecMem)/(msecTotal)*100);

    // Accumulate absolute differences so that errors of opposite sign cannot
    // cancel each other and hide a mismatch.
    float sum_diff = 0.0f;
    for (int i = 0; i < N; i++) {
        sum_diff += fabsf(h_vector_SAXPY_GPU[i] - h_vector_SAXPY_CPU[i]);
    }
    printf("sum_diff = %f\n",sum_diff);

    // cleanup events
    checkCudaStatus(cudaEventDestroy(start), "destroy start event");
    checkCudaStatus(cudaEventDestroy(stop), "destroy stop event");
    checkCudaStatus(cudaEventDestroy(start_mem), "destroy start_mem event");
    checkCudaStatus(cudaEventDestroy(stop_mem), "destroy stop_mem event");

    // cleanup memory
    free(h_vector_X);
    free(h_vector_Y);
    free(h_vector_SAXPY_GPU);
    free(h_vector_SAXPY_CPU);
}