/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

/* Template project which demonstrates the basics of how to set up a project
 * example application.
 * Host code.
 */

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <iostream>
using namespace std;

// includes CUDA
#include <cuda_runtime.h>

#include "saxpy.h"

__global__ void saxpy_kernel(float *vector_SAXPY, float A, float *vector_X, float *vector_Y, int N)
{
    // YOUR CODE HERE
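    // A possible sketch of the kernel body, one thread per element, kept as a
    // comment so the exercise placeholder above remains for you to fill in:
    //   int i = blockIdx.x * blockDim.x + threadIdx.x;
    //   if (i < N)
    //       vector_SAXPY[i] = A * vector_X[i] + vector_Y[i];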

}



////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest( int argc, char** argv);


////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main( int argc, char** argv)
{
    runTest( argc, argv);
}

// Integer division rounded up: smallest number of blocks of size b needed to cover a elements
__host__ static int iDivUp(int a, int b) {
    return ((a % b != 0) ? (a / b + 1) : (a / b));
}

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    cudaError_t error;

    unsigned long int N = 256*1024;

    const unsigned int mem_size = N*sizeof(float);
    // allocate host memory
    float* h_vector_X = (float*) malloc(mem_size);
    float* h_vector_Y = (float*) malloc(mem_size);

    // Initialization of the input data
    float A = 1.0f;

    for (int i = 0; i < N; i++){
        h_vector_X[i] = (float)rand();
        h_vector_Y[i] = (float)rand();
    }


    ////////////////////////////////////////////////////////////////////////////////
    // EXECUTION ON THE CPU
    ///////////////////////////////////////////////////////////////////////


    // SAXPY result computed on the CPU
    float* h_vector_SAXPY_CPU = (float*) malloc( mem_size);

    printf("SAXPY CPU\n");

    cudaEvent_t start, stop;
    error = cudaEventCreate(&start);
    error = cudaEventCreate(&stop);

    // Record the start event
    error = cudaEventRecord(start, NULL);
    error = cudaEventSynchronize(start);
    // SAXPY computation on the CPU
    // YOUR CODE HERE
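    // A possible sketch of the CPU reference loop, kept as a comment:
    //   for (int i = 0; i < N; i++)
    //       h_vector_SAXPY_CPU[i] = A * h_vector_X[i] + h_vector_Y[i];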

    // Record the stop event
    error = cudaEventRecord(stop, NULL);
    // Wait for the stop event to complete
    error = cudaEventSynchronize(stop);
    float msecTotal = 0.0f;
    error = cudaEventElapsedTime(&msecTotal, start, stop);


    printf("CPU execution time %f ms\n", msecTotal);



    ////////////////////////////////////////////////////////////////////////////////
    // EXECUTION ON THE GPU
    ///////////////////////////////////////////////////////////////////////

    printf("SAXPY GPU\n");

    float* h_vector_SAXPY_GPU = (float*) malloc(mem_size);

    // vectors in device memory
    float* d_vector_X;
    float* d_vector_Y;
    float* d_vector_SAXPY;


    cudaEvent_t start_mem, stop_mem;
    error = cudaEventCreate(&start_mem);
    error = cudaEventCreate(&stop_mem);

    // Reuse 'start' as the overall GPU timer; 'stop_mem' below marks the end of
    // the device allocations and host-to-device copies
    error = cudaEventRecord(start, NULL);
    error = cudaEventSynchronize(start);


    // Allocate d_vector_X, d_vector_Y and d_vector_SAXPY in GPU device memory
    // YOUR CODE HERE
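    // A possible sketch of the allocations, kept as a comment:
    //   cudaMalloc((void**)&d_vector_X, mem_size);
    //   cudaMalloc((void**)&d_vector_Y, mem_size);
    //   cudaMalloc((void**)&d_vector_SAXPY, mem_size);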

    // copy host memory to device
    // YOUR CODE HERE
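    // A possible sketch of the host-to-device copies, kept as a comment:
    //   cudaMemcpy(d_vector_X, h_vector_X, mem_size, cudaMemcpyHostToDevice);
    //   cudaMemcpy(d_vector_Y, h_vector_Y, mem_size, cudaMemcpyHostToDevice);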


    error = cudaEventRecord(stop_mem, NULL);
    // Wait for the stop event to complete
    error = cudaEventSynchronize(stop_mem);
    float msecMem = 0.0f;
    error = cudaEventElapsedTime(&msecMem, start, stop_mem);

    // setup execution parameters -> decomposition into threads
    // YOUR CODE HERE
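    // A possible sketch of the execution configuration, kept as a comment
    // (a block size of 256 is an assumption, not prescribed by the template);
    // iDivUp rounds the grid size up so every element is covered:
    //   dim3 threads(256);
    //   dim3 grid(iDivUp(N, threads.x));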


    // launch the threads executed on the GPU device
    // YOUR CODE HERE
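    // A possible sketch of the launch, matching the configuration sketched above:
    //   saxpy_kernel<<<grid, threads>>>(d_vector_SAXPY, A, d_vector_X, d_vector_Y, N);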

    // 'start_mem' to 'stop' times the device-to-host copy below
    error = cudaEventRecord(start_mem, NULL);
    error = cudaEventSynchronize(start_mem);

    // copy result from device to host
    // YOUR CODE HERE
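    // A possible sketch of the device-to-host copy, kept as a comment:
    //   cudaMemcpy(h_vector_SAXPY_GPU, d_vector_SAXPY, mem_size, cudaMemcpyDeviceToHost);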

    // cleanup device memory
    // UNCOMMENT THESE LINES once device memory has been allocated
    //cudaFree(d_vector_X);
    //cudaFree(d_vector_Y);
    //cudaFree(d_vector_SAXPY);


    error = cudaEventRecord(stop, NULL);
    // Wait for the stop event to complete
    error = cudaEventSynchronize(stop);
    msecTotal = 0.0f;
    error = cudaEventElapsedTime(&msecTotal, start, stop);
    float msecMem2 = 0.0f;
    error = cudaEventElapsedTime(&msecMem2, start_mem, stop);
    msecMem += msecMem2;

    printf("GPU execution time %f ms (memory management %2.2f %%)\n", msecTotal, (msecMem)/(msecTotal)*100);

    // Compare the CPU and GPU results
    float sum_diff = 0;
    for (int i = 0; i < N; i++)
        sum_diff += h_vector_SAXPY_GPU[i] - h_vector_SAXPY_CPU[i];

    printf("sum_diff = %f\n", sum_diff);

    // cleanup memory
    free(h_vector_X);
    free(h_vector_Y);
    free(h_vector_SAXPY_GPU);
    free(h_vector_SAXPY_CPU);
}