BIN
A4/A4C4-C1.pdf
Normal file
BIN
A4/A4C4-TP1.pdf
Normal file
BIN
A4/TP_GPU-master.zip
Normal file
1
A4/TP_GPU-master/SAXPY/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
/Release/
34
A4/TP_GPU-master/SAXPY/CMakeLists.txt
Normal file
@@ -0,0 +1,34 @@
# CUDA as a first-class language (project(... LANGUAGES CXX CUDA)) requires CMake >= 3.8
cmake_minimum_required(VERSION 3.8)

project(saxpy LANGUAGES CXX CUDA)
set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
#set(CMAKE_C_COMPILER "gcc-8")
#set(CMAKE_CXX_COMPILER "g++-8")

#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")

list(APPEND HEADER_REP ${ROOT_DIR}/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")

file(GLOB CPP_FILES ${ROOT_DIR}/*.cpp)
file(GLOB CU_FILES ${ROOT_DIR}/*.cu)
file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/*.cuh ${ROOT_DIR}/*.h)

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")

set(CUDA_SEPARABLE_COMPILATION ON)
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/\" -I\"/usr/local/cuda/samples/common/inc/\"")

add_executable(saxpy_CUDA ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
set_target_properties(saxpy_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(saxpy_CUDA PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
34
A4/TP_GPU-master/SAXPY/CMakeLists_cmake28.txt
Normal file
@@ -0,0 +1,34 @@
cmake_minimum_required(VERSION 2.8)

project(matrice)

set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)

#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")

find_package(CUDA REQUIRED)
list(APPEND HEADER_REP ${ROOT_DIR}/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")

#file(GLOB CPP_FILES ${ROOT_DIR}/src/CPP/*.cpp)
#file(GLOB CU_FILES ${ROOT_DIR}/src/CPP/*.cu)
set(CPP_FILES seuillage_C.cpp)
set(CU_FILES ${ROOT_DIR}/seuillage_main.cu)
#file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/src/CUDA/*.cuh ${ROOT_DIR}/inc/*.h)

set(CUDA_SEPARABLE_COMPILATION ON)
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/\" -I\"/usr/local/cuda/samples/common/inc/\"")

cuda_add_executable(seuillage_CUDA ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
set_target_properties(seuillage_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(seuillage_CUDA PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
17
A4/TP_GPU-master/SAXPY/CMakeLists_cmake38.txt
Normal file
@@ -0,0 +1,17 @@
#cmake -DCMAKE_CUDA_FLAGS="-arch=sm_30" ..
cmake_minimum_required(VERSION 3.8)
set(CUDACXX "/usr/local/cuda/bin/nvcc")
set(CMAKE_CUDA_COMPILER "/usr/local/cuda/bin/nvcc")
project(seuillage_CUDA LANGUAGES CXX CUDA)

set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
set(CMAKE_VERBOSE_MAKEFILE TRUE)

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")

add_executable(seuillage_CUDA seuillage_C.cpp seuillage_main.cu)
set_target_properties(seuillage_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_features(seuillage_CUDA PUBLIC cxx_std_11)
target_include_directories(seuillage_CUDA PUBLIC ".")
24
A4/TP_GPU-master/SAXPY/saxpy.h
Normal file
@@ -0,0 +1,24 @@
//#define SEUILLAGE_H
//#ifndef SEUILLAGE_H

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#define BLOCK_SIZE 16

// Prototype
void runTest( int argc, char** argv);
extern "C" void saxpy_C(float *vector_SAXPY, float A, float *vector_X, float *vector_Y, int N);

//#endif
28
A4/TP_GPU-master/SAXPY/saxpy_C_student.cpp
Normal file
@@ -0,0 +1,28 @@
/*
 * Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual property and
 * proprietary rights in and to this software and related documentation and
 * any modifications thereto. Any use, reproduction, disclosure, or distribution
 * of this software and related documentation without an express license
 * agreement from NVIDIA Corporation is strictly prohibited.
 *
 */

/* Small matrix transpose with Cuda (Example for a 16x16 matrix)
 * Reference solution.
 */

#include "saxpy.h"

////////////////////////////////////////////////////////////////////////////////
//! Compute reference data set
////////////////////////////////////////////////////////////////////////////////
void saxpy_C(float *vector_SAXPY, float A, float *vector_X, float *vector_Y, int N)
{
    // YOUR CODE HERE (A VOUS DE CODER)
}
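The body left blank above is the textbook SAXPY operation: vector_SAXPY[i] = A * vector_X[i] + vector_Y[i] for i in [0, N). A minimal CPU sketch of it (a hypothetical illustration, not the official course solution) could look like this:

    void saxpy_C(float *vector_SAXPY, float A, float *vector_X, float *vector_Y, int N)
    {
        // Plain sequential loop over the N elements.
        for (int i = 0; i < N; i++) {
            vector_SAXPY[i] = A * vector_X[i] + vector_Y[i];
        }
    }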
195
A4/TP_GPU-master/SAXPY/saxpy_main_student.cu
Normal file
@@ -0,0 +1,195 @@
/*
 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

/* Template project which demonstrates the basics on how to setup a project
 * example application.
 * Host code.
 */

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <iostream>
using namespace std;

// includes CUDA
#include <cuda_runtime.h>

#include "saxpy.h"

__global__ void saxpy_kernel(float *vector_SAXPY, float A, float *vector_X, float *vector_Y, int N)
{
    // YOUR CODE HERE (A VOUS DE CODER)
}

////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest( int argc, char** argv);

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main( int argc, char** argv)
{
    runTest( argc, argv);
}

__host__ static int iDivUp(int a, int b) {
    return ((a % b != 0) ? (a / b + 1) : (a / b));
}

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    cudaError_t error;

    unsigned long int N = 256*1024;

    const unsigned int mem_size = N*sizeof(float);
    // allocate host memory
    float* h_vector_X = (float*) malloc(mem_size);
    float* h_vector_Y = (float*) malloc(mem_size);

    // Initialization of the input data
    float A = 1.0;

    for (int i = 0; i < N; i++){
        h_vector_X[i] = (float)rand();
        h_vector_Y[i] = (float)rand();
    }

    ////////////////////////////////////////////////////////////////////////////////
    // EXECUTION ON THE CPU
    ///////////////////////////////////////////////////////////////////////

    // Result computed on the CPU
    float* h_vector_SAXPY_CPU = (float*) malloc( mem_size);

    printf("SAXPY CPU\n");

    cudaEvent_t start, stop;
    error = cudaEventCreate(&start);
    error = cudaEventCreate(&stop);

    // Record the start event
    error = cudaEventRecord(start, NULL);
    error = cudaEventSynchronize(start);
    // SAXPY on the CPU
    // YOUR CODE HERE (A VOUS DE CODER)

    // Record the stop event
    error = cudaEventRecord(stop, NULL);
    // Wait for the stop event to complete
    error = cudaEventSynchronize(stop);
    float msecTotal = 0.0f;
    error = cudaEventElapsedTime(&msecTotal, start, stop);

    printf("CPU execution time %f\n", msecTotal);

    ////////////////////////////////////////////////////////////////////////////////
    // EXECUTION ON THE GPU
    ///////////////////////////////////////////////////////////////////////

    printf("SAXPY GPU\n");

    float* h_vector_SAXPY_GPU = (float*) malloc(mem_size);

    // vectors in device memory
    float* d_vector_X;
    float* d_vector_Y;
    float* d_vector_SAXPY;

    cudaEvent_t start_mem, stop_mem;
    error = cudaEventCreate(&start_mem);
    error = cudaEventCreate(&stop_mem);

    error = cudaEventRecord(start, NULL);
    error = cudaEventSynchronize(start);

    // Memory allocation of d_vector_X, d_vector_Y and d_vector_SAXPY on the GPU card
    // YOUR CODE HERE (A VOUS DE CODER)

    // copy host memory to device
    // YOUR CODE HERE (A VOUS DE CODER)

    error = cudaEventRecord(stop_mem, NULL);
    // Wait for the stop event to complete
    error = cudaEventSynchronize(stop_mem);
    float msecMem = 0.0f;
    error = cudaEventElapsedTime(&msecMem, start, stop_mem);

    // setup execution parameters -> split the work into threads
    // YOUR CODE HERE (A VOUS DE CODER)

    // launch the threads executed on the GPU card
    // YOUR CODE HERE (A VOUS DE CODER)

    error = cudaEventRecord(start_mem, NULL);
    error = cudaEventSynchronize(start_mem);

    // copy result from device to host
    // YOUR CODE HERE (A VOUS DE CODER)

    // cleanup device memory
    // UNCOMMENT THESE LINES (COMMENTAIRES A ENLEVER)
    //cudaFree(d_vector_X);
    //cudaFree(d_vector_Y);
    //cudaFree(d_vector_SAXPY);

    error = cudaEventRecord(stop, NULL);
    // Wait for the stop event to complete
    error = cudaEventSynchronize(stop);
    msecTotal = 0.0f;
    error = cudaEventElapsedTime(&msecTotal, start, stop);
    float msecMem2 = 0.0f;
    error = cudaEventElapsedTime(&msecMem2, start_mem, stop);
    msecMem += msecMem2;

    printf("GPU execution time %f ms (memory management %2.2f %%)\n", msecTotal, (msecMem)/(msecTotal)*100);

    float sum_diff = 0;
    for(int i = 0; i < N; i++)
        sum_diff += h_vector_SAXPY_GPU[i] - h_vector_SAXPY_CPU[i];

    printf("sum_diff = %f\n", sum_diff);

    // cleanup memory
    free(h_vector_X);
    free(h_vector_Y);
    free(h_vector_SAXPY_GPU);
    free(h_vector_SAXPY_CPU);
}
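The template above leaves the kernel body, the device allocations, the host-to-device transfers, the launch configuration and the device-to-host copy blank. A minimal sketch of those pieces (hypothetical, assuming a 1-D launch with an arbitrary 256 threads per block; not the official solution) could be:

    // Kernel: one thread per element, guarded against the last partial block.
    __global__ void saxpy_kernel(float *vector_SAXPY, float A, float *vector_X, float *vector_Y, int N)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < N)
            vector_SAXPY[i] = A * vector_X[i] + vector_Y[i];
    }

    // Host side, inside runTest():
    // allocate device memory
    cudaMalloc((void**)&d_vector_X, mem_size);
    cudaMalloc((void**)&d_vector_Y, mem_size);
    cudaMalloc((void**)&d_vector_SAXPY, mem_size);
    // copy host memory to device
    cudaMemcpy(d_vector_X, h_vector_X, mem_size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_vector_Y, h_vector_Y, mem_size, cudaMemcpyHostToDevice);
    // setup execution parameters: 1-D grid of 1-D blocks
    dim3 threads(256);
    dim3 grid(iDivUp(N, threads.x));
    // launch the kernel on the GPU
    saxpy_kernel<<<grid, threads>>>(d_vector_SAXPY, A, d_vector_X, d_vector_Y, N);
    // copy result from device to host
    cudaMemcpy(h_vector_SAXPY_GPU, d_vector_SAXPY, mem_size, cudaMemcpyDeviceToHost);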
BIN
A4/TP_GPU-master/TP0.png
Normal file
(PNG image, 166 KiB)
94
A4/TP_GPU-master/TP0_matrice/.cproject
Normal file
@@ -0,0 +1,94 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="com.nvidia.cuda.ide.nine_zero.configuration.release.1804067204">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="com.nvidia.cuda.ide.nine_zero.configuration.release.1804067204" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="com.nvidia.cuda.ide.elf" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="com.nvidia.cuda.ide.cubin" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="com.nvidia.cuda.ide.macho" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="nvcc.errorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="com.nvidia.cuda.ide.nine_zero.configuration.release.1804067204" name="Release" parent="com.nvidia.cuda.ide.nine_zero.configuration.release">
<folderInfo id="com.nvidia.cuda.ide.nine_zero.configuration.release.1804067204." name="/" resourcePath="">
<toolChain id="com.nvidia.cuda.ide.toolchain.nine_zero.69243472" name="CUDA Toolkit 10.0" superClass="com.nvidia.cuda.ide.toolchain.nine_zero">
<targetPlatform archList="all" binaryParser="com.nvidia.cuda.ide.elf;com.nvidia.cuda.ide.macho;com.nvidia.cuda.ide.cubin" id="com.nvidia.cuda.ide.targetPlatform.718907530" isAbstract="false" name="Debug Platform" osList="linux,macosx" superClass="com.nvidia.cuda.ide.targetPlatform"/>
<builder buildPath="${workspace_loc:/matrice}/Release" id="com.nvidia.cuda.ide.builder.960504214" keepEnvironmentInBuildfile="false" name="CUDA Toolkit 9.2 Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="com.nvidia.cuda.ide.builder"/>
<tool id="nvcc.compiler.base.376896027" name="NVCC Compiler" superClass="nvcc.compiler.base">
<option id="nvcc.compiler.include.paths.602427994" name="Include paths (-I)" superClass="nvcc.compiler.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/usr/local/cuda/samples/common/inc"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/inc&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}/src/CUDA&quot;"/>
</option>
<option id="nvcc.compiler.deviceDebug.1868299221" name="Generate device debug information (-G)" superClass="nvcc.compiler.deviceDebug"/>
<option id="nvcc.compiler.option.level.1923776877" name="Generate host debug information (-g)" superClass="nvcc.compiler.option.level"/>
<option defaultValue="nvcc.compiler.optimization.level.most" id="nvcc.compiler.optimization.level.461195553" name="Optimization Level" superClass="nvcc.compiler.optimization.level" value="nvcc.compiler.optimization.level.most" valueType="enumerated"/>
<option id="nvcc.compiler.pic.2021621233" name="Position Independent Code (-fPIC)" superClass="nvcc.compiler.pic" value="true" valueType="boolean"/>
<inputType id="nvcc.compiler.input.cu.212130830" superClass="nvcc.compiler.input.cu"/>
<inputType id="nvcc.compiler.input.cpp.1544991377" superClass="nvcc.compiler.input.cpp"/>
<inputType id="nvcc.compiler.input.c.298636231" superClass="nvcc.compiler.input.c"/>
</tool>
<tool id="nvcc.linker.base.815597997" name="NVCC Linker" superClass="nvcc.linker.base">
<option id="nvcc.linker.option.paths.539821132" name="Library search path (-L)" superClass="nvcc.linker.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="/usr/local/cuda/lib"/>
</option>
<option id="nvcc.linker.option.libs.680956661" name="Libraries (-l)" superClass="nvcc.linker.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="cublas"/>
</option>
<inputType id="nvcc.linker.input.1893304424" superClass="nvcc.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="nvcc.archiver.base.24620523" name="NVCC Archiver" superClass="nvcc.archiver.base"/>
<tool id="com.nvidia.host.assembler.1797002694" name="Host Assembler" superClass="com.nvidia.host.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.195427270" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="matrixMul_kernel.cuh" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name="src"/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
<storageModule moduleId="com.nvidia.cuda.ide.build.project.ICudaProjectConfiguration">
<executable devicelink="false">
<ptx major="5" minor="2"/>
<sass major="5" minor="2"/>
</executable>
<editor-arch major="5" minor="2"/>
</storageModule>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="matrice.com.nvidia.cuda.ide.nine_zero.exe.915560355" name="Executable" projectType="com.nvidia.cuda.ide.nine_zero.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.nine_zero.configuration.debug.2018016243;com.nvidia.cuda.ide.nine_zero.configuration.debug.2018016243.;nvcc.compiler.base.1292094216;nvcc.compiler.input.cu.2074083432">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.nine_zero.configuration.debug.2018016243;com.nvidia.cuda.ide.nine_zero.configuration.debug.2018016243.;nvcc.compiler.base.1292094216;nvcc.compiler.input.c.2064063779">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.nine_zero.configuration.debug.2018016243;com.nvidia.cuda.ide.nine_zero.configuration.debug.2018016243.;nvcc.compiler.base.1292094216;nvcc.compiler.input.cpp.679408217">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/matrice"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>
27
A4/TP_GPU-master/TP0_matrice/.project
Normal file
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>matrice</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
</projectDescription>
@@ -0,0 +1,3 @@
eclipse.preferences.version=1
project-sync-config=<?xml version\="1.0" encoding\="UTF-8"?>\n<sync-configs>\n<sync-config config-name\="Local" connection-name\="Local" location\="/home/gac/L2S_Programmation/TP_GPU/TP_matrice_CUDA_10" remote-services-id\="org.eclipse.ptp.remote.LocalServices" sync-on-postbuild\="true" sync-on-prebuild\="true" sync-on-save\="true" sync-provider-id\="org.eclipse.ptp.rdt.sync.git.core.synchronizeService">\n<config-properties com.nvidia.cuda.ide.build.core.Cpu\="Native"/>\n</sync-config>\n</sync-configs>
sync-mode=NONE
33
A4/TP_GPU-master/TP0_matrice/CMakeLists-cuda11.2.txt
Normal file
@@ -0,0 +1,33 @@
#cmake -DCMAKE_CUDA_FLAGS="-arch=sm_30" ..
cmake_minimum_required(VERSION 2.8)

project(matrice)

set(CUDA_TOOLKIT_ROOT_DIR "/partage/public/ngac/cuda-11.2")

set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)

#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")

find_package(CUDA REQUIRED)
list(APPEND HEADER_REP ${ROOT_DIR}/inc/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")

#file(GLOB CPP_FILES ${ROOT_DIR}/src/CPP/*.cpp)
#file(GLOB CU_FILES ${ROOT_DIR}/src/CPP/*.cu)
set(CPP_FILES ${ROOT_DIR}/src/CPP/matrixMul_C.cpp)
set(CU_FILES ${ROOT_DIR}/src/CPP/matrixMul_cuda.cu ${ROOT_DIR}/src/CPP/matrixMul_CUBLAS.cu ${ROOT_DIR}/src/CPP/matrixMul_main.cu)
file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/src/CUDA/*.cuh ${ROOT_DIR}/inc/*.h)

set(CUDA_SEPARABLE_COMPILATION ON)
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/src/CUDA\" -I\"/usr/local/cuda/samples/common/inc/\"")

cuda_add_executable(matrice_CUDA ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
set_target_properties(matrice_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(matrice_CUDA PUBLIC ${ROOT_DIR}/inc/ ${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc)
target_link_libraries(matrice_CUDA ${CUDA_CUBLAS_LIBRARIES})
37
A4/TP_GPU-master/TP0_matrice/CMakeLists.txt
Normal file
@@ -0,0 +1,37 @@
#cmake -DCMAKE_CUDA_FLAGS="-arch=sm_30" ..
cmake_minimum_required(VERSION 3.8)
#set(CMAKE_CUDA_COMPILER "/usr/local/cuda/bin/nvcc")
project(Matrice_project LANGUAGES CXX CUDA)

set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
set(CMAKE_VERBOSE_MAKEFILE TRUE)

set(ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(EXE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")

list(APPEND HEADER_REP ${ROOT_DIR}/inc ${ROOT_DIR}/src/CUDA ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")

file(GLOB CPP_FILES ${ROOT_DIR}/src/CPP/*.cpp)
file(GLOB CU_FILES ${ROOT_DIR}/src/CPP/*.cu)
file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/src/CUDA/*.cuh ${ROOT_DIR}/inc/*.h)

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")

# Build a library first, then the executable
#add_library(matrice ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})

#set_target_properties(matrice PROPERTIES POSITION_INDEPENDENT_CODE ON)
#target_compile_features(matrice PUBLIC cxx_std_11)
#target_include_directories(matrice PUBLIC "${HEADER_REP}")

#add_executable(matrice_exe "${EXE_DIR}/src/CPP/matrixMul_main.cu")
#target_link_libraries(matrice_exe PUBLIC matrice)

# Build the executable directly (without generating a library first => all source files are taken into account)
add_executable(matrice_exe ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})

set_target_properties(matrice_exe PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_features(matrice_exe PUBLIC cxx_std_11)
target_include_directories(matrice_exe PUBLIC "${HEADER_REP}")
target_link_libraries(matrice_exe PUBLIC cublas)
A4/TP_GPU-master/TP0_matrice/CMakeLists_cmake28.txt
Normal file
|
@ -0,0 +1,31 @@
|
|||
#cmake -DCMAKE_CUDA_FLAGS=”-arch=sm_30” ..
|
||||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
project(matrice)
|
||||
|
||||
set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
|
||||
|
||||
find_package(CUDA REQUIRED)
|
||||
list(APPEND HEADER_REP ${ROOT_DIR}/inc/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")
|
||||
|
||||
#file(GLOB CPP_FILES ${ROOT_DIR}/src/CPP/*.cpp)
|
||||
#file(GLOB CU_FILES ${ROOT_DIR}/src/CPP/*.cu)
|
||||
set(CPP_FILES ${ROOT_DIR}/src/CPP/matrixMul_C.cpp)
|
||||
set(CU_FILES ${ROOT_DIR}/src/CPP/matrixMul_cuda.cu ${ROOT_DIR}/src/CPP/matrixMul_CUBLAS.cu ${ROOT_DIR}/src/CPP/matrixMul_main.cu)
|
||||
file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/src/CUDA/*.cuh ${ROOT_DIR}/inc/*.h)
|
||||
|
||||
|
||||
set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/src/CUDA\" -I\"/usr/local/cuda/samples/common/inc/\"")
|
||||
|
||||
|
||||
cuda_add_executable(matrice_CUDA ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
|
||||
set_target_properties(matrice_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(matrice_CUDA PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
|
||||
target_link_libraries(matrice_CUDA ${CUDA_CUBLAS_LIBRARIES})
|
||||
|
||||
|
||||
|
||||
|
38
A4/TP_GPU-master/TP0_matrice/inc/matrixMul.h
Normal file
@@ -0,0 +1,38 @@
#ifndef _MATRIXMUL_H_
#define _MATRIXMUL_H_
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include <cuda_runtime.h>
//#include <helper_functions.h>
//#include <helper_cuda.h>
#include "cublas.h"
// Thread block size
#define BLOCK_SIZE 16

typedef enum type_version_kernel {v0,v0_bis,v1} Type_version_kernel;

void computeGold( float*, const float*, const float*, unsigned int, unsigned int, unsigned int);
// Matrix dimensions
// (chosen as multiples of the thread block size for simplicity)
/*
#define WA (64 * BLOCK_SIZE) // Matrix A width
#define HA (64 * BLOCK_SIZE) // Matrix A height
#define WB (64 * BLOCK_SIZE) // Matrix B width
#define HB WA  // Matrix B height
#define WC WB  // Matrix C width
#define HC HA  // Matrix C height
*/

#
void compute_matrixMul_C(int W);
void compute_matrixMul_cublas(int N);
void compute_matrixMul_cuda(int N,Type_version_kernel v);
void randomInit(float*, int);
void printDiff(float*, float*, int, int);

#endif // _MATRIXMUL_H_
0
A4/TP_GPU-master/TP0_matrice/src/.ptp-sync-folder
Normal file
137
A4/TP_GPU-master/TP0_matrice/src/CPP/matrixMul_C.cpp
Normal file
@@ -0,0 +1,137 @@
/*
 * Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws. Users and possessors of this source code
 * are hereby granted a nonexclusive, royalty-free license to use this code
 * in individual and commercial software.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users. This source code is a "commercial item" as
 * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
 * "commercial computer software" and "commercial computer software
 * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 *
 * Any use of this source code in individual and commercial software must
 * include, in the user documentation and internal comments to the code,
 * the above Disclaimer and U.S. Government End Users Notice.
 */

#include "matrixMul.h"

////////////////////////////////////////////////////////////////////////////////
// export C interface

////////////////////////////////////////////////////////////////////////////////
//! Compute reference data set
//! C = A * B
//! @param C          reference data, computed but preallocated
//! @param A          matrix A as provided to device
//! @param B          matrix B as provided to device
//! @param hA         height of matrix A
//! @param wB         width of matrix B
////////////////////////////////////////////////////////////////////////////////
void
computeGold(float* C, const float* A, const float* B, unsigned int hA, unsigned int wA, unsigned int wB)
{
    for (unsigned int i = 0; i < hA; ++i)
        for (unsigned int j = 0; j < wB; ++j) {
            double sum = 0;
            for (unsigned int k = 0; k < wA; ++k) {
                double a = A[i * wA + k];
                double b = B[k * wB + j];
                sum += a * b;
            }
            C[i * wB + j] = (float)sum;
        }
}

void compute_matrixMul_C(int W){

    // allocate host memory for matrices A and B
    unsigned int size_A = W * W;
    unsigned int mem_size_A = sizeof(float) * size_A;
    float* h_A = (float*) malloc(mem_size_A);
    unsigned int size_B = W * W;
    unsigned int mem_size_B = sizeof(float) * size_B;
    float* h_B = (float*) malloc(mem_size_B);

    // initialize host memory
    //randomInit(h_A, size_A);
    //randomInit(h_B, size_B);

    // allocate host memory for the result
    unsigned int size_C = W * W;
    unsigned int mem_size_C = sizeof(float) * size_C;
    float* h_C = (float*) malloc(mem_size_C);

    // create and start timer
    cudaDeviceSynchronize();

    // Allocate CUDA events that we'll use for timing
    cudaEvent_t event[2];

    cudaEventCreate(event+0);
    cudaEventCreate(event+1);

    cudaEventRecord(event[0], NULL);
    computeGold(h_C, h_A, h_B, W, W, W);
    cudaEventRecord(event[1], NULL);
    cudaEventSynchronize(event[1]);

    float msecTotal = 0.0f;
    cudaEventElapsedTime(&msecTotal, event[0], event[1]);

    printf("Time= \t\t\t\t %.3f msec\n",msecTotal);

    free(h_A);
    free(h_B);
    free(h_C);
}

// Allocates a matrix with random float entries.
void randomInit(float* data, int size)
{
    for (int i = 0; i < size; ++i)
        data[i] = rand() / (float)RAND_MAX;
}

void printDiff(float *data1, float *data2, int width, int height)
{
    int i,j,k;
    int error_count=0;
    for (j=0; j<height; j++) {
        for (i=0; i<width; i++) {
            k = j*width+i;
            if (data1[k] != data2[k]) {
                printf("diff(%d,%d) CPU=%4.4f, GPU=%4.4f\n", i,j, data1[k], data2[k]);
                error_count++;
            }
        }
    }
    printf("\nTotal Errors = %d\n", error_count);
}
111
A4/TP_GPU-master/TP0_matrice/src/CPP/matrixMul_CUBLAS.cu
Normal file
@@ -0,0 +1,111 @@
#include "matrixMul.h"

void compute_matrixMul_cublas(int N)
{
    float alpha = 1.0f, beta = 0.0f;

    // allocate host memory for matrices A and B
    unsigned int size_A = N * N;
    unsigned int mem_size_A = sizeof(float) * size_A;
    float* h_A = (float*) malloc(mem_size_A);
    //float* h_Abis = (float*) malloc(mem_size_A);
    unsigned int size_B = N * N;
    unsigned int mem_size_B = sizeof(float) * size_B;
    float* h_B = (float*) malloc(mem_size_B);
    //float* h_Bbis = (float*) malloc(mem_size_B);

    // allocate host memory for the result
    unsigned int size_C = N * N;
    unsigned int mem_size_C = sizeof(float) * size_C;
    float* h_C = (float*) malloc(mem_size_C);

    cublasInit();

    // set seed for rand()
    srand(2006);
    // initialize host memory
    randomInit(h_A, size_A);
    randomInit(h_B, size_B);

    // Allocate CUDA events that we'll use for timing
    cudaEvent_t record_event[5];
    float time_msec[5];   // 4 measured intervals + total (was declared [4]: writing time_msec[4] overflowed the array)
    for (int i=0;i<5;i++){
        cudaEventCreate(record_event+i);
    }

    // Record the start event
    cudaDeviceSynchronize();
    cudaEventRecord(record_event[0], NULL);

    float* d_A;
    cublasAlloc(N*N, sizeof(float), (void **)&d_A);
    float* d_B;
    cublasAlloc(N*N, sizeof(float), (void **)&d_B);
    float* d_C;
    cublasAlloc(N*N, sizeof(float), (void **)&d_C);

    cudaEventRecord(record_event[1], NULL);
    cudaEventSynchronize(record_event[1]);

    // copy host memory to device
    cublasSetMatrix(N,N, sizeof(float), h_A, N, d_A, N);
    cublasSetMatrix(N,N, sizeof(float), h_B, N, d_B, N);

    cudaEventRecord(record_event[2], NULL);
    cudaEventSynchronize(record_event[2]);

    cublasSgemm('n', 'n', N, N, N, alpha, d_A, N,d_B, N, beta, d_C, N);

    cudaEventRecord(record_event[3], NULL);
    cudaEventSynchronize(record_event[3]);

    cublasGetMatrix(N,N, sizeof(float), d_C,N, h_C, N);

    cudaEventRecord(record_event[4], NULL);
    cudaEventSynchronize(record_event[4]);

    cudaEventElapsedTime(time_msec+0, record_event[0], record_event[1]);
    cudaEventElapsedTime(time_msec+1, record_event[1], record_event[2]);
    cudaEventElapsedTime(time_msec+2, record_event[2], record_event[3]);
    cudaEventElapsedTime(time_msec+3, record_event[3], record_event[4]);
    time_msec[4]=time_msec[0]+time_msec[1]+time_msec[2]+time_msec[3];

    printf("TOTAL : \t\t\t %f (ms) dont %.2f%% de gestion mémoire \n",time_msec[4],100*(time_msec[0]+time_msec[1]+time_msec[3])/time_msec[4]);

    cublasShutdown();

    // clean up memory
    free(h_A);
    free(h_B);
    free(h_C);
    //free(reference);
    cudaFree(d_A);
    cudaFree(d_B);
    cudaFree(d_C);
}
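compute_matrixMul_cublas() uses the legacy cuBLAS interface (cublasInit, cublasAlloc, the char-based cublasSgemm), which NVIDIA has deprecated in favour of the handle-based cublas_v2 API. A rough equivalent under that assumption (a sketch, not part of the TP sources) would be:

    #include <cublas_v2.h>
    cublasHandle_t handle;
    cublasCreate(&handle);
    float alpha = 1.0f, beta = 0.0f;
    // Column-major N x N product: d_C = alpha * d_A * d_B + beta * d_C
    cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_A, N, d_B, N, &beta, d_C, N);
    cublasDestroy(handle);

with d_A, d_B and d_C allocated through cudaMalloc and filled with cudaMemcpy (or cublasSetMatrix, which the v2 API still provides).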
161
A4/TP_GPU-master/TP0_matrice/src/CPP/matrixMul_cuda.cu
Normal file
@@ -0,0 +1,161 @@
/*
 * Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
 * (Same NVIDIA NOTICE TO USER as in matrixMul_C.cpp above.)
 */

/* Matrix multiplication: C = A * B.
 * Host code.
 *
 * This sample implements matrix multiplication and is exactly the same as
 * Chapter 7 of the programming guide.
 * It has been written for clarity of exposition to illustrate various CUDA
 * programming principles, not with the goal of providing the most
 * performant generic kernel for matrix multiplication.
 *
 * CUBLAS provides high-performance matrix multiplication.
 */

// includes, system
#include "matrixMul.h"
// includes, kernels
#include "matrixMul_kernel.cuh"

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void compute_matrixMul_cuda(int N,Type_version_kernel version_kernel)
{
    // set seed for rand()
    //srand(2006);

    // allocate host memory for matrices A and B
    unsigned int size_A = N * N;
    unsigned int mem_size_A = sizeof(float) * size_A;
    float* h_A = (float*) malloc(mem_size_A);
    unsigned int size_B = N * N;
    unsigned int mem_size_B = sizeof(float) * size_B;
    float* h_B = (float*) malloc(mem_size_B);

    // allocate host memory for the result
    unsigned int size_C = N * N;
    unsigned int mem_size_C = sizeof(float) * size_C;
    float* h_C = (float*) malloc(mem_size_C);

    // Allocate CUDA events that we'll use for timing
    cudaEvent_t record_event[5];
    float time_msec[5];   // 4 measured intervals + total (was declared [4], which overflowed on time_msec[4])
    for (int i=0;i<5;i++){
        cudaEventCreate(record_event+i);
    }

    // Record the start event
    cudaDeviceSynchronize();
    cudaEventRecord(record_event[0], NULL);

    // allocate device memory
    float* d_A;
    cudaMalloc((void**) &d_A, mem_size_A);
    float* d_B;
    cudaMalloc((void**) &d_B, mem_size_B);

    // allocate device memory for result
    float* d_C;
    cudaMalloc((void**) &d_C, mem_size_C);

    cudaEventRecord(record_event[1], NULL);
    cudaEventSynchronize(record_event[1]);

    // copy host memory to device
    cudaMemcpy(d_A, h_A, mem_size_A, cudaMemcpyHostToDevice) ;
    cudaMemcpy(d_B, h_B, mem_size_B, cudaMemcpyHostToDevice) ;

    cudaEventRecord(record_event[2], NULL);
    cudaEventSynchronize(record_event[2]);

    // setup execution parameters
    dim3 threads(BLOCK_SIZE, BLOCK_SIZE);
    dim3 grid(N/threads.x, N/threads.y);

    // execute the kernel
    switch(version_kernel){
    case v0 :     matrixMul_v0<<< grid, threads >>>(d_C, d_A, d_B, N); break;
    case v0_bis : matrixMul_v0_bis<<< grid, threads >>>(d_C, d_A, d_B, N); break;
    case v1 :     matrixMul_v1<<< grid, threads >>>(d_C, d_A, d_B, N,N); break;
    }

    cudaEventRecord(record_event[3], NULL);
    cudaEventSynchronize(record_event[3]);

    // copy result from device to host
    cudaMemcpy(h_C, d_C, mem_size_C, cudaMemcpyDeviceToHost) ;

    cudaEventRecord(record_event[4], NULL);
    cudaEventSynchronize(record_event[4]);

    cudaEventElapsedTime(time_msec+0, record_event[0], record_event[1]);
    cudaEventElapsedTime(time_msec+1, record_event[1], record_event[2]);
    cudaEventElapsedTime(time_msec+2, record_event[2], record_event[3]);
    cudaEventElapsedTime(time_msec+3, record_event[3], record_event[4]);
    time_msec[4]=time_msec[0]+time_msec[1]+time_msec[2]+time_msec[3];
    printf("TOTAL : \t\t\t %f (ms) dont %.2f%% de gestion mémoire \n",time_msec[4],100*(time_msec[0]+time_msec[1]+time_msec[3])/time_msec[4]);

    // clean up memory
    free(h_A);
    free(h_B);
    free(h_C);

    cudaFree(d_A);
    cudaFree(d_B);
    cudaFree(d_C);
}
108
A4/TP_GPU-master/TP0_matrice/src/CPP/matrixMul_main.cu
Normal file
@@ -0,0 +1,108 @@
/*
 * Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
 * (Same NVIDIA NOTICE TO USER as in matrixMul_C.cpp above.)
 */

/* Matrix multiplication: C = A * B.
 * Host code.
 *
 * This sample implements matrix multiplication and is exactly the same as
 * Chapter 7 of the programming guide.
 * It has been written for clarity of exposition to illustrate various CUDA
 * programming principles, not with the goal of providing the most
 * performant generic kernel for matrix multiplication.
 *
 * CUBLAS provides high-performance matrix multiplication.
 */

// includes, system
#include "matrixMul.h"

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main(int argc, const char** argv)
{
    //findCudaDevice(argc,argv);

    int n;

    printf("\nmatrix_Mul_CUDA_C\n\n");
    for (n=7;n<11;n++){
        unsigned int taille_matrice;
        taille_matrice=(unsigned int)pow((float)2.0,n);
        printf("MATRICE DE TAILLE %d\n",taille_matrice);
        compute_matrixMul_C(taille_matrice);
    }

    printf("\nmatrix_Mul_CUDA_v0\n\n");
    for (n=7;n<11;n++){
        unsigned int taille_matrice;
        taille_matrice=(unsigned int)pow((float)2.0,n);
        printf("MATRICE DE TAILLE %d\n",taille_matrice);
        compute_matrixMul_cuda(taille_matrice,v0);
    }

    printf("\nmatrix_Mul_CUDA_v0_bis\n\n");
    for (n=7;n<11;n++){
        unsigned int taille_matrice;
        taille_matrice=(unsigned int)pow((float)2.0,n);
        printf("MATRICE DE TAILLE %d\n",taille_matrice);
        compute_matrixMul_cuda(taille_matrice,v0_bis);
    }

    printf("\nmatrix_Mul_CUDA_v1\n\n");
    for (n=7;n<11;n++){
        unsigned int taille_matrice;
        taille_matrice=(unsigned int)pow((float)2.0,n);
        printf("MATRICE DE TAILLE %d\n",taille_matrice);
        compute_matrixMul_cuda(taille_matrice,v1);
    }

    printf("\nmatrix_Mul_CUBLAS\n\n");
    for (n=7;n<11;n++){
        unsigned int taille_matrice;
        taille_matrice=(unsigned int)pow((float)2.0,n);
        printf("MATRICE DE TAILLE %d\n",taille_matrice);
        compute_matrixMul_cublas(taille_matrice);
    }

    //CUT_EXIT(argc, argv);
}
198
A4/TP_GPU-master/TP0_matrice/src/CUDA/matrixMul_kernel.cuh
Normal file
@@ -0,0 +1,198 @@
/*
 * Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
 * (Same NVIDIA NOTICE TO USER as in matrixMul_C.cpp above.)
 */

/* Matrix multiplication: C = A * B.
 * Device code.
 */

#ifndef _MATRIXMUL_KERNEL_H_
#define _MATRIXMUL_KERNEL_H_

#include <stdio.h>
#include "matrixMul.h"

#define CHECK_BANK_CONFLICTS 0
#if CHECK_BANK_CONFLICTS
#define AS(i, j) CUT_BANK_CHECKER(((float*)&As[0][0]), (BLOCK_SIZE * i + j))
#define BS(i, j) CUT_BANK_CHECKER(((float*)&Bs[0][0]), (BLOCK_SIZE * i + j))
#else
#define AS(i, j) As[i][j]
#define BS(i, j) Bs[i][j]
#endif

////////////////////////////////////////////////////////////////////////////////
//! Matrix multiplication on the device: C = A * B
//! N is matrix dimension
////////////////////////////////////////////////////////////////////////////////
__global__ void
matrixMul_v0( float* C, float* A, float* B,int matrix_size)
{
    int k;
    float C_sum = 0;   // accumulator must start at zero (was left uninitialized)

    int i_0,j_0;
    int i,j;

    int a,b,c;

    i_0=blockIdx.x*BLOCK_SIZE;
    j_0=blockIdx.y*BLOCK_SIZE;

    i=i_0+threadIdx.x;
    j=j_0+threadIdx.y;

    a=i*matrix_size;
    b=j;
    c=j+i*matrix_size;

    for (k = 0; k < matrix_size; ++k){
        C_sum += A[a] * B[b];
        a++;
        b+=matrix_size;
    }

    C[c] = C_sum;
}

__global__ void
matrixMul_v0_bis( float* C, float* A, float* B,int matrix_size)
{
    int k;
    float C_sum = 0;   // accumulator must start at zero (was left uninitialized)

    int i_0,j_0;
    int i,j;

    int a,b,c;

    j_0=blockIdx.x*BLOCK_SIZE;
    i_0=blockIdx.y*BLOCK_SIZE;

    j=j_0+threadIdx.x;
    i=i_0+threadIdx.y;

    a=i*matrix_size;
    b=j;
    c=j+i*matrix_size;

    for (k = 0; k < matrix_size; ++k){
        C_sum += A[a] * B[b];
        a++;
        b+=matrix_size;
    }

    C[c] = C_sum;
}

__global__ void
matrixMul_v1( float* C, float* A, float* B, int wA, int wB)
{
    // Block index
    int bx = blockIdx.x;
    int by = blockIdx.y;

    // Thread index
    int tx = threadIdx.x;
    int ty = threadIdx.y;

    // Index of the first sub-matrix of A processed by the block
    int aBegin = wA * BLOCK_SIZE * by;

    // Index of the last sub-matrix of A processed by the block
    int aEnd   = aBegin + wA - 1;

    // Step size used to iterate through the sub-matrices of A
    int aStep  = BLOCK_SIZE;

    // Index of the first sub-matrix of B processed by the block
    int bBegin = BLOCK_SIZE * bx;

    // Step size used to iterate through the sub-matrices of B
    int bStep  = BLOCK_SIZE * wB;

    // Csub is used to store the element of the block sub-matrix
    // that is computed by the thread
    float Csub = 0;

    // Loop over all the sub-matrices of A and B
    // required to compute the block sub-matrix
    for (int a = aBegin, b = bBegin;
             a <= aEnd;
             a += aStep, b += bStep) {

        // Declaration of the shared memory array As used to
        // store the sub-matrix of A
        __shared__ float As[BLOCK_SIZE][BLOCK_SIZE];

        // Declaration of the shared memory array Bs used to
        // store the sub-matrix of B
        __shared__ float Bs[BLOCK_SIZE][BLOCK_SIZE];

        // Load the matrices from device memory
        // to shared memory; each thread loads
        // one element of each matrix
        AS(ty, tx) = A[a + wA * ty + tx];
        BS(ty, tx) = B[b + wB * ty + tx];

        // Synchronize to make sure the matrices are loaded
        __syncthreads();

        // Multiply the two matrices together;
        // each thread computes one element
        // of the block sub-matrix
        for (int k = 0; k < BLOCK_SIZE; ++k)
            Csub += AS(ty, k) * BS(k, tx);

        // Synchronize to make sure that the preceding
        // computation is done before loading two new
        // sub-matrices of A and B in the next iteration
        __syncthreads();
    }

    // Write the block sub-matrix to device memory;
    // each thread writes one element
    int c = wB * BLOCK_SIZE * by + BLOCK_SIZE * bx;
    C[c + wB * ty + tx] = Csub;
}

#endif // #ifndef _MATRIXMUL_KERNEL_H_
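matrixMul_v1 assigns one BLOCK_SIZE x BLOCK_SIZE thread block per tile of C and assumes every dimension is a multiple of BLOCK_SIZE. A typical launch (a usage sketch mirroring the call in matrixMul_cuda.cu, with hC standing for the height of C) looks like this:

    dim3 threads(BLOCK_SIZE, BLOCK_SIZE);      // one thread per element of a 16x16 tile
    dim3 grid(wB / threads.x, hC / threads.y); // one block per output tile
    matrixMul_v1<<<grid, threads>>>(d_C, d_A, d_B, wA, wB);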
77
A4/TP_GPU-master/TP1_image/CUDA/.cproject
Normal file
@@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="com.nvidia.cuda.ide.eight_zero.configuration.release.381529367">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="com.nvidia.cuda.ide.eight_zero.configuration.release.381529367" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="nvcc.errorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="com.nvidia.cuda.ide.elf" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="com.nvidia.cuda.ide.cubin" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="com.nvidia.cuda.ide.macho" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="com.nvidia.cuda.ide.eight_zero.configuration.release.381529367" name="Release" parent="com.nvidia.cuda.ide.eight_zero.configuration.release.381529367">
<folderInfo id="com.nvidia.cuda.ide.eight_zero.configuration.release.381529367." name="/" resourcePath="">
<toolChain id="com.nvidia.cuda.ide.toolchain.eight_zero.exe.release.1272863516" name="CUDA Toolkit 8.0">
<targetPlatform archList="all" binaryParser="com.nvidia.cuda.ide.elf;com.nvidia.cuda.ide.macho;com.nvidia.cuda.ide.cubin" id="com.nvidia.cuda.ide.targetPlatform.677042486" isAbstract="false" name="Debug Platform" osList="linux,macosx" superClass="com.nvidia.cuda.ide.targetPlatform"/>
<builder buildPath="${workspace_loc:/seuillage_CUDA_8}/Release" id="com.nvidia.cuda.ide.builder.1452012632" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="CUDA Toolkit 8.0 Builder" superClass="com.nvidia.cuda.ide.builder"/>
<tool id="nvcc.compiler.base.1547368432" name="NVCC Compiler" superClass="nvcc.compiler.base">
<option id="nvcc.compiler.deviceDebug.1647831981" name="Generate device debug information (-G)" superClass="nvcc.compiler.deviceDebug"/>
<option id="nvcc.compiler.option.level.16726990" name="Generate host debug information (-g)" superClass="nvcc.compiler.option.level"/>
<option defaultValue="nvcc.compiler.optimization.level.most" id="nvcc.compiler.optimization.level.1222624320" name="Optimization Level" superClass="nvcc.compiler.optimization.level" valueType="enumerated"/>
<option id="nvcc.compiler.pic.1176654696" name="Position Independent Code (-fPIC)" superClass="nvcc.compiler.pic"/>
<option id="nvcc.compiler.include.paths.393958652" name="Include paths (-I)" superClass="nvcc.compiler.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/cuda/samples/common/inc"/>
</option>
<inputType id="nvcc.compiler.input.cu.1459843904" superClass="nvcc.compiler.input.cu"/>
<inputType id="nvcc.compiler.input.cpp.360700336" superClass="nvcc.compiler.input.cpp"/>
<inputType id="nvcc.compiler.input.c.1943821627" superClass="nvcc.compiler.input.c"/>
</tool>
<tool id="nvcc.linker.base.602397088" name="NVCC Linker" superClass="nvcc.linker.base">
<inputType id="nvcc.linker.input.862625666" superClass="nvcc.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="nvcc.archiver.base.342423531" name="NVCC Archiver" superClass="nvcc.archiver.base"/>
<tool id="com.nvidia.host.assembler.1673437193" name="Host Assembler" superClass="com.nvidia.host.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1548523292" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="com.nvidia.cuda.ide.build.project.ICudaProjectConfiguration">
<executable devicelink="false">
<sass major="3" minor="0"/>
<ptx major="3" minor="0"/>
</executable>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="seuillage_CUDA_8.com.nvidia.cuda.ide.eight_zero.exe.270714933" name="Executable"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.debug.823155436;com.nvidia.cuda.ide.eight_zero.configuration.debug.823155436.;nvcc.compiler.base.1174254289;nvcc.compiler.input.cpp.2134833342">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.debug.823155436;com.nvidia.cuda.ide.eight_zero.configuration.debug.823155436.;nvcc.compiler.base.1174254289;nvcc.compiler.input.c.691715125">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.eight_zero.configuration.debug.823155436;com.nvidia.cuda.ide.eight_zero.configuration.debug.823155436.;nvcc.compiler.base.1174254289;nvcc.compiler.input.cu.1571068709">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope"/>
|
||||
</cproject>
|
1
A4/TP_GPU-master/TP1_image/CUDA/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/Release/
|
27
A4/TP_GPU-master/TP1_image/CUDA/.project
Normal file
|
@ -0,0 +1,27 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>seuillage_CUDA_8</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
34
A4/TP_GPU-master/TP1_image/CUDA/CMakeLists.txt
Normal file
|
@ -0,0 +1,34 @@
|
|||
cmake_minimum_required(VERSION 3.8)
|
||||
|
||||
project(seuillage LANGUAGES CXX CUDA)
|
||||
set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
#set(CMAKE_C_COMPILER "gcc-8")
|
||||
#set(CMAKE_CXX_COMPILER "g++-8")
|
||||
|
||||
#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
|
||||
|
||||
list(APPEND HEADER_REP ${ROOT_DIR}/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")
|
||||
|
||||
file(GLOB CPP_FILES ${ROOT_DIR}/*.cpp)
|
||||
file(GLOB CU_FILES ${ROOT_DIR}/*.cu)
|
||||
file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/*.cuh ${ROOT_DIR}/*.h)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")
|
||||
|
||||
|
||||
set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/\" -I\"/usr/local/cuda/samples/common/inc/\"")
|
||||
|
||||
|
||||
add_executable(seuillage_CUDA ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
|
||||
set_target_properties(seuillage_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(seuillage_CUDA PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
34
A4/TP_GPU-master/TP1_image/CUDA/CMakeLists_cmake28.txt
Normal file
|
@ -0,0 +1,34 @@
|
|||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
project(matrice)
|
||||
|
||||
set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
|
||||
|
||||
find_package(CUDA REQUIRED)
|
||||
list(APPEND HEADER_REP ${ROOT_DIR}/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")
|
||||
|
||||
#file(GLOB CPP_FILES ${ROOT_DIR}/src/CPP/*.cpp)
|
||||
#file(GLOB CU_FILES ${ROOT_DIR}/src/CPP/*.cu)
|
||||
set(CPP_FILES seuillage_C.cpp)
|
||||
set(CU_FILES ${ROOT_DIR}/seuillage_main.cu)
|
||||
#file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/src/CUDA/*.cuh ${ROOT_DIR}/inc/*.h)
|
||||
|
||||
|
||||
set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/\" -I\"/usr/local/cuda/samples/common/inc/\"")
|
||||
|
||||
|
||||
cuda_add_executable(seuillage_CUDA ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
|
||||
set_target_properties(seuillage_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(seuillage_CUDA PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
17
A4/TP_GPU-master/TP1_image/CUDA/CMakeLists_cmake38.txt
Normal file
|
@ -0,0 +1,17 @@
|
|||
#cmake -DCMAKE_CUDA_FLAGS="-arch=sm_30" ..
|
||||
cmake_minimum_required(VERSION 3.8)
|
||||
set(CUDACXX "/usr/local/cuda/bin/nvcc")
|
||||
set(CMAKE_CUDA_COMPILER "/usr/local/cuda/bin/nvcc")
|
||||
project(seuillage_CUDA LANGUAGES CXX CUDA)
|
||||
|
||||
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CMAKE_VERBOSE_MAKEFILE TRUE)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")
|
||||
|
||||
|
||||
add_executable(seuillage_CUDA seuillage_C.cpp seuillage_main.cu)
|
||||
set_target_properties(seuillage_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_compile_features(seuillage_CUDA PUBLIC cxx_std_11)
|
||||
target_include_directories(seuillage_CUDA PUBLIC ".")
|
||||
|
BIN
A4/TP_GPU-master/TP1_image/CUDA/build/.vs/cmake.db
Normal file
BIN
A4/TP_GPU-master/TP1_image/CUDA/build/.vs/seuillage/v16/.suo
Normal file
27
A4/TP_GPU-master/TP1_image/CUDA/seuillage.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
|
||||
#ifndef SEUILLAGE_H
|
||||
#define SEUILLAGE_H
|
||||
|
||||
// includes, system
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
|
||||
#define SIZE_I 960
|
||||
#define SIZE_J 1280
|
||||
|
||||
|
||||
#define BLOCK_SIZE 16
|
||||
|
||||
// Prototype
|
||||
void runTest( int argc, char** argv);
|
||||
extern "C" void seuillage_C( float reference[][SIZE_J][SIZE_I] , float idata[][SIZE_J][SIZE_I] );
|
||||
|
||||
|
||||
#endif // SEUILLAGE_H
|
||||
|
||||
|
||||
|
41
A4/TP_GPU-master/TP1_image/CUDA/seuillage_C_student.cpp
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
 * Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
 * NVIDIA Corporation and its licensors retain all intellectual property and
|
||||
 * proprietary rights in and to this software and related documentation and
|
||||
 * any modifications thereto. Any use, reproduction, disclosure, or distribution
|
||||
 * of this software and related documentation without an express license
|
||||
 * agreement from NVIDIA Corporation is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Seuillage (thresholding) of a colour image on the CPU.
|
||||
* Reference solution.
|
||||
*/
|
||||
|
||||
#include "seuillage.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Compute reference data set
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void seuillage_C(float image_out[][SIZE_J][SIZE_I], float image_in[][SIZE_J][SIZE_I])
|
||||
{
|
||||
float r, g, b;
|
||||
for(int j=0; j<SIZE_J; j++){
|
||||
for(int i=0; i<SIZE_I; i++){
|
||||
r = image_in[0][j][i];
|
||||
g = image_in[1][j][i];
|
||||
b = image_in[2][j][i];
|
||||
|
||||
if(r/sqrt(r*r+g*g+b*b) > 0.7){
|
||||
image_out[0][j][i] = r;
|
||||
image_out[1][j][i] = g;
|
||||
image_out[2][j][i] = b;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
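
The function above only writes pixels that pass the red-ratio test; pixels that fail keep whatever was already in the output buffer, which is allocated with malloc and therefore indeterminate, whereas the Matlab reference zeroes them through the mask multiply. A minimal sketch of a variant that matches the Matlab behaviour (the name seuillage_C_zeroed is hypothetical, not part of the lab):

// Sketch only: identical to seuillage_C, but pixels that fail the test are set to 0.
void seuillage_C_zeroed(float image_out[][SIZE_J][SIZE_I], float image_in[][SIZE_J][SIZE_I])
{
    for (int j = 0; j < SIZE_J; j++) {
        for (int i = 0; i < SIZE_I; i++) {
            float r = image_in[0][j][i];
            float g = image_in[1][j][i];
            float b = image_in[2][j][i];
            // Keep the pixel only when the normalised red component dominates.
            bool keep = r / sqrtf(r * r + g * g + b * b) > 0.7f;
            image_out[0][j][i] = keep ? r : 0.0f;
            image_out[1][j][i] = keep ? g : 0.0f;
            image_out[2][j][i] = keep ? b : 0.0f;
        }
    }
}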
|
||||
|
||||
|
||||
|
||||
|
229
A4/TP_GPU-master/TP1_image/CUDA/seuillage_main_student.cu
Normal file
|
@ -0,0 +1,229 @@
|
|||
/*
|
||||
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||
* with this source code for terms and conditions that govern your use of
|
||||
* this software. Any use, reproduction, disclosure, or distribution of
|
||||
* this software and related documentation outside the terms of the EULA
|
||||
* is strictly prohibited.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Template project which demonstrates the basics on how to setup a project
|
||||
* example application.
|
||||
* Host code.
|
||||
*/
|
||||
|
||||
// includes, system
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
|
||||
// includes CUDA
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "seuillage.h"
|
||||
|
||||
__global__ void seuillage_kernel(float d_image_in[][SIZE_J][SIZE_I],float d_image_out[][SIZE_J][SIZE_I])
|
||||
{
|
||||
|
||||
// YOUR CODE HERE
|
||||
int i,j;
|
||||
|
||||
j = blockIdx.x;
|
||||
i = threadIdx.x;
|
||||
|
||||
float r,g,b;
|
||||
|
||||
r = d_image_in[0][j][i];
|
||||
g = d_image_in[1][j][i];
|
||||
b = d_image_in[2][j][i];
|
||||
|
||||
if((r/sqrt(r*r+g*g+b*b))> 0.7){
|
||||
|
||||
d_image_out[0][j][i]=r;
|
||||
d_image_out[1][j][i]=g;
|
||||
d_image_out[2][j][i]=b;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// declaration, forward
|
||||
void runTest( int argc, char** argv);
|
||||
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main( int argc, char** argv)
|
||||
{
|
||||
|
||||
runTest( argc, argv);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run a simple test for CUDA
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
runTest( int argc, char** argv)
|
||||
{
|
||||
cudaError_t error;
|
||||
|
||||
if (argc<2)
|
||||
printf("indiquer le chemin du repertoire contenant les images\n");
|
||||
|
||||
const unsigned int mem_size = sizeof(float) * 3* SIZE_J * SIZE_I;
|
||||
// allocate host memory
|
||||
float* h_image_in = (float*) malloc(mem_size);
|
||||
|
||||
|
||||
//Initialize the input image volume
|
||||
FILE *file_ptr;
|
||||
char name_file_in[512];
|
||||
sprintf(name_file_in,"%s/ferrari.raw",argv[1]);
|
||||
printf("%s",name_file_in);
|
||||
file_ptr=fopen(name_file_in,"rb");
|
||||
fread(h_image_in,sizeof(float),3*SIZE_J*SIZE_I,file_ptr);
|
||||
fclose(file_ptr);
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// EXECUTION SUR LE CPU
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// Image processed on the CPU
|
||||
float* h_image_out_CPU = (float*) malloc( mem_size);
|
||||
|
||||
printf("Seuillage CPU d'une image couleur \n");
|
||||
|
||||
cudaEvent_t start,stop;
|
||||
error = cudaEventCreate(&start);
|
||||
error = cudaEventCreate(&stop);
|
||||
|
||||
// Record the start event
|
||||
error = cudaEventRecord(start, NULL);
|
||||
error = cudaEventSynchronize(start);
|
||||
//Thresholding on the CPU
|
||||
seuillage_C( (float (*)[SIZE_J][SIZE_I])h_image_out_CPU, (float (*)[SIZE_J][SIZE_I])h_image_in);
|
||||
|
||||
// Record the stop event
|
||||
error = cudaEventRecord(stop, NULL);
|
||||
// Wait for the stop event to complete
|
||||
error = cudaEventSynchronize(stop);
|
||||
float msecTotal = 0.0f;
|
||||
error = cudaEventElapsedTime(&msecTotal, start, stop);
|
||||
|
||||
|
||||
printf("CPU execution time %f\n",msecTotal);
|
||||
|
||||
//Save the resulting image
|
||||
char name_file_out_CPU[512];
|
||||
sprintf(name_file_out_CPU,"%s/ferrari_out_CPU.raw",argv[1]);
|
||||
file_ptr=fopen(name_file_out_CPU,"wb");
|
||||
fwrite(h_image_out_CPU,sizeof(float),3*SIZE_J*SIZE_I,file_ptr);
|
||||
fclose(file_ptr);
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// EXECUTION SUR LE GPU
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
cudaEvent_t start_mem,stop_mem;
|
||||
error = cudaEventCreate(&start_mem);
|
||||
error = cudaEventCreate(&stop_mem);
|
||||
|
||||
error = cudaEventRecord(start, NULL);
|
||||
error = cudaEventSynchronize(start);
|
||||
|
||||
|
||||
float* h_image_out_GPU = (float*) malloc(mem_size);
|
||||
|
||||
// images on device memory
|
||||
float* d_image_in;
|
||||
float* d_image_out;
|
||||
|
||||
|
||||
|
||||
// Allocate d_image_in and d_image_out in GPU memory
|
||||
// YOUR CODE HERE
|
||||
cudaMalloc((void **) &d_image_in, mem_size);
|
||||
cudaMalloc((void **) &d_image_out, mem_size);
|
||||
|
||||
// copy host memory to device
|
||||
// YOUR CODE HERE
|
||||
cudaMemcpy(d_image_in, h_image_in, mem_size, cudaMemcpyHostToDevice);
|
||||
|
||||
error = cudaEventRecord(stop_mem, NULL);
|
||||
// Wait for the stop event to complete
|
||||
error = cudaEventSynchronize(stop_mem);
|
||||
float msecMem = 0.0f;
|
||||
error = cudaEventElapsedTime(&msecMem, start, stop_mem);
|
||||
|
||||
// setup execution parameters -> split the work into thread blocks
|
||||
// YOUR CODE HERE
|
||||
dim3 threads(960);
|
||||
dim3 grid(1280);
|
||||
|
||||
// launch the kernel threads on the GPU
|
||||
// YOUR CODE HERE
|
||||
|
||||
// HINT: for the parameters of the seuillage_kernel kernel, cast the (float *) pointers to (float (*)[SIZE_J][SIZE_I])
|
||||
// use the call to seuillage_C in main as a model.
|
||||
seuillage_kernel<<< grid,threads >>>((float (*)[SIZE_J][SIZE_I])d_image_in, (float (*)[SIZE_J][SIZE_I])d_image_out);
|
||||
// Record the start event
|
||||
|
||||
|
||||
|
||||
error = cudaEventRecord(start_mem, NULL);
|
||||
error = cudaEventSynchronize(start_mem);
|
||||
|
||||
// copy result from device to host
|
||||
// YOUR CODE HERE
|
||||
cudaMemcpy(h_image_out_GPU, d_image_out, mem_size, cudaMemcpyDeviceToHost);
|
||||
|
||||
// cleanup device memory
|
||||
//REMOVE THE COMMENTS FROM THE TWO LINES BELOW
|
||||
cudaFree(d_image_in);
|
||||
cudaFree(d_image_out);
|
||||
|
||||
|
||||
error = cudaEventRecord(stop, NULL);
|
||||
// Wait for the stop event to complete
|
||||
error = cudaEventSynchronize(stop);
|
||||
msecTotal = 0.0f;
|
||||
error = cudaEventElapsedTime(&msecTotal, start, stop);
|
||||
float msecMem2 =0.0f;
|
||||
error = cudaEventElapsedTime(&msecMem2, start_mem, stop);
|
||||
msecMem+=msecMem2;
|
||||
|
||||
printf("GPU execution time %f ms (memory management %2.2f \%)\n",msecTotal,(msecMem)/(msecTotal)*100);
|
||||
|
||||
// Write the output image to a file
|
||||
char name_file_out_GPU[512];
|
||||
sprintf(name_file_out_GPU,"%s/ferrari_out_GPU.raw",argv[1]);
|
||||
file_ptr=fopen(name_file_out_GPU,"wb");
|
||||
fwrite(h_image_out_GPU,sizeof(float),3*SIZE_J*SIZE_I,file_ptr);
|
||||
fclose(file_ptr);
|
||||
|
||||
|
||||
// cleanup memory
|
||||
free(h_image_in);
|
||||
free(h_image_out_GPU);
|
||||
free(h_image_out_CPU);
|
||||
|
||||
|
||||
|
||||
|
||||
}
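
The launch above uses one block per image row (1280 blocks of 960 threads), which works only because SIZE_I stays below the 1024-threads-per-block limit. A hedged sketch of a more conventional 2D decomposition built on the BLOCK_SIZE constant from seuillage.h, with explicit bounds checks; the kernel name seuillage_kernel_2d and the commented launch lines are illustrative, not part of the lab:

// Sketch: 16x16 thread blocks tiling the image, with guards for sizes that are
// not multiples of BLOCK_SIZE.
__global__ void seuillage_kernel_2d(float d_in[][SIZE_J][SIZE_I], float d_out[][SIZE_J][SIZE_I])
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;   // column index, 0..SIZE_I-1
    int j = blockIdx.y * blockDim.y + threadIdx.y;   // row index,    0..SIZE_J-1
    if (i >= SIZE_I || j >= SIZE_J) return;

    float r = d_in[0][j][i], g = d_in[1][j][i], b = d_in[2][j][i];
    if (r / sqrtf(r * r + g * g + b * b) > 0.7f) {
        d_out[0][j][i] = r;
        d_out[1][j][i] = g;
        d_out[2][j][i] = b;
    }
}

// Matching launch, replacing dim3 threads(960); dim3 grid(1280);
//   dim3 threads(BLOCK_SIZE, BLOCK_SIZE);
//   dim3 grid((SIZE_I + BLOCK_SIZE - 1) / BLOCK_SIZE, (SIZE_J + BLOCK_SIZE - 1) / BLOCK_SIZE);
//   seuillage_kernel_2d<<<grid, threads>>>((float (*)[SIZE_J][SIZE_I])d_image_in,
//                                          (float (*)[SIZE_J][SIZE_I])d_image_out);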
|
BIN
A4/TP_GPU-master/TP1_image/Image/.DS_Store
vendored
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/carre.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/carre_noise_gaussian.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/carre_noise_saltpepper.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/carre_out.jpg
Normal file
After Width: | Height: | Size: 4.8 KiB |
BIN
A4/TP_GPU-master/TP1_image/Image/carre_out.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari.jpg
Normal file
After Width: | Height: | Size: 154 KiB |
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari2.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_NB.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_NB_bruit_gaussien.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out.jpg
Normal file
After Width: | Height: | Size: 132 KiB |
13268
A4/TP_GPU-master/TP1_image/Image/ferrari_out.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_CPU.jpg
Normal file
After Width: | Height: | Size: 19 KiB |
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_CPU.png
Normal file
After Width: | Height: | Size: 162 KiB |
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_CPU.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_GPU.jpg
Normal file
After Width: | Height: | Size: 19 KiB |
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_GPU.raw
Normal file
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_v1.jpg
Normal file
After Width: | Height: | Size: 132 KiB |
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_v2.jpg
Normal file
After Width: | Height: | Size: 132 KiB |
BIN
A4/TP_GPU-master/TP1_image/Image/ferrari_out_v3.jpg
Normal file
After Width: | Height: | Size: 132 KiB |
BIN
A4/TP_GPU-master/TP1_image/Matlab/.DS_Store
vendored
Normal file
51
A4/TP_GPU-master/TP1_image/Matlab/Contours_Matlab.m
Normal file
|
@ -0,0 +1,51 @@
|
|||
|
||||
%Ouverture d'une image au format raw
|
||||
fid = fopen('../Image/carre.raw', 'rb');
|
||||
image_in=fread(fid, 512*512, 'single');
|
||||
image_in=reshape(image_in,512,512);
|
||||
fclose(fid);
|
||||
|
||||
|
||||
%Affichage d'une image couleur avec image
|
||||
figure('name','Image in','numbertitle','off');imagesc(image_in);colormap(gray);
|
||||
|
||||
%Taille d'une image
|
||||
taille=size(image_in);
|
||||
display(taille);
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%tic toc pour mesurer le temps de calcul
|
||||
tic;
|
||||
hx=[-1 0 1];
|
||||
hy=[1;0;-1];
|
||||
|
||||
h=conv2(hx,hy)
|
||||
h2D=[0 1 0;-1 0 1;0 -1 0];
|
||||
|
||||
image_out=conv2(image_in,h2D,'same');
|
||||
image_out=abs(image_out);
|
||||
|
||||
%pour la convolution regardez l'aide pour conv2
|
||||
|
||||
%pour le filtre median regardez l'aide de medfilt2
|
||||
|
||||
|
||||
toc;
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
|
||||
|
||||
|
||||
%Affichage d'une image avec imagesc
|
||||
figure('name','Image out','numbertitle','off');imagesc(image_out);colormap gray;
|
||||
|
||||
|
||||
|
||||
%Sauvegarde d'une image au format jpg
|
||||
imwrite(image_out,'../Image/carre_out.jpg','jpg');
|
||||
|
||||
|
||||
%Sauvegarde d'une image au format raw
|
||||
fid = fopen('../Image/carre_out.raw', 'w');
|
||||
fwrite(fid, image_out, 'single');
|
||||
fclose(fid);
|
14
A4/TP_GPU-master/TP1_image/Matlab/raw2jpg.m
Normal file
|
@ -0,0 +1,14 @@
|
|||
function raw2jpg(name_proc)
|
||||
%fid = fopen('ferrari.raw', 'r');
|
||||
file_name_in=['../Image/ferrari_out_' name_proc '.raw'];
|
||||
fid = fopen(file_name_in, 'r');
|
||||
ima=single(fread(fid,1280*960*3, 'single'));
|
||||
fclose(fid);
|
||||
|
||||
ima=reshape(ima,960,1280,3);
|
||||
ima=ima./255;
|
||||
figure('name',name_proc,'numbertitle','off');image(ima);
|
||||
|
||||
file_name_out=['../Image/ferrari_out_' name_proc '.jpg'];
|
||||
imwrite(ima,file_name_out,'jpg');
|
||||
end
|
69
A4/TP_GPU-master/TP1_image/Matlab/seuil_Matlab.m
Normal file
|
@ -0,0 +1,69 @@
|
|||
close all;
|
||||
|
||||
%Ouverture d'une image au format couleur
|
||||
ima=single(imread('../Image/ferrari.jpg'));
|
||||
ima=ima./255;
|
||||
|
||||
%Affichage d'une image couleur avec image
|
||||
figure('name','RGB in','numbertitle','off');image(ima);
|
||||
|
||||
|
||||
%Taille d'une image
|
||||
taille=size(ima);
|
||||
display(taille);
|
||||
|
||||
ima_r=ima(:,:,1);
|
||||
ima_g=ima(:,:,2);
|
||||
ima_b=ima(:,:,3);
|
||||
|
||||
%Affichage d'un niveau de couleur de l'image
|
||||
figure('name','R','numbertitle','off');imagesc(ima_r);colormap gray %Niveau de rouge
|
||||
figure('name','G','numbertitle','off');imagesc(ima_g);colormap gray %Niveau de vert
|
||||
figure('name','B','numbertitle','off');imagesc(ima_b);colormap gray %Niveau de bleu
|
||||
|
||||
%Taille d'une image
|
||||
taille=size(ima);
|
||||
display(taille);
|
||||
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%tic toc pour mesurer le temps de calcul
|
||||
disp("Seuil :");
|
||||
tic;
|
||||
|
||||
ima_out=ima;
|
||||
|
||||
nr = ima_r./sqrt(ima_r.^2 + ima_b.^2 + ima_g.^2);
|
||||
|
||||
ima_seuil = ima.*(nr>0.7);
|
||||
toc;
|
||||
figure('name','RGB out','numbertitle','off');image(ima_seuil);
|
||||
|
||||
disp("Jaune :")
|
||||
tic;
|
||||
ima_jaune = ima_seuil;
|
||||
ima_jaune(:,:,2) = ima_seuil(:,:,1);
|
||||
toc;
|
||||
figure('name','RGB out','numbertitle','off');image(ima_jaune);
|
||||
|
||||
disp("Reinsertion dans l'image :")
|
||||
tic;
|
||||
ima_out = ima - ima_seuil + ima_jaune;
|
||||
figure('name','RGB out','numbertitle','off');image(ima_out);
|
||||
toc;
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
|
||||
|
||||
|
||||
%Sauvegarde d'une image au format jpg
|
||||
imwrite(ima_out,'../Image/ferrari_out.jpg','jpg');
|
||||
|
||||
|
||||
|
||||
%Sauvegarde d'une image au format raw
|
||||
fid = fopen('../Image/ferrari_out.raw', 'w');
|
||||
fwrite(fid, ima_out, 'single');
|
||||
fclose(fid);
|
||||
|
108
A4/TP_GPU-master/TP1_image/Matlab/seuil_Matlab_solution.m
Normal file
|
@ -0,0 +1,108 @@
|
|||
close all;
|
||||
|
||||
%Ouverture d'une image au format couleur
|
||||
ima=single(imread('../Image/ferrari.jpg'));
|
||||
ima=ima./255;
|
||||
|
||||
%Affichage d'une image couleur avec image
|
||||
figure('name','RGB in','numbertitle','off');image(ima);
|
||||
|
||||
|
||||
%Taille d'une image
|
||||
taille=size(ima);
|
||||
display(taille);
|
||||
|
||||
ima_r=ima(:,:,1);
|
||||
ima_g=ima(:,:,2);
|
||||
ima_b=ima(:,:,3);
|
||||
|
||||
%Affichage d'un niveau de couleur de l'image
|
||||
figure('name','R','numbertitle','off');imagesc(ima_r);colormap gray %Niveau de rouge
|
||||
figure('name','G','numbertitle','off');imagesc(ima_g);colormap gray %Niveau de vert
|
||||
figure('name','B','numbertitle','off');imagesc(ima_b);colormap gray %Niveau de bleu
|
||||
|
||||
%Taille d'une image
|
||||
taille=size(ima);
|
||||
display(taille);
|
||||
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%tic toc pour mesurer le temps de calcul
|
||||
display('VERSION 1');
|
||||
tic;
|
||||
|
||||
ima_out_v1=ima;
|
||||
%A vous de coder !
|
||||
|
||||
|
||||
for j=1:taille(2)
|
||||
for i=1:taille(1)
|
||||
|
||||
|
||||
|
||||
nr(i,j)=ima(i,j,1)/sqrt(ima(i,j,1)*ima(i,j,1)+ima(i,j,2)*ima(i,j,2)+ima(i,j,3)*ima(i,j,3));
|
||||
|
||||
if (nr(i,j)>0.7)
|
||||
ima_out_v1(i,j,2)=ima(i,j,1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
toc;
|
||||
|
||||
|
||||
figure('name','ratio rouge','numbertitle','off');imagesc(nr);colormap gray %Niveau de rouge
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
display('VERSION 2');
|
||||
tic;
|
||||
|
||||
|
||||
ima_out_v2=ima;
|
||||
|
||||
nr=ima_r./sqrt(ima_r.^2+ima_g.^2+ima_b.^2);
|
||||
%A vous de coder !
|
||||
|
||||
for j=1:taille(2)
|
||||
for i=1:taille(1)
|
||||
if (nr(i,j)>0.7)
|
||||
ima_out_v2(i,j,2)=ima(i,j,1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
toc;
|
||||
|
||||
|
||||
display('VERSION 3');
|
||||
tic;
|
||||
ima_out_v3=ima;
|
||||
|
||||
nr=ima_r./sqrt(ima_r.^2+ima_g.^2+ima_b.^2);
|
||||
image_test=nr>0.7;
|
||||
|
||||
image_tmp=ima_g;
|
||||
image_tmp(image_test)=ima_r(image_test);
|
||||
ima_out_v3(:,:,2)=image_tmp();
|
||||
toc;
|
||||
figure('name','test','numbertitle','off');imagesc(image_test);colormap gray %Niveau de rouge
|
||||
|
||||
figure('name','RGB out version 1','numbertitle','off');image(ima_out_v1);
|
||||
figure('name','RGB out version 2','numbertitle','off');image(ima_out_v2);
|
||||
figure('name','RGB out version 3','numbertitle','off');image(ima_out_v3);
|
||||
|
||||
%Sauvegarde d'une image au format jpg
|
||||
imwrite(ima_out_v1,'../Image/ferrari_out_v1.jpg','jpg');
|
||||
imwrite(ima_out_v2,'../Image/ferrari_out_v2.jpg','jpg');
|
||||
imwrite(ima_out_v3,'../Image/ferrari_out_v3.jpg','jpg');
|
||||
|
||||
|
||||
%Sauvegarde d'une image au format raw
|
||||
fid = fopen('../Image/ferrari_out.raw', 'w');
|
||||
fwrite(fid, ima_out_v3, 'single');
|
||||
fclose(fid);
|
||||
|
77
A4/TP_GPU-master/TP2_reduction/.cproject
Normal file
|
@ -0,0 +1,77 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="com.nvidia.cuda.ide.nine_zero.configuration.release.1433346787">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="com.nvidia.cuda.ide.nine_zero.configuration.release.1433346787" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="com.nvidia.cuda.ide.elf" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="com.nvidia.cuda.ide.cubin" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="com.nvidia.cuda.ide.macho" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.VCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="nvcc.errorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="com.nvidia.cuda.ide.nine_zero.configuration.release.1433346787" name="Release" parent="com.nvidia.cuda.ide.nine_zero.configuration.release">
|
||||
<folderInfo id="com.nvidia.cuda.ide.nine_zero.configuration.release.1433346787." name="/" resourcePath="">
|
||||
<toolChain id="com.nvidia.cuda.ide.toolchain.nine_zero.exe.release.698197512" name="CUDA Toolkit 10.0" superClass="com.nvidia.cuda.ide.toolchain.nine_zero.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="com.nvidia.cuda.ide.elf;com.nvidia.cuda.ide.macho;com.nvidia.cuda.ide.cubin" id="com.nvidia.cuda.ide.targetPlatform.1887109166" isAbstract="false" name="Debug Platform" osList="linux,macosx" superClass="com.nvidia.cuda.ide.targetPlatform"/>
|
||||
<builder buildPath="${workspace_loc:/TP2}/Release" id="com.nvidia.cuda.ide.builder.996479146" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="CUDA Toolkit 10.0 Builder" superClass="com.nvidia.cuda.ide.builder"/>
|
||||
<tool id="nvcc.compiler.base.970531586" name="NVCC Compiler" superClass="nvcc.compiler.base">
|
||||
<option id="nvcc.compiler.deviceDebug.130815577" name="Generate device debug information (-G)" superClass="nvcc.compiler.deviceDebug"/>
|
||||
<option id="nvcc.compiler.option.level.1535441624" name="Generate host debug information (-g)" superClass="nvcc.compiler.option.level"/>
|
||||
<option defaultValue="nvcc.compiler.optimization.level.most" id="nvcc.compiler.optimization.level.1934934131" name="Optimization Level" superClass="nvcc.compiler.optimization.level" valueType="enumerated"/>
|
||||
<option id="nvcc.compiler.pic.84159010" name="Position Independent Code (-fPIC)" superClass="nvcc.compiler.pic"/>
|
||||
<inputType id="nvcc.compiler.input.cu.2057104910" superClass="nvcc.compiler.input.cu"/>
|
||||
<inputType id="nvcc.compiler.input.cpp.1440437331" superClass="nvcc.compiler.input.cpp"/>
|
||||
<inputType id="nvcc.compiler.input.c.1110197327" superClass="nvcc.compiler.input.c"/>
|
||||
</tool>
|
||||
<tool id="nvcc.linker.base.1084287072" name="NVCC Linker" superClass="nvcc.linker.base">
|
||||
<option id="nvcc.linker.option.libs.1571841227" name="Libraries (-l)" superClass="nvcc.linker.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="gomp"/>
|
||||
</option>
|
||||
<inputType id="nvcc.linker.input.1818364586" superClass="nvcc.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="nvcc.archiver.base.1485801156" name="NVCC Archiver" superClass="nvcc.archiver.base"/>
|
||||
<tool id="com.nvidia.host.assembler.1159491752" name="Host Assembler" superClass="com.nvidia.host.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.174374770" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="com.nvidia.cuda.ide.build.project.ICudaProjectConfiguration">
|
||||
<executable devicelink="false">
|
||||
<sass major="3" minor="0"/>
|
||||
<ptx major="3" minor="0"/>
|
||||
</executable>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="TP2.com.nvidia.cuda.ide.nine_zero.exe.235863883" name="Executable" projectType="com.nvidia.cuda.ide.nine_zero.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.nine_zero.configuration.debug.9454793;com.nvidia.cuda.ide.nine_zero.configuration.debug.9454793.;nvcc.compiler.base.1363154216;nvcc.compiler.input.cu.75245776">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.nine_zero.configuration.debug.9454793;com.nvidia.cuda.ide.nine_zero.configuration.debug.9454793.;nvcc.compiler.base.1363154216;nvcc.compiler.input.cpp.1264206044">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="com.nvidia.cuda.ide.nine_zero.configuration.debug.9454793;com.nvidia.cuda.ide.nine_zero.configuration.debug.9454793.;nvcc.compiler.base.1363154216;nvcc.compiler.input.c.1376543207">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="com.nvidia.cuda.ide.build.NVCCPerProjectProfile"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope"/>
|
||||
</cproject>
|
0
A4/TP_GPU-master/TP2_reduction/.gitkeep
Normal file
27
A4/TP_GPU-master/TP2_reduction/.project
Normal file
|
@ -0,0 +1,27 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>TP2</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
34
A4/TP_GPU-master/TP2_reduction/CMakeLists.txt
Normal file
|
@ -0,0 +1,34 @@
|
|||
#cmake -DCMAKE_CUDA_FLAGS="-arch=sm_30" ..
|
||||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
project(reduce)
|
||||
|
||||
set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
|
||||
|
||||
find_package(CUDA REQUIRED)
|
||||
list(APPEND HEADER_REP ${ROOT_DIR}/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")
|
||||
|
||||
#file(GLOB CPP_FILES ${ROOT_DIR}/src/CPP/*.cpp)
|
||||
#file(GLOB CU_FILES ${ROOT_DIR}/src/CPP/*.cu)
|
||||
set(CU_FILES ${ROOT_DIR}/Reduce_solution.cu)
|
||||
#file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/src/CUDA/*.cuh ${ROOT_DIR}/inc/*.h)
|
||||
|
||||
|
||||
set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/\" -I\"/usr/local/cuda/samples/common/inc/\" -std=c++14")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
|
||||
|
||||
cuda_add_executable(reduce ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
|
||||
set_target_properties(reduce PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(reduce PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
34
A4/TP_GPU-master/TP2_reduction/CMakeLists_cmake28.txt
Normal file
|
@ -0,0 +1,34 @@
|
|||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
project(matrice)
|
||||
|
||||
set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
#set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
|
||||
|
||||
find_package(CUDA REQUIRED)
|
||||
list(APPEND HEADER_REP ${ROOT_DIR}/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")
|
||||
|
||||
#file(GLOB CPP_FILES ${ROOT_DIR}/src/CPP/*.cpp)
|
||||
#file(GLOB CU_FILES ${ROOT_DIR}/src/CPP/*.cu)
|
||||
set(CPP_FILES seuillage_C.cpp)
|
||||
set(CU_FILES ${ROOT_DIR}/seuillage_main.cu)
|
||||
#file(GLOB_RECURSE HEADER_FILES ${ROOT_DIR}/src/CUDA/*.cuh ${ROOT_DIR}/inc/*.h)
|
||||
|
||||
|
||||
set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/\" -I\"/usr/local/cuda/samples/common/inc/\"")
|
||||
|
||||
|
||||
cuda_add_executable(seuillage_CUDA ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
|
||||
set_target_properties(seuillage_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(seuillage_CUDA PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
17
A4/TP_GPU-master/TP2_reduction/CMakeLists_cmake38.txt
Normal file
|
@ -0,0 +1,17 @@
|
|||
#cmake -DCMAKE_CUDA_FLAGS="-arch=sm_30" ..
|
||||
cmake_minimum_required(VERSION 3.8)
|
||||
set(CUDACXX "/usr/local/cuda/bin/nvcc")
|
||||
set(CMAKE_CUDA_COMPILER "/usr/local/cuda/bin/nvcc")
|
||||
project(seuillage_CUDA LANGUAGES CXX CUDA)
|
||||
|
||||
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CMAKE_VERBOSE_MAKEFILE TRUE)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math")
|
||||
|
||||
|
||||
add_executable(seuillage_CUDA seuillage_C.cpp seuillage_main.cu)
|
||||
set_target_properties(seuillage_CUDA PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_compile_features(seuillage_CUDA PUBLIC cxx_std_11)
|
||||
target_include_directories(seuillage_CUDA PUBLIC ".")
|
||||
|
39
A4/TP_GPU-master/TP2_reduction/GpuTimer.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
class GpuTimer
|
||||
{
|
||||
cudaEvent_t start, stop;
|
||||
|
||||
public:
|
||||
GpuTimer()
|
||||
{
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
}
|
||||
|
||||
~GpuTimer()
|
||||
{
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventDestroy(start);
|
||||
}
|
||||
|
||||
void Start()
|
||||
{
|
||||
cudaEventRecord(start);
|
||||
}
|
||||
|
||||
void Stop()
|
||||
{
|
||||
cudaEventRecord(stop);
|
||||
cudaEventSynchronize(stop);
|
||||
}
|
||||
|
||||
float Elapsed()
|
||||
{
|
||||
float elapsed;
|
||||
cudaEventElapsedTime(&elapsed, start, stop);
|
||||
return elapsed;
|
||||
}
|
||||
};
|
||||
|
458
A4/TP_GPU-master/TP2_reduction/Reduce.cu
Normal file
|
@ -0,0 +1,458 @@
|
|||
/*
|
||||
# Copyright (c) 2011-2012 NVIDIA CORPORATION. All Rights Reserved.
|
||||
#
|
||||
# NVIDIA CORPORATION and its licensors retain all intellectual property
|
||||
# and proprietary rights in and to this software, related documentation
|
||||
# and any modifications thereto. Any use, reproduction, disclosure or
|
||||
# distribution of this software and related documentation without an express
|
||||
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include <thrust/host_vector.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/generate.h>
|
||||
#include <thrust/reduce.h>
|
||||
#include <thrust/functional.h>
|
||||
|
||||
//#include <cub/cub.cuh>
|
||||
#include "GpuTimer.h"
|
||||
|
||||
#define CUDA_SAFE_CALL(call) \
|
||||
{ \
|
||||
cudaError_t err_code = call; \
|
||||
if( err_code != cudaSuccess ) { std::cerr << "Error (" << __FILE__ << ":" << __LINE__ << "): " << cudaGetErrorString(err_code) << std::endl; return 1; } \
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// G P U R E D U C T I O N
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__global__ void reduce_kernel( int n, const int *in_buffer, int *out_buffer, const int2 *block_ranges )
|
||||
{
|
||||
// Allocate shared memory inside the block.
|
||||
extern __shared__ int s_mem[];
|
||||
int my_sum=0; // accumulate in int: in_buffer and out_buffer hold ints
|
||||
// The range of data to work with.
|
||||
int2 range = block_ranges[blockIdx.x];
|
||||
|
||||
// Compute the sum of my elements.
|
||||
|
||||
// TODO: fill-in that section of the code
|
||||
|
||||
// Copy my sum in shared memory.
|
||||
s_mem[threadIdx.x] = my_sum;
|
||||
|
||||
// Make sure all the threads have copied their value in shared memory.
|
||||
__syncthreads();
|
||||
|
||||
// Compute the sum inside the block.
|
||||
|
||||
// TODO: fill-in that section of the code
|
||||
|
||||
// The first thread of the block stores its result.
|
||||
if( threadIdx.x == 0 )
|
||||
out_buffer[blockIdx.x] = s_mem[0];
|
||||
}
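
The two // TODO sections above are deliberately left blank for the exercise. For orientation only, here is one possible way to complete them, written as a self-contained kernel under the hypothetical name reduce_kernel_sketch (a block-strided partial sum followed by a shared-memory tree reduction); it is not the official solution:

__global__ void reduce_kernel_sketch( int n, const int *in_buffer, int *out_buffer, const int2 *block_ranges )
{
    extern __shared__ int s_mem[];
    int2 range = block_ranges[blockIdx.x];

    // Per-thread partial sum over this block's range, striding by the block size.
    int my_sum = 0;
    for( int i = range.x + threadIdx.x ; i < range.y ; i += blockDim.x )
        my_sum += in_buffer[i];
    s_mem[threadIdx.x] = my_sum;
    __syncthreads();

    // Tree reduction in shared memory (assumes blockDim.x is a power of two).
    for( int offset = blockDim.x / 2 ; offset > 0 ; offset /= 2 )
    {
        if( threadIdx.x < offset )
            s_mem[threadIdx.x] += s_mem[threadIdx.x + offset];
        __syncthreads();
    }

    // The first thread of the block stores its result.
    if( threadIdx.x == 0 )
        out_buffer[blockIdx.x] = s_mem[0];
}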
|
||||
|
||||
int reduce_on_gpu( int n, const int *a_device )
|
||||
{
|
||||
// Compute the size of the grid.
|
||||
const int BLOCK_DIM = 256;
|
||||
const int grid_dim = std::min( BLOCK_DIM, (n + BLOCK_DIM-1) / BLOCK_DIM );
|
||||
const int num_threads = BLOCK_DIM * grid_dim;
|
||||
|
||||
// Compute the number of elements per block.
|
||||
const int elements_per_block = BLOCK_DIM * ((n + num_threads - 1) / num_threads);
|
||||
|
||||
// Allocate memory for temporary buffers.
|
||||
int *partial_sums = NULL;
|
||||
int2 *block_ranges = NULL;
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &partial_sums, BLOCK_DIM * sizeof(int ) ) );
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &block_ranges, grid_dim * sizeof(int2) ) );
|
||||
|
||||
// Compute the ranges for the blocks.
|
||||
int sum = 0;
|
||||
int2 *block_ranges_on_host = new int2[grid_dim];
|
||||
for( int block_idx = 0 ; block_idx < grid_dim ; ++block_idx )
|
||||
{
|
||||
block_ranges_on_host[block_idx].x = sum;
|
||||
block_ranges_on_host[block_idx].y = std::min( sum += elements_per_block, n );
|
||||
}
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, block_ranges_on_host, grid_dim * sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
delete[] block_ranges_on_host;
|
||||
|
||||
// First round: Compute a partial sum for all blocks.
|
||||
reduce_kernel<<<grid_dim, BLOCK_DIM, BLOCK_DIM*sizeof(int)>>>( n, a_device, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Set the ranges for the second kernel call.
|
||||
int2 block_range = make_int2( 0, grid_dim );
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, &block_range, sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
|
||||
// Second round: Compute the final sum by summing the partial results of all blocks.
|
||||
reduce_kernel<<<1, BLOCK_DIM, BLOCK_DIM*sizeof(int)>>>( grid_dim, partial_sums, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Read the result from device memory.
|
||||
int result;
|
||||
CUDA_SAFE_CALL( cudaMemcpy( &result, partial_sums, sizeof(int), cudaMemcpyDeviceToHost ) );
|
||||
|
||||
// Free temporary memory.
|
||||
CUDA_SAFE_CALL( cudaFree( block_ranges ) );
|
||||
CUDA_SAFE_CALL( cudaFree( partial_sums ) );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// G P U R E D U C T I O N : O P T I M I Z E D V E R S I O N
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define WARP_SIZE 32
|
||||
|
||||
template< int BLOCK_DIM >
|
||||
__global__ void reduce_kernel_optimized( int n, const int *in_buffer, int *out_buffer, const int2 *__restrict block_ranges )
|
||||
{
|
||||
// The number of warps in the block.
|
||||
const int NUM_WARPS = BLOCK_DIM / WARP_SIZE;
|
||||
int my_sum=0; // accumulate in int: in_buffer and out_buffer hold ints
|
||||
// Allocate shared memory inside the block.
|
||||
__shared__ volatile int s_mem[BLOCK_DIM];
|
||||
|
||||
// The range of data to work with.
|
||||
int2 range = block_ranges[blockIdx.x];
|
||||
|
||||
// Warp/lane IDs.
|
||||
const int warp_id = threadIdx.x / WARP_SIZE;
|
||||
const int lane_id = threadIdx.x % WARP_SIZE;
|
||||
|
||||
// Compute the sum of my elements.
|
||||
|
||||
// TODO: fill-in that section of the code
|
||||
|
||||
// Copy my sum in shared memory.
|
||||
s_mem[threadIdx.x] = my_sum;
|
||||
|
||||
// Compute the sum inside each warp.
|
||||
|
||||
// TODO: fill-in that section of the code
|
||||
|
||||
// Each warp leader stores the result for the warp.
|
||||
if( lane_id == 0 )
|
||||
// TODO: fill-in that section of the code
|
||||
__syncthreads();
|
||||
|
||||
if( warp_id == 0 )
|
||||
{
|
||||
// Read my value from shared memory and store it in a register.
|
||||
my_sum = s_mem[lane_id];
|
||||
|
||||
// Sum the results of the warps.
|
||||
|
||||
// TODO: fill-in that section of the code
|
||||
}
|
||||
|
||||
// The 1st thread stores the result of the block.
|
||||
if( threadIdx.x == 0 )
|
||||
out_buffer[blockIdx.x] = my_sum += s_mem[1];
|
||||
}
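
As above, the TODO sections are left for the exercise. A hedged sketch of one way to complete the optimized kernel, assuming CUDA 9 or later for __shfl_down_sync, is given below under the hypothetical name reduce_kernel_optimized_sketch. Two details worth noting: the warp-leader store must be the only statement governed by if( lane_id == 0 ) so that the following __syncthreads() is reached by every thread, and with this warp-shuffle scheme the block total already sits in my_sum on thread 0, so the final store differs slightly from the skeleton's final line (my_sum += s_mem[1]):

template< int BLOCK_DIM >
__global__ void reduce_kernel_optimized_sketch( int n, const int *in_buffer, int *out_buffer, const int2 *block_ranges )
{
    const int NUM_WARPS = BLOCK_DIM / WARP_SIZE;
    __shared__ int s_warp_sums[BLOCK_DIM / WARP_SIZE];

    int2 range = block_ranges[blockIdx.x];
    const int warp_id = threadIdx.x / WARP_SIZE;
    const int lane_id = threadIdx.x % WARP_SIZE;

    // Per-thread partial sum over this block's range.
    int my_sum = 0;
    for( int i = range.x + threadIdx.x ; i < range.y ; i += BLOCK_DIM )
        my_sum += in_buffer[i];

    // Reduce inside each warp with shuffles (no shared memory or sync needed here).
    for( int offset = WARP_SIZE / 2 ; offset > 0 ; offset /= 2 )
        my_sum += __shfl_down_sync( 0xffffffffu, my_sum, offset );

    // Each warp leader stores the sum of its warp.
    if( lane_id == 0 )
        s_warp_sums[warp_id] = my_sum;
    __syncthreads();

    // The first warp reduces the per-warp results.
    if( warp_id == 0 )
    {
        my_sum = lane_id < NUM_WARPS ? s_warp_sums[lane_id] : 0;
        for( int offset = WARP_SIZE / 2 ; offset > 0 ; offset /= 2 )
            my_sum += __shfl_down_sync( 0xffffffffu, my_sum, offset );
    }

    // The 1st thread stores the result of the block.
    if( threadIdx.x == 0 )
        out_buffer[blockIdx.x] = my_sum;
}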
|
||||
|
||||
template< int BLOCK_DIM >
|
||||
int reduce_on_gpu_optimized( int n, const int *a_device )
|
||||
{
|
||||
// Compute the size of the grid.
|
||||
const int grid_dim = std::min( BLOCK_DIM, (n + BLOCK_DIM-1) / BLOCK_DIM );
|
||||
const int num_threads = BLOCK_DIM * grid_dim;
|
||||
|
||||
// Compute the number of elements per block.
|
||||
const int elements_per_block = BLOCK_DIM * ((n + num_threads - 1) / num_threads);
|
||||
|
||||
// Allocate memory for temporary buffers.
|
||||
int *partial_sums = NULL;
|
||||
int2 *block_ranges = NULL;
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &partial_sums, BLOCK_DIM * sizeof(int ) ) );
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &block_ranges, grid_dim * sizeof(int2) ) );
|
||||
|
||||
// Compute the ranges for the blocks.
|
||||
int sum = 0;
|
||||
int2 *block_ranges_on_host = new int2[grid_dim];
|
||||
for( int block_idx = 0 ; block_idx < grid_dim ; ++block_idx )
|
||||
{
|
||||
block_ranges_on_host[block_idx].x = sum;
|
||||
block_ranges_on_host[block_idx].y = std::min( sum += elements_per_block, n );
|
||||
}
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, block_ranges_on_host, grid_dim * sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
delete[] block_ranges_on_host;
|
||||
|
||||
// First round: Compute a partial sum for all blocks.
|
||||
reduce_kernel_optimized<BLOCK_DIM><<<grid_dim, BLOCK_DIM>>>( n, a_device, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Set the ranges for the second kernel call.
|
||||
int2 block_range = make_int2( 0, grid_dim );
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, &block_range, sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
|
||||
// Second round: Compute the final sum by summing the partial results of all blocks.
|
||||
reduce_kernel_optimized<BLOCK_DIM><<<1, BLOCK_DIM>>>( grid_dim, partial_sums, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Read the result from device memory.
|
||||
int result;
|
||||
CUDA_SAFE_CALL( cudaMemcpy( &result, partial_sums, sizeof(int), cudaMemcpyDeviceToHost ) );
|
||||
|
||||
// Free temporary memory.
|
||||
CUDA_SAFE_CALL( cudaFree( block_ranges ) );
|
||||
CUDA_SAFE_CALL( cudaFree( partial_sums ) );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// M A I N
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int main( int, char ** )
|
||||
{
|
||||
const int NUM_TESTS = 10;
|
||||
|
||||
// The number of elements in the problem.
|
||||
const int N = 512 * 131072;
|
||||
|
||||
std::cout << "Computing a reduction on " << N << " elements" << std::endl;
|
||||
|
||||
// X and Y on the host (CPU).
|
||||
int *a_host = new int[N];
|
||||
|
||||
// Make sure the memory got allocated. TODO: free memory.
|
||||
if( a_host == NULL )
|
||||
{
|
||||
std::cerr << "ERROR: Couldn't allocate a_host" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Generate data
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << "Filling with 1s" << std::endl;
|
||||
|
||||
// Fill the array with 1s so the expected sum is exactly N.
|
||||
for( int i = 0 ; i < N ; ++i )
|
||||
a_host[i] = 1;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the CPU using 1 thread
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the CPU using 1 CPU thread" << std::endl;
|
||||
|
||||
GpuTimer gpu_timer;
|
||||
gpu_timer.Start();
|
||||
|
||||
// Calculate the reference to compare with the device result.
|
||||
int sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
sum = 0;
|
||||
for( int i = 0 ; i < N ; ++i )
|
||||
sum += a_host[i];
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the CPU using several OpenMP threads
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the CPU using " << omp_get_max_threads() << " OpenMP thread(s)" << std::endl;
|
||||
|
||||
gpu_timer.Start();
|
||||
|
||||
// Calculate the reference to compare with the device result.
|
||||
int omp_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
omp_sum = 0;
|
||||
#pragma omp parallel shared(omp_sum)
|
||||
{
|
||||
#pragma omp for reduction(+ : omp_sum)
|
||||
for( int i = 0 ; i < N ; ++i )
|
||||
omp_sum = omp_sum + a_host[i];
|
||||
}
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The copy of A on the device (GPU).
|
||||
int *a_device = NULL;
|
||||
|
||||
// Allocate A on the device.
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &a_device, N*sizeof( int ) ) );
|
||||
|
||||
// Copy A from host (CPU) to device (GPU).
|
||||
CUDA_SAFE_CALL( cudaMemcpy( a_device, a_host, N*sizeof( int ), cudaMemcpyHostToDevice ) );
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU using Thrust
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the GPU using Thrust (transfers excluded)" << std::endl;
|
||||
|
||||
|
||||
gpu_timer.Start();
|
||||
|
||||
// Launch the kernel on the GPU.
|
||||
int thrust_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
thrust_sum = thrust::reduce( thrust::device_ptr<int>(a_device), thrust::device_ptr<int>(a_device+N) );
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU using CUB
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/*int cub_sum=0;
|
||||
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the GPU using CUB (transfers excluded)" << std::endl;
|
||||
|
||||
int * sum_device=NULL;
|
||||
cudaMalloc(&sum_device, sizeof(int));
|
||||
|
||||
void *temp_storage_device = NULL;
|
||||
size_t temp_storage_bytes = 0;
|
||||
cub::DeviceReduce::Sum(temp_storage_device, temp_storage_bytes, a_device ,sum_device, N);
|
||||
// Allocate temporary storage
|
||||
cudaMalloc(&temp_storage_device, temp_storage_bytes);
|
||||
|
||||
|
||||
gpu_timer.Start();
|
||||
|
||||
// Run reduction
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
cub::DeviceReduce::Sum(temp_storage_device, temp_storage_bytes, a_device, sum_device,N);
|
||||
}
|
||||
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
CUDA_SAFE_CALL( cudaMemcpy( &cub_sum, sum_device, sizeof(int), cudaMemcpyDeviceToHost ) );
|
||||
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
*/
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the GPU (transfers excluded)" << std::endl;
|
||||
|
||||
gpu_timer.Start();
|
||||
|
||||
// Launch the kernel on the GPU.
|
||||
int gpu_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
gpu_sum = reduce_on_gpu( N, a_device );
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU (optimized version)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the GPU using a tuned version (transfers excluded)" << std::endl;
|
||||
|
||||
gpu_timer.Start();
|
||||
|
||||
const int BLOCK_DIM = 256;
|
||||
|
||||
// Launch the kernel on the GPU.
|
||||
int optim_gpu_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
optim_gpu_sum = reduce_on_gpu_optimized<BLOCK_DIM>( N, a_device );
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Validate results
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "OpenMP results: ref= " << sum << " / sum= " << omp_sum << std::endl;
|
||||
std::cout << "Thrust results: ref= " << sum << " / sum= " << thrust_sum << std::endl;
|
||||
//std::cout << "CUB results: ref= " << sum << " / sum= " << cub_sum << std::endl;
|
||||
std::cout << "CUDA results: ref= " << sum << " / sum= " << gpu_sum << std::endl;
|
||||
std::cout << "CUDA Optim results: ref= " << sum << " / sum= " << optim_gpu_sum << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Clean memory
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Free device memory.
|
||||
CUDA_SAFE_CALL( cudaFree( a_device ) );
|
||||
|
||||
// Free host memory.
|
||||
delete[] a_host;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
7
A4/TP_GPU-master/TP2_reduction/windows/CpuTimer.cpp
Normal file
|
@ -0,0 +1,7 @@
|
|||
#include "CpuTimer.h"
|
||||
|
||||
// Initialize the resolution of the timer
|
||||
LARGE_INTEGER CpuTimer::m_freq = (QueryPerformanceFrequency(&CpuTimer::m_freq), CpuTimer::m_freq);
|
||||
|
||||
// Calculate the overhead of the timer
|
||||
LONGLONG CpuTimer::m_overhead = CpuTimer::GetOverhead();
|
37
A4/TP_GPU-master/TP2_reduction/windows/CpuTimer.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#pragma once
|
||||
#include <windows.h>
|
||||
|
||||
struct CpuTimer
|
||||
{
|
||||
void Start()
|
||||
{
|
||||
QueryPerformanceCounter(&m_start);
|
||||
}
|
||||
|
||||
void Stop()
|
||||
{
|
||||
QueryPerformanceCounter(&m_stop);
|
||||
}
|
||||
|
||||
// Returns elapsed time in milliseconds (ms)
|
||||
double Elapsed()
|
||||
{
|
||||
return (m_stop.QuadPart - m_start.QuadPart - m_overhead) * 1000.0 / m_freq.QuadPart;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
// Returns the overhead of the timer in ticks
|
||||
static LONGLONG GetOverhead()
|
||||
{
|
||||
CpuTimer t;
|
||||
t.Start();
|
||||
t.Stop();
|
||||
return t.m_stop.QuadPart - t.m_start.QuadPart;
|
||||
}
|
||||
|
||||
LARGE_INTEGER m_start;
|
||||
LARGE_INTEGER m_stop;
|
||||
static LARGE_INTEGER m_freq;
|
||||
static LONGLONG m_overhead;
|
||||
};
|
39
A4/TP_GPU-master/TP2_reduction/windows/GpuTimer.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
class GpuTimer
|
||||
{
|
||||
cudaEvent_t start, stop;
|
||||
|
||||
public:
|
||||
GpuTimer()
|
||||
{
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
}
|
||||
|
||||
~GpuTimer()
|
||||
{
|
||||
cudaEventDestroy(stop);
|
||||
cudaEventDestroy(start);
|
||||
}
|
||||
|
||||
void Start()
|
||||
{
|
||||
cudaEventRecord(start);
|
||||
}
|
||||
|
||||
void Stop()
|
||||
{
|
||||
cudaEventRecord(stop);
|
||||
cudaEventSynchronize(stop);
|
||||
}
|
||||
|
||||
float Elapsed()
|
||||
{
|
||||
float elapsed;
|
||||
cudaEventElapsedTime(&elapsed, start, stop);
|
||||
return elapsed;
|
||||
}
|
||||
};
|
||||
|
415
A4/TP_GPU-master/TP2_reduction/windows/Reduce.cu
Normal file
|
@ -0,0 +1,415 @@
|
|||
/*
|
||||
# Copyright (c) 2011-2012 NVIDIA CORPORATION. All Rights Reserved.
|
||||
#
|
||||
# NVIDIA CORPORATION and its licensors retain all intellectual property
|
||||
# and proprietary rights in and to this software, related documentation
|
||||
# and any modifications thereto. Any use, reproduction, disclosure or
|
||||
# distribution of this software and related documentation without an express
|
||||
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <omp.h>
|
||||
#include <thrust/reduce.h>
|
||||
#include "GpuTimer.h"
|
||||
#include "CpuTimer.h"
|
||||
|
||||
#define CUDA_SAFE_CALL(call) \
|
||||
{ \
|
||||
cudaError_t err_code = call; \
|
||||
if( err_code != cudaSuccess ) { std::cerr << "Error (" << __FILE__ << ":" << __LINE__ << "): " << cudaGetErrorString(err_code) << std::endl; return 1; } \
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// G P U R E D U C T I O N
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__global__ void reduce_kernel( int n, const int *in_buffer, int *out_buffer, const int2 *block_ranges )
|
||||
{
|
||||
// Allocate shared memory inside the block.
|
||||
extern __shared__ int s_mem[];
|
||||
|
||||
// The range of data to work with.
|
||||
int2 range = block_ranges[blockIdx.x];
|
||||
|
||||
// Compute the sum of my elements.
|
||||
|
||||
// TODO: fill-in that section of the code
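// A possible completion (sketch, not necessarily the intended solution):
// each thread accumulates a strided slice of the block's range [range.x, range.y).
int my_sum = 0;
for( int i = range.x + threadIdx.x ; i < range.y ; i += blockDim.x )
my_sum += in_buffer[i];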
|
||||
|
||||
// Copy my sum in shared memory.
|
||||
s_mem[threadIdx.x] = my_sum;
|
||||
|
||||
// Make sure all the threads have copied their value in shared memory.
|
||||
__syncthreads();
|
||||
|
||||
// Compute the sum inside the block.
|
||||
|
||||
// TODO: fill-in that section of the code
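// Possible completion (sketch): tree reduction in shared memory; assumes
// blockDim.x is a power of two (256 in this TP).
for( int offset = blockDim.x / 2 ; offset > 0 ; offset /= 2 )
{
if( threadIdx.x < offset )
s_mem[threadIdx.x] += s_mem[threadIdx.x + offset];
__syncthreads();
}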
|
||||
|
||||
// The first thread of the block stores its result.
|
||||
if( threadIdx.x == 0 )
|
||||
out_buffer[blockIdx.x] = s_mem[0];
|
||||
}
|
||||
|
||||
int reduce_on_gpu( int n, const int *a_device )
|
||||
{
|
||||
// Compute the size of the grid.
|
||||
const int BLOCK_DIM = 256;
|
||||
const int grid_dim = std::min( BLOCK_DIM, (n + BLOCK_DIM-1) / BLOCK_DIM );
|
||||
const int num_threads = BLOCK_DIM * grid_dim;
|
||||
|
||||
// Compute the number of elements per block.
|
||||
const int elements_per_block = BLOCK_DIM * ((n + num_threads - 1) / num_threads);
|
||||
|
||||
// Allocate memory for temporary buffers.
|
||||
int *partial_sums = NULL;
|
||||
int2 *block_ranges = NULL;
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &partial_sums, BLOCK_DIM * sizeof(int ) ) );
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &block_ranges, grid_dim * sizeof(int2) ) );
|
||||
|
||||
// Compute the ranges for the blocks.
|
||||
int sum = 0;
|
||||
int2 *block_ranges_on_host = new int2[grid_dim];
|
||||
for( int block_idx = 0 ; block_idx < grid_dim ; ++block_idx )
|
||||
{
|
||||
block_ranges_on_host[block_idx].x = sum;
|
||||
block_ranges_on_host[block_idx].y = std::min( sum += elements_per_block, n );
|
||||
}
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, block_ranges_on_host, grid_dim * sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
delete[] block_ranges_on_host;
|
||||
|
||||
// First round: Compute a partial sum for all blocks.
|
||||
reduce_kernel<<<grid_dim, BLOCK_DIM, BLOCK_DIM*sizeof(int)>>>( n, a_device, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Set the ranges for the second kernel call.
|
||||
int2 block_range = make_int2( 0, grid_dim );
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, &block_range, sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
|
||||
// Second round: Compute the final sum by summing the partial results of all blocks.
|
||||
reduce_kernel<<<1, BLOCK_DIM, BLOCK_DIM*sizeof(int)>>>( grid_dim, partial_sums, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Read the result from device memory.
|
||||
int result;
|
||||
CUDA_SAFE_CALL( cudaMemcpy( &result, partial_sums, sizeof(int), cudaMemcpyDeviceToHost ) );
|
||||
|
||||
// Free temporary memory.
|
||||
CUDA_SAFE_CALL( cudaFree( block_ranges ) );
|
||||
CUDA_SAFE_CALL( cudaFree( partial_sums ) );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// G P U R E D U C T I O N : O P T I M I Z E D V E R S I O N
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define WARP_SIZE 32
|
||||
|
||||
template< int BLOCK_DIM >
|
||||
__global__ void reduce_kernel_optimized( int n, const int *in_buffer, int *out_buffer, const int2 *__restrict block_ranges )
|
||||
{
|
||||
// The number of warps in the block.
|
||||
const int NUM_WARPS = BLOCK_DIM / WARP_SIZE;
|
||||
|
||||
// Allocate shared memory inside the block.
|
||||
__shared__ volatile int s_mem[BLOCK_DIM];
|
||||
|
||||
// The range of data to work with.
|
||||
int2 range = block_ranges[blockIdx.x];
|
||||
|
||||
// Warp/lane IDs.
|
||||
const int warp_id = threadIdx.x / WARP_SIZE;
|
||||
const int lane_id = threadIdx.x % WARP_SIZE;
|
||||
|
||||
// Compute the sum of my elements.
|
||||
|
||||
// TODO: fill-in that section of the code
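// Possible completion (sketch): strided accumulation over [range.x, range.y).
int my_sum = 0;
for( int i = range.x + threadIdx.x ; i < range.y ; i += BLOCK_DIM )
my_sum += in_buffer[i];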
|
||||
|
||||
// Copy my sum in shared memory.
|
||||
s_mem[threadIdx.x] = my_sum;
|
||||
|
||||
// Compute the sum inside each warp.
|
||||
|
||||
// TODO: fill-in that section of the code
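// Possible completion (sketch): warp-synchronous tree reduction on the volatile
// shared buffer (valid for the sm_2x model targeted here; newer architectures
// would need __syncwarp()). The trailing __syncthreads() makes every warp's sum
// visible before the warp leaders overwrite s_mem[0..NUM_WARPS-1] below.
for( int offset = WARP_SIZE / 2 ; offset > 0 ; offset /= 2 )
if( lane_id < offset )
s_mem[threadIdx.x] += s_mem[threadIdx.x + offset];
__syncthreads();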
|
||||
|
||||
// Each warp leader stores the result for the warp.
|
||||
if( lane_id == 0 )
|
||||
// TODO: fill-in that section of the code
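// Possible completion (sketch): the warp leader publishes its warp's sum.
s_mem[warp_id] = s_mem[threadIdx.x];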
|
||||
__syncthreads();
|
||||
|
||||
if( warp_id == 0 )
|
||||
{
|
||||
// Read my value from shared memory and store it in a register.
|
||||
my_sum = s_mem[lane_id];
|
||||
|
||||
// Sum the results of the warps.
|
||||
|
||||
// TODO: fill-in that section of the code
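// Possible completion (sketch): pairwise reduction of the NUM_WARPS warp sums,
// deliberately stopping at two partial values so that the final line below
// (my_sum += s_mem[1]) produces the block total (assumes NUM_WARPS >= 2).
for( int offset = NUM_WARPS / 2 ; offset > 1 ; offset /= 2 )
if( lane_id < offset )
s_mem[lane_id] += s_mem[lane_id + offset];
my_sum = s_mem[lane_id];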
|
||||
}
|
||||
|
||||
// The 1st thread stores the result of the block.
|
||||
if( threadIdx.x == 0 )
|
||||
out_buffer[blockIdx.x] = my_sum += s_mem[1];
|
||||
}
|
||||
|
||||
template< int BLOCK_DIM >
|
||||
int reduce_on_gpu_optimized( int n, const int *a_device )
|
||||
{
|
||||
// Compute the size of the grid.
|
||||
const int grid_dim = std::min( BLOCK_DIM, (n + BLOCK_DIM-1) / BLOCK_DIM );
|
||||
const int num_threads = BLOCK_DIM * grid_dim;
|
||||
|
||||
// Compute the number of elements per block.
|
||||
const int elements_per_block = BLOCK_DIM * ((n + num_threads - 1) / num_threads);
|
||||
|
||||
// Allocate memory for temporary buffers.
|
||||
int *partial_sums = NULL;
|
||||
int2 *block_ranges = NULL;
|
||||
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &partial_sums, BLOCK_DIM * sizeof(int ) ) );
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &block_ranges, grid_dim * sizeof(int2) ) );
|
||||
|
||||
// Compute the ranges for the blocks.
|
||||
int sum = 0;
|
||||
int2 *block_ranges_on_host = new int2[grid_dim];
|
||||
for( int block_idx = 0 ; block_idx < grid_dim ; ++block_idx )
|
||||
{
|
||||
block_ranges_on_host[block_idx].x = sum;
|
||||
block_ranges_on_host[block_idx].y = std::min( sum += elements_per_block, n );
|
||||
}
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, block_ranges_on_host, grid_dim * sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
delete[] block_ranges_on_host;
|
||||
|
||||
// First round: Compute a partial sum for all blocks.
|
||||
reduce_kernel_optimized<BLOCK_DIM><<<grid_dim, BLOCK_DIM>>>( n, a_device, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Set the ranges for the second kernel call.
|
||||
int2 block_range = make_int2( 0, grid_dim );
|
||||
CUDA_SAFE_CALL( cudaMemcpy( block_ranges, &block_range, sizeof(int2), cudaMemcpyHostToDevice ) );
|
||||
|
||||
// Second round: Compute the final sum by summing the partial results of all blocks.
|
||||
reduce_kernel_optimized<BLOCK_DIM><<<1, BLOCK_DIM>>>( grid_dim, partial_sums, partial_sums, block_ranges );
|
||||
CUDA_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
// Read the result from device memory.
|
||||
int result;
|
||||
CUDA_SAFE_CALL( cudaMemcpy( &result, partial_sums, sizeof(int), cudaMemcpyDeviceToHost ) );
|
||||
|
||||
// Free temporary memory.
|
||||
CUDA_SAFE_CALL( cudaFree( block_ranges ) );
|
||||
CUDA_SAFE_CALL( cudaFree( partial_sums ) );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// M A I N
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int main( int, char ** )
|
||||
{
|
||||
const int NUM_TESTS = 10;
|
||||
|
||||
// The number of elements in the problem.
|
||||
const int N = 512 * 131072;
|
||||
|
||||
std::cout << "Computing a reduction on " << N << " elements" << std::endl;
|
||||
|
||||
// X and Y on the host (CPU).
|
||||
int *a_host = new int[N];
|
||||
|
||||
// Make sure the memory got allocated. TODO: free memory.
|
||||
if( a_host == NULL )
|
||||
{
|
||||
std::cerr << "ERROR: Couldn't allocate a_host" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Generate data
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << "Filling with 1s" << std::endl;
|
||||
|
||||
// Generate pseudo-random data.
|
||||
for( int i = 0 ; i < N ; ++i )
|
||||
a_host[i] = 1;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the CPU using 1 thread
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the CPU using 1 CPU thread" << std::endl;
|
||||
|
||||
CpuTimer cpu_timer;
|
||||
cpu_timer.Start();
|
||||
|
||||
// Calculate the reference to compare with the device result.
|
||||
int sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
sum = 0;
|
||||
for( int i = 0 ; i < N ; ++i )
|
||||
sum += a_host[i];
|
||||
}
|
||||
|
||||
cpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << cpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the CPU using several OpenMP threads
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the CPU using " << omp_get_max_threads() << " OpenMP thread(s)" << std::endl;
|
||||
|
||||
cpu_timer.Start();
|
||||
|
||||
// Calculate the reference to compare with the device result.
|
||||
int omp_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
omp_sum = 0;
|
||||
#pragma omp parallel shared(omp_sum)
|
||||
{
|
||||
#pragma omp for reduction(+ : omp_sum)
|
||||
for( int i = 0 ; i < N ; ++i )
|
||||
omp_sum = omp_sum + a_host[i];
|
||||
}
|
||||
}
|
||||
|
||||
cpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << cpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// The copy of A on the device (GPU).
|
||||
int *a_device = NULL;
|
||||
|
||||
// Allocate A on the device.
|
||||
CUDA_SAFE_CALL( cudaMalloc( (void **) &a_device, N*sizeof( int ) ) );
|
||||
|
||||
// Copy A from host (CPU) to device (GPU).
|
||||
CUDA_SAFE_CALL( cudaMemcpy( a_device, a_host, N*sizeof( int ), cudaMemcpyHostToDevice ) );
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU using Thrust
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the GPU using Thrust (transfers excluded)" << std::endl;
|
||||
|
||||
GpuTimer gpu_timer;
|
||||
gpu_timer.Start();
|
||||
|
||||
// Launch the kernel on the GPU.
|
||||
int thrust_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
thrust_sum = thrust::reduce( thrust::device_ptr<int>(a_device), thrust::device_ptr<int>(a_device+N) );
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the GPU (transfers excluded)" << std::endl;
|
||||
|
||||
gpu_timer.Start();
|
||||
|
||||
// Launch the kernel on the GPU.
|
||||
int gpu_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
gpu_sum = reduce_on_gpu( N, a_device );
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Compute on the GPU (optimized version)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "Computing on the GPU using a tuned version (transfers excluded)" << std::endl;
|
||||
|
||||
gpu_timer.Start();
|
||||
|
||||
const int BLOCK_DIM = 256;
|
||||
|
||||
// Launch the kernel on the GPU.
|
||||
int optim_gpu_sum = 0;
|
||||
for( int i_test = 0 ; i_test < NUM_TESTS ; ++i_test )
|
||||
{
|
||||
optim_gpu_sum = reduce_on_gpu_optimized<BLOCK_DIM>( N, a_device );
|
||||
}
|
||||
|
||||
gpu_timer.Stop();
|
||||
|
||||
std::cout << " Elapsed time: " << gpu_timer.Elapsed() / NUM_TESTS << "ms" << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Validate results
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "OpenMP results: ref= " << sum << " / sum= " << omp_sum << std::endl;
|
||||
std::cout << "CUDA results: ref= " << sum << " / sum= " << gpu_sum << std::endl;
|
||||
std::cout << "Thrust results: ref= " << sum << " / sum= " << thrust_sum << std::endl;
|
||||
std::cout << "Optim results: ref= " << sum << " / sum= " << optim_gpu_sum << std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Clean memory
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Free device memory.
|
||||
CUDA_SAFE_CALL( cudaFree( a_device ) );
|
||||
|
||||
// Free host memory.
|
||||
delete[] a_host;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
20
A4/TP_GPU-master/TP2_reduction/windows/Reduce.sln
Normal file
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Reduce", "Reduce.vcxproj", "{037A2CAC-CD22-421A-9BB3-4E39043D36C0}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{037A2CAC-CD22-421A-9BB3-4E39043D36C0}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{037A2CAC-CD22-421A-9BB3-4E39043D36C0}.Debug|x64.Build.0 = Debug|x64
|
||||
{037A2CAC-CD22-421A-9BB3-4E39043D36C0}.Release|x64.ActiveCfg = Release|x64
|
||||
{037A2CAC-CD22-421A-9BB3-4E39043D36C0}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
BIN
A4/TP_GPU-master/TP2_reduction/windows/Reduce.suo
Normal file
172
A4/TP_GPU-master/TP2_reduction/windows/Reduce.vcxproj
Normal file
|
@ -0,0 +1,172 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{037A2CAC-CD22-421A-9BB3-4E39043D36C0}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>Reduce</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 4.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<OpenMPSupport>true</OpenMPSupport>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);cudart.lib</AdditionalDependencies>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
<CodeGeneration>compute_20,sm_20</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/openmp" %(AdditionalOptions)</AdditionalOptions>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<OpenMPSupport>true</OpenMPSupport>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);cudart.lib</AdditionalDependencies>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
<CodeGeneration>compute_20,sm_20</CodeGeneration>
|
||||
<AdditionalCompilerOptions>/openmp</AdditionalCompilerOptions>
|
||||
<Optimization>O3</Optimization>
|
||||
<AdditionalOptions>-Xcompiler "/openmp" %(AdditionalOptions)</AdditionalOptions>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="GpuTimer.h" />
|
||||
<ClInclude Include="CpuTimer.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="CpuTimer.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="Reduce.cu">
|
||||
<TargetMachinePlatform Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">64</TargetMachinePlatform>
|
||||
<CodeGeneration Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">compute_20,sm_20</CodeGeneration>
|
||||
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xcompiler "/openmp" %(AdditionalOptions)</AdditionalOptions>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 4.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
115
A4/TP_GPU-master/TP3_TomoGPI/TP_TomoGPI_one_gradient.m
Normal file
|
@ -0,0 +1,115 @@
|
|||
clear all;close all;
|
||||
N=128;
|
||||
P=128;
|
||||
rep_name=['Projections_',num2str(P),'/'];
|
||||
iter = Iter3D(rep_name); % Create a class instance
|
||||
|
||||
iter.save_file=1;
|
||||
iter.save_volume=2;
|
||||
iter.fc=1.0;%cutoff frequency for the filtered backprojection filter
|
||||
|
||||
f_real=CreateVolumeReal(iter);
|
||||
g_real=getSinoReal(iter);
|
||||
|
||||
f_estimated=CreateVolumeInit(iter);
|
||||
normdJProjReg=CreateVolumeInit(iter);
|
||||
|
||||
[g_real,rsb_in]=addNoise(iter,g_real);
|
||||
file_name=sprintf('%s/P_ER_GPU_NOISE_%2.1fdB.s',iter.workdirectory,rsb_in);
|
||||
fid = fopen(file_name, 'wb');
|
||||
fwrite(fid,g_real ,'float');
|
||||
fclose(fid);
|
||||
|
||||
%temp=zeros(size(f_real));%doLaplacian(iter,f_real,temp);%figure(2);imagesc(temp(:,:,N/2));title('laplacien df');colorbar;colormap(gray);drawnow;%figure(3);imagesc(f_real(:,:,N/2));title('f_real');colorbar;colormap(gray);drawnow;
|
||||
|
||||
% disp('****************************')
|
||||
% disp('Gradient descent... ')
|
||||
% disp('****************************')
|
||||
|
||||
f_estimated_n=f_estimated;
|
||||
for num_iter_gradient_n=1:1:getGradientIterationNb(iter)
|
||||
fprintf("iter = %d\n",num_iter_gradient_n);
|
||||
iter.num_iter=num_iter_gradient_n;
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% COMPUTE J AND dJ
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
%g=H*f
|
||||
g_estimated=doProjection(iter,f_estimated_n);%figure(4);imagesc(g_estimated(:,:,N/2));title('g_estimated');colorbar;colormap(gray);drawnow;
|
||||
dg=g_real-g_estimated;%figure(5);imagesc(g_real(:,:,N/2));title('g_real');colorbar;colormap(gray);drawnow;%figure(6);imagesc(dg(:,:,N/2));title('dg');colorbar;colormap(gray);drawnow;
|
||||
|
||||
%df=Ht*(g-Hf)
|
||||
df=doBackprojection(iter,dg);%figure(7);imagesc(df(:,:,N/2));title('df');colorbar;colormap(gray);drawnow;
|
||||
|
||||
%dJ=-2*Ht*(g-Hf)
|
||||
dJ_MC=-2*df;
|
||||
dJ=dJ_MC;
|
||||
|
||||
%dJ+=Dt*D*f with D the Laplacian operator
|
||||
%J_reg=0;
|
||||
%ApplyLaplacianRegularization_to_dJ(iter,f_estimated_n,dJ,getLambda(iter),J_reg,normdJProjReg,getGradientIterationNb(iter),getOptimalStepIterationNb(iter));
|
||||
%dJ_reg=zeros(size(dJ)); temp=zeros(size(dJ));%doLaplacian(iter,f_estimated_n,temp);%doLaplacian(iter,temp,dJ_reg);clear temp;
|
||||
%dJ=dJ+2*dJ_reg;
|
||||
%dJ_reg=dJ-dJ_MC;figure(8);imagesc(dJ_reg(:,:,N/2));title('dJ reg');colorbar;colormap(gray);drawnow;%figure(9);imagesc(dg(:,:,N/2));title('dg');colorbar;colormap(gray);drawnow;
|
||||
|
||||
%Update of the gradient dJ
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% COMPUTE THE STEP SIZE
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
num_alpha=sum(dJ(:).^2);
|
||||
proj_dJ=doProjection(iter,dJ);
|
||||
denum_alpha=2*sum(proj_dJ(:).^2);
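% Exact line search for the quadratic data-fit term J(f)=||g-H*f||^2:
% alpha = ||dJ||^2 / (2*||H*dJ||^2), with proj_dJ = H*dJ computed just above.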
|
||||
clear proj_dJ;
|
||||
|
||||
%IF REGULARIZATION IS USED
|
||||
%if (getLambda(iter) ~= 0)
|
||||
% s=zeros(size(dJ));
|
||||
% doLaplacian(iter,dJ,s);
|
||||
% s=sum(s(:).^2);
|
||||
% denum_alpha=denum_alpha+getLambda(iter)*s;
|
||||
%end
|
||||
|
||||
alpha=num_alpha/denum_alpha;
|
||||
|
||||
%iter.alpha(iter.num_iter)=alpha; %figure(10);plot(iter.alpha);title('pas');xlabel('iter');ylabel('alpha');drawnow;
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% UPDATE f
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
f_estimated_n=f_estimated_n-alpha.*dJ;
|
||||
clear dJ;
|
||||
|
||||
% SAVE THE RECONSTRUCTED VOLUME EVERY iter.save_file ITERATIONS
|
||||
iter=sauvegarde_volume_TOMO8(f_estimated_n,f_real,iter);
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%COMPUTE THE CRITERION (OPTIONAL)
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
J_MC=sum(dg(:).^2);
|
||||
iter.J(iter.num_iter)=J_MC;%+J_reg
|
||||
|
||||
niter_done=size(iter.J,2);
|
||||
if (niter_done > 1)
|
||||
figure(11);plot(iter.J(2:niter_done));title('J');xlabel('iter');ylabel('J');drawnow;
|
||||
end
|
||||
figure(12);imagesc(f_estimated_n(:,:,N/2));title('gradient');colorbar;colormap(gray);drawnow;
|
||||
end
|
||||
|
||||
% disp('****************************')
|
||||
% disp('Gradient descent done !!!!')
|
||||
% disp('****************************')
|
||||
|
||||
|
||||
figure(11);
|
||||
plot(f_estimated_n(:,N/2,N/2),'b','LineWidth',1.5,'Marker','+');hold on;
|
||||
plot(f_real(:,N/2,N/2),'k','LineWidth',1.5);hold on;
|
||||
legend('gradient','real');
|
||||
|
||||
|
||||
figure(13);
|
||||
imagesc(f_real(:,:,N/2));title('real');colorbar;colormap(gray);drawnow;
|
||||
|
73
A4/TP_GPU-master/TP3_TomoGPI/TP_TomoGPI_several_gradients.m
Normal file
|
@ -0,0 +1,73 @@
|
|||
clear all;close all;
|
||||
N=128;
|
||||
P=128;
|
||||
rep_name=['Projections_',num2str(P),'/'];
|
||||
iter = Iter3D(rep_name); % Create a class instance
|
||||
|
||||
iter.save_file=1;
|
||||
iter.save_volume=2;
|
||||
iter.fc=1.0;%cutoff frequency for the filtered backprojection filter
|
||||
|
||||
f_real=CreateVolumeReal(iter);
|
||||
g_real=getSinoReal(iter);
|
||||
|
||||
|
||||
[g_real,rsb_in]=addNoise(iter,g_real);
|
||||
file_name=sprintf('%s/P_ER_GPU_NOISE_%2.1fdB.s',iter.workdirectory,rsb_in);
|
||||
fid = fopen(file_name, 'wb');
|
||||
fwrite(fid,g_real ,'float');
|
||||
fclose(fid);
|
||||
|
||||
file_name=sprintf('%s/FDK_NOISE_%2.1fdB.v',iter.workdirectory,rsb_in);
|
||||
fid = fopen(file_name, 'rb');
|
||||
f_FDK_32=fread(fid,N*N*N ,'float');
|
||||
f_FDK_32=reshape(f_FDK_32,N,N,N);
|
||||
fclose(fid);
|
||||
|
||||
|
||||
|
||||
% ITERATIVE LEAST-SQUARES RECONSTRUCTION WITH QUADRATIC REGULARIZATION
|
||||
setPositivity(iter,1);
|
||||
setLambda(iter,100);
|
||||
l1=getLambda(iter);
|
||||
f1_32=CreateVolumeInit(iter);
|
||||
title1_32='lambda 100 P 32';
|
||||
doGradient(iter,f1_32,g_real,f_real);
|
||||
|
||||
setLambda(iter,0.1);
|
||||
l2=getLambda(iter);
|
||||
f2_32=CreateVolumeInit(iter);
|
||||
title2_32='lambda 0.1 P 32';
|
||||
doGradient(iter,f2_32,g_real,f_real);
|
||||
|
||||
|
||||
setLambda(iter,0);
|
||||
l3=getLambda(iter);
|
||||
f3_32=CreateVolumeInit(iter);
|
||||
title3_32='lambda 0 P 32';
|
||||
doGradient(iter,f3_32,g_real,f_real);
|
||||
|
||||
|
||||
figure(11);
|
||||
plot(f1_32(:,N/2,N/2),'b','LineWidth',1.5,'Marker','+');hold on;
|
||||
plot(f2_32(:,N/2,N/2),'g','LineWidth',1.5,'Marker','x');hold on;
|
||||
plot(f3_32(:,N/2,N/2),'c','LineWidth',1.5,'Marker','*');hold on;
|
||||
plot(f_real(:,N/2,N/2),'k','LineWidth',1.5);hold on;
|
||||
plot(f_FDK_32(:,N/2,N/2),'r','LineWidth',1.5,'Marker','o');hold on;
|
||||
legend(title1_32,title2_32,title3_32,'real','fdk');
|
||||
%legend(title1_32,'real','fdk');
|
||||
|
||||
|
||||
figure(12);
|
||||
imagesc(f_real(:,:,N/2));title('real');colorbar;colormap(gray);drawnow;
|
||||
figure(13);
|
||||
imagesc(f1_32(:,:,N/2));title(title1_32);colorbar;colormap(gray);drawnow;
|
||||
figure(14);
|
||||
imagesc(f_FDK_32(:,:,N/2));title('FDK');colorbar;colormap(gray);drawnow;
|
||||
figure(15);
|
||||
imagesc(f2_32(:,:,N/2));title(title2_32);colorbar;colormap(gray);drawnow;
|
||||
figure(16);
|
||||
imagesc(f3_32(:,:,N/2));title(title3_32);colorbar;colormap(gray);drawnow;
|
||||
|
||||
|
||||
|
16
A4/TP_GPU-master/TP3_TomoGPI/install_TomoGPI.sh
Normal file
|
@ -0,0 +1,16 @@
|
|||
#!/bin/bash
|
||||
export DATA_TP_TOMOGPI="/partage/public/ngac/TomoGPI/data_TP_TomoGPI/"
|
||||
if [ ! -d "/tmp/data_TP_TomoGPI/data3D_0256" ]
|
||||
then
|
||||
cp -r "$DATA_TP_TOMOGPI" /tmp
|
||||
fi
|
||||
export CURRENT_DIR=$(pwd)
|
||||
export TOMO_GPI=/partage/public/ngac/TomoGPI
|
||||
export MATLABPATH=$TOMO_GPI/Matlab/Tomo8:$MATLABPATH
|
||||
export MATLABPATH=$TOMO_GPI/build:$MATLABPATH
|
||||
export MATLABPATH=$CURRENT_DIR:$MATLABPATH
|
||||
export PATH=$TOMO_GPI/build/:$PATH
|
||||
alias matlab_TomoGPI='LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/8/libstdc++.so matlab'
|
||||
cd /tmp/data_TP_TomoGPI/data3D_0128/phantom3D_0006_shepp/
|
||||
matlab_TomoGPI &
|
||||
#matlab &
|
32
A4/TP_GPU-master/TP5_computer-vision/CMakeLists.txt
Normal file
|
@ -0,0 +1,32 @@
|
|||
# Project declaration
|
||||
project(my_project)
|
||||
|
||||
#link_directories(/usr/local/lib)
|
||||
|
||||
set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
find_package(CUDA REQUIRED)
|
||||
#set(OpenCV_DIR "/usr/local/lib/OpenCV/")
|
||||
find_package(OpenCV REQUIRED)
|
||||
include_directories(${OpenCV_INCLUDE_DIRS} ".")
|
||||
|
||||
find_package(Qt5 COMPONENTS Core Gui Widgets REQUIRED)
|
||||
|
||||
list(APPEND HEADER_REP ${ROOT_DIR}/ ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} "/usr/local/cuda/samples/common/inc/" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/../samples/common/inc")
|
||||
|
||||
|
||||
set(CPP_FILES ${ROOT_DIR}/mylib.cpp)
|
||||
set(CU_FILES ${ROOT_DIR}/main_nano.cu ${ROOT_DIR}/mylib.cu)
|
||||
|
||||
|
||||
set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
set(CUDA_NVCC_FLAGS "-lineinfo;-I\"${ROOT_DIR}/inc\" -I\"${ROOT_DIR}/\" -I\"/usr/local/cuda/samples/common/inc/\"")
|
||||
|
||||
|
||||
cuda_add_executable(my_project ${CPP_FILES} ${CU_FILES} ${HEADER_FILES})
|
||||
|
||||
set_target_properties(my_project PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(my_project PUBLIC ${ROOT_DIR}/inc/ /usr/local/cuda/samples/common/inc/)
|
||||
target_link_libraries(my_project Qt5::Widgets)
|
||||
target_link_libraries(my_project Qt5::Core)
|
||||
target_link_libraries(my_project opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs opencv_video opencv_videoio)
|
47
A4/TP_GPU-master/TP5_computer-vision/main_PC.cu
Normal file
|
@ -0,0 +1,47 @@
|
|||
#include "mylib.h"
|
||||
#include "mylib.cuh"
|
||||
#include "opencv2/opencv.hpp"
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
|
||||
int main(int, char**)
|
||||
{
|
||||
VideoCapture cap(0); // open the default camera
|
||||
if(!cap.isOpened()) // check if we succeeded
|
||||
return -1;
|
||||
|
||||
while(1){
|
||||
Mat frame;
|
||||
cap >> frame;
|
||||
char c=(char)waitKey(25);
|
||||
if(c == '1'){ // if '1' is pressed
|
||||
Mat NB = noirBlanc(frame);
|
||||
imshow("NoirEtBlanc", NB);
|
||||
}
|
||||
else if(c == '2'){ // if '2' is pressed
|
||||
Mat seuil = seuillage(frame);
|
||||
imshow("seuillage", seuil);
|
||||
}
|
||||
else if (c == '3'){ // if '3' is pressed
|
||||
Mat cont = contour(frame);
|
||||
imshow("contour", cont);
|
||||
}
|
||||
else if (c == '4'){ // if '4' is pressed
|
||||
Mat seuilgpu = seuillageGPU(frame);
|
||||
imshow("seuillage GPU",seuilgpu);
|
||||
}
|
||||
else if(c == '0') destroyAllWindows(); // if '0' is pressed
|
||||
|
||||
else imshow("frame", frame);
|
||||
|
||||
if(c==27) // if 'Esc' is pressed
|
||||
break;
|
||||
}
|
||||
// When everything done, release the video capture object
|
||||
cap.release();
|
||||
|
||||
// Closes all the frames
|
||||
destroyAllWindows();
|
||||
|
||||
return 0;
|
||||
}
|
92
A4/TP_GPU-master/TP5_computer-vision/main_nano.cu
Normal file
|
@ -0,0 +1,92 @@
|
|||
#include "mylib.cuh"
|
||||
#include "mylib.h"
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// access the camera stream
|
||||
std::string gstreamer_pipeline(int capture_width, int capture_height,
|
||||
int display_width, int display_height,
|
||||
int framerate, int flip_method) {
|
||||
return "nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)" +
|
||||
std::to_string(capture_width) + ", height=(int)" +
|
||||
std::to_string(capture_height) +
|
||||
", format=(string)NV12, framerate=(fraction)" +
|
||||
std::to_string(framerate) +
|
||||
"/1 ! nvvidconv flip-method=" + std::to_string(flip_method) +
|
||||
" ! video/x-raw, width=(int)" + std::to_string(display_width) +
|
||||
", height=(int)" + std::to_string(display_height) +
|
||||
", format=(string)BGRx ! videoconvert ! video/x-raw, "
|
||||
"format=(string)BGR ! appsink";
|
||||
}
|
||||
|
||||
int main(int, char **) {
|
||||
int capture_width = 1280;
|
||||
int capture_height = 720;
|
||||
int display_width = 640;
|
||||
int display_height = 360;
|
||||
int framerate = 60;
|
||||
int flip_method = 0;
|
||||
char c = '1';
|
||||
|
||||
std::string pipeline =
|
||||
gstreamer_pipeline(capture_width, capture_height, display_width,
|
||||
display_height, framerate, flip_method);
|
||||
std::cout << "Using pipeline: \n\t" << pipeline << "\n";
|
||||
|
||||
cv::VideoCapture cap(pipeline, cv::CAP_GSTREAMER);
|
||||
|
||||
if (!cap.isOpened()) // check if we succeeded
|
||||
return -1;
|
||||
|
||||
while (1) {
|
||||
Mat frame;
|
||||
cap >> frame;
|
||||
int c_new;
|
||||
c_new = cv::waitKey(10);
|
||||
if (c_new != -1)
|
||||
c = c_new;
|
||||
|
||||
switch (c) {
|
||||
case '1': {
|
||||
imshow("frame", frame);
|
||||
break;
|
||||
}
|
||||
case '2': {
|
||||
Mat NB = noirBlanc(frame);
|
||||
imshow("NoirEtBlanc", NB);
|
||||
break;
|
||||
}
|
||||
case '3': {
|
||||
Mat seuil = seuillage(frame);
|
||||
imshow("seuillage", seuil);
|
||||
break;
|
||||
}
|
||||
case '4': {
|
||||
Mat cont = contour(frame);
|
||||
imshow("contour", cont);
|
||||
break;
|
||||
}
|
||||
case '5': {
|
||||
Mat seuilgpu = seuillageGPU(frame);
|
||||
imshow("seuillage GPU", seuilgpu);
|
||||
break;
|
||||
}
|
||||
|
||||
case '0': {
|
||||
destroyAllWindows();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == 27) // if 'Esc' is pressed
|
||||
break;
|
||||
}
|
||||
// When everything done, release the video capture object
|
||||
cap.release();
|
||||
|
||||
// Closes all the frames
|
||||
destroyAllWindows();
|
||||
|
||||
return 0;
|
||||
}
|
41
A4/TP_GPU-master/TP5_computer-vision/main_rasp.cpp
Normal file
|
@ -0,0 +1,41 @@
|
|||
#include "mylib.h"
|
||||
#include "opencv2/opencv.hpp"
|
||||
|
||||
|
||||
int main(int, char**)
|
||||
{
|
||||
VideoCapture cap(0); // open the default camera
|
||||
if(!cap.isOpened()) // check if we succeeded
|
||||
return -1;
|
||||
|
||||
while(1){
|
||||
Mat frame;
|
||||
cap >> frame;
|
||||
char c=(char)waitKey(25);
|
||||
if(c == '1'){ // if '1' is pressed
|
||||
Mat NB = noirBlanc(frame);
|
||||
imshow("NoirEtBlanc", NB);
|
||||
}
|
||||
else if(c == '2'){ // if '2' is pressed
|
||||
Mat seuil = seuillage(frame);
|
||||
imshow("seuillage", seuil);
|
||||
}
|
||||
else if (c == '3'){ // if '3' is pressed
|
||||
Mat cont = contour(frame);
|
||||
imshow("contour", cont);
|
||||
}
|
||||
else if(c == '0') destroyAllWindows(); // if '0' is pressed
|
||||
|
||||
else imshow("frame", frame);
|
||||
|
||||
if(c==27) // if 'Esc' is pressed
|
||||
break;
|
||||
}
|
||||
// When everything done, release the video capture object
|
||||
cap.release();
|
||||
|
||||
// Closes all the frames
|
||||
destroyAllWindows();
|
||||
|
||||
return 0;
|
||||
}
|
90
A4/TP_GPU-master/TP5_computer-vision/mylib.cpp
Normal file
|
@ -0,0 +1,90 @@
|
|||
#include "mylib.h"
|
||||
//---------------------noirBlanc-----------------------
|
||||
|
||||
Mat noirBlanc(Mat frame)
|
||||
{
|
||||
Mat im_gray_out;
|
||||
|
||||
if (frame.empty())
|
||||
exit(0);
|
||||
|
||||
cvtColor(frame,im_gray_out,COLOR_RGB2GRAY);
|
||||
return im_gray_out;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---------------------get_frame-----------------------
|
||||
|
||||
Mat get_frame(Mat frame)
|
||||
{
|
||||
if (frame.empty())
|
||||
exit(0);
|
||||
|
||||
return frame;
|
||||
}
|
||||
|
||||
//---------------------seuillage------------------------
|
||||
|
||||
Mat seuillage(Mat frame)
|
||||
{
|
||||
float nr;
|
||||
uchar r,v,b;
|
||||
Mat frame_out;
|
||||
frame_out.create(frame.rows,frame.cols,CV_8UC3);
|
||||
|
||||
if (frame.empty())
|
||||
exit(0);
|
||||
|
||||
for(int i = 0; i < frame.rows; i++)
|
||||
{
|
||||
for(int j = 0; j < frame.cols; j++)
|
||||
{
|
||||
|
||||
b = frame.at<Vec3b>(i,j)[0];
|
||||
v = frame.at<Vec3b>(i,j)[1];
|
||||
r = frame.at<Vec3b>(i,j)[2];
|
||||
|
||||
nr = r/sqrt(r*r+b*b+v*v);
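// nr is the normalized red component r/||(r,v,b)||; for pixels with nr > 0.7
// the branch below copies the red value into the green channel (a simple "red" highlight).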
|
||||
|
||||
if (nr > 0.7){
|
||||
frame_out.at<Vec3b>(i,j)[0] = b;
|
||||
frame_out.at<Vec3b>(i,j)[1] = r;
|
||||
frame_out.at<Vec3b>(i,j)[2] = r;
|
||||
}
|
||||
else{
|
||||
frame_out.at<Vec3b>(i,j)[0] = b;
|
||||
frame_out.at<Vec3b>(i,j)[1] = v;
|
||||
frame_out.at<Vec3b>(i,j)[2] = r;
|
||||
}
|
||||
}
|
||||
}
|
||||
return frame_out;
|
||||
}
|
||||
|
||||
|
||||
//---------------------contour------------------------
|
||||
|
||||
Mat contour(Mat frame)
|
||||
{
|
||||
|
||||
Mat frame_out,frame_grayt;
|
||||
|
||||
cvtColor(frame,frame_grayt,COLOR_RGB2GRAY);
|
||||
frame_out.create(frame.rows,frame.cols,CV_8UC1);
|
||||
|
||||
if (frame.empty())
|
||||
exit(0);
|
||||
|
||||
for (int i=1;i<frame.rows-1;i++){ // stop one row early: the stencil reads row i+1
|
||||
for (int j=1;j<frame.cols-1;j++){ // stop one column early: the stencil reads column j+1
|
||||
short temp;
|
||||
temp = (-1)*(short)frame_grayt.at<uchar>(i,j-1)+(-1)*(short)frame_grayt.at<uchar>(i-1,j)+(-1)*(short)frame_grayt.at<uchar>(i,j+1)+(-1)*(short)frame_grayt.at<uchar>(i+1,j)+4*(short)frame_grayt.at<uchar>(i,j);
|
||||
|
||||
frame_out.at<uchar>(i,j)=(uchar)abs(temp);
|
||||
}
|
||||
}
|
||||
return frame_out;
|
||||
}
|
||||
|
||||
|
93
A4/TP_GPU-master/TP5_computer-vision/mylib.cu
Normal file
|
@ -0,0 +1,93 @@
|
|||
#include "mylib.h"
|
||||
#include "mylib.cuh"
|
||||
|
||||
|
||||
__global__ void kernel_seuillageGPU(unsigned char *d_image_in, unsigned char *d_image_out,int size_j)
|
||||
{
|
||||
float Csum;
|
||||
int i, j, k, iFirst, jFirst;
|
||||
|
||||
iFirst = blockIdx.x*BLOCK_SIZE; // block offset within the grid of blocks
|
||||
jFirst = blockIdx.y*BLOCK_SIZE;
|
||||
|
||||
i = iFirst + threadIdx.x;// get the thread's index within the block
|
||||
j = jFirst + threadIdx.y;
|
||||
|
||||
float nr = 0;
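// The BGR frame is stored row-major as interleaved bytes, so channel c of pixel
// (i,j) sits at index c + 3*j + 3*size_j*i (size_j being the number of columns).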
|
||||
|
||||
nr=d_image_in[2+j*3+i*3*size_j]/sqrtf(d_image_in[0+j*3+i*3*size_j]*d_image_in[0+j*3+i*3*size_j]+d_image_in[1+j*3+i*3*size_j]*d_image_in[1+j*3+i*3*size_j]+d_image_in[2+j*3+i*3*size_j]*d_image_in[2+j*3+i*3*size_j]);
|
||||
|
||||
if(nr > 0.7)
|
||||
d_image_out[1+j*3+i*3*size_j] = d_image_in[2+j*3+i*3*size_j];
|
||||
else
|
||||
d_image_out[1+j*3+i*3*size_j] = d_image_in[1+j*3+i*3*size_j];
|
||||
|
||||
d_image_out[0+j*3+i*3*size_j] = d_image_in[0+j*3+i*3*size_j];
|
||||
d_image_out[2+j*3+i*3*size_j] = d_image_in[2+j*3+i*3*size_j];
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
Mat seuillageGPU( Mat in)
|
||||
{
|
||||
cudaError_t error;
|
||||
Mat out;
|
||||
out.create(in.rows,in.cols,CV_8UC3);
|
||||
|
||||
// allocate host memory
|
||||
unsigned char *h_image_in_GPU ;
|
||||
h_image_in_GPU=in.data;
|
||||
|
||||
/*cudaEvent_t start,stop,start_mem,stop_mem;
|
||||
error = cudaEventCreate(&start_mem);
|
||||
error = cudaEventCreate(&stop_mem);
|
||||
|
||||
error = cudaEventRecord(start, NULL);
|
||||
error = cudaEventSynchronize(start);*/
|
||||
|
||||
// images on device memory
|
||||
unsigned char *d_image_in_GPU;
|
||||
unsigned char *d_image_out_GPU;
|
||||
|
||||
const unsigned long int mem_size=in.cols*in.rows*3*sizeof(unsigned char);
|
||||
|
||||
// Allocate d_image_in and d_image_out on the GPU
|
||||
cudaMalloc((void**) &d_image_in_GPU,mem_size );
|
||||
cudaMalloc((void**) &d_image_out_GPU, mem_size);
|
||||
|
||||
// copy host memory to device
|
||||
cudaMemcpy(d_image_in_GPU, h_image_in_GPU,mem_size ,cudaMemcpyHostToDevice);
|
||||
|
||||
//error = cudaEventRecord(stop_mem, NULL);
|
||||
|
||||
// Wait for the stop event to complete
|
||||
//error = cudaEventSynchronize(stop_mem);
|
||||
//float msecMem = 0.0f;
|
||||
//error = cudaEventElapsedTime(&msecMem, start, stop_mem);
|
||||
|
||||
// setup execution parameters -> decomposition into threads
|
||||
dim3 threads(BLOCK_SIZE,BLOCK_SIZE);
|
||||
dim3 grid(in.rows/BLOCK_SIZE,in.cols/BLOCK_SIZE);
|
||||
|
||||
// launch the threads executed on the GPU
|
||||
kernel_seuillageGPU<<< grid, threads >>>(d_image_in_GPU, d_image_out_GPU,in.cols);
|
||||
|
||||
// Record the start event
|
||||
//error = cudaEventRecord(start_mem, NULL);
|
||||
//error = cudaEventSynchronize(start_mem);
|
||||
|
||||
// copy result from device to host
|
||||
cudaMemcpy(out.data, d_image_out_GPU, mem_size,cudaMemcpyDeviceToHost);
|
||||
cudaFree(d_image_in_GPU);
|
||||
cudaFree(d_image_out_GPU);
|
||||
/*
|
||||
float msecTotal,msecMem2;
|
||||
error = cudaEventRecord(stop, NULL);
|
||||
error = cudaEventSynchronize(stop);
|
||||
error = cudaEventElapsedTime(&msecTotal, start, stop);
|
||||
error = cudaEventElapsedTime(&msecMem2, start_mem, stop);
|
||||
*/
|
||||
return out;
|
||||
}
|
||||
|
13
A4/TP_GPU-master/TP5_computer-vision/mylib.cuh
Normal file
|
@ -0,0 +1,13 @@
|
|||
#ifndef MYLIB_CUH
|
||||
#define MYLIB_CUH
|
||||
|
||||
#include "mylib.h"
|
||||
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
|
||||
Mat seuillageGPU( Mat in);
|
||||
|
||||
|
||||
#endif
|
36
A4/TP_GPU-master/TP5_computer-vision/mylib.h
Normal file
|
@ -0,0 +1,36 @@
|
|||
#ifndef MYLIB_H
|
||||
#define MYLIB_H
|
||||
|
||||
#define SIZE_I 960
|
||||
#define SIZE_J 1280
|
||||
#define BLOCK_SIZE 16
|
||||
|
||||
#include "opencv2/opencv.hpp"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
using namespace cv;
|
||||
|
||||
//---------------------get_frame---------------------
|
||||
|
||||
Mat get_frame(Mat frame);
|
||||
|
||||
//---------------------noirBlanc------------------
|
||||
|
||||
Mat noirBlanc(Mat frame);
|
||||
|
||||
//---------------------seuillage------------------
|
||||
|
||||
Mat seuillage(Mat frame);
|
||||
|
||||
//---------------------contour--------------------
|
||||
|
||||
Mat contour(Mat frame);
|
||||
|
||||
|
||||
#endif
|