master
hiGepi 1 year ago
parent 2ff63efcb9
commit 4ac3a1df18

@ -0,0 +1,82 @@
#include "edge_detect.h"
/**
 * Run a 3x3 Sobel filter over `src` in one direction and write the result,
 * passed through hls::ConvertScaleAbs, to `dst`.
 *
 * @param src  Input image; consumed by hls::Sobel.
 * @param dst  Output image produced by hls::ConvertScaleAbs.
 * @param dir  Zero selects hls::Sobel<0,1,3>; any non-zero value selects
 *             hls::Sobel<1,0,3> (presumably y- and x-derivative respectively;
 *             confirm against the HLS video library documentation).
 */
void mysobel(rgb_img_t &src, rgb_img_t &dst, int dir)
{
    // Intermediate image carrying the raw Sobel response.
    rgb_img_t img0(MAX_HEIGHT, MAX_WIDTH);

    if (dir == 0)
    {
        hls::Sobel<0,1,3>(src, img0);
    }
    else
    {
        hls::Sobel<1,0,3>(src, img0);
    }

    // NOTE(review): img0 is an 8-bit unsigned image type; confirm that negative
    // Sobel responses are not already clipped before this call.
    hls::ConvertScaleAbs(img0, dst);
}
/**
 * Combine both Sobel directions of `src` into `dst`.
 *
 * The input is duplicated, each copy is filtered by mysobel() with a different
 * direction flag, and the two results are blended with hls::AddWeighted using
 * weights 1 and 1 and an offset of 0.
 *
 * @param src  Input image; consumed by hls::Duplicate.
 * @param dst  Output image receiving the blended result.
 */
void mysobelxy(rgb_img_t &src, rgb_img_t &dst)
{
    int const unit_weight = 1;  // weight applied to each directional result
    int const no_offset = 0;    // constant added by hls::AddWeighted

    rgb_img_t img0(MAX_HEIGHT, MAX_WIDTH);  // copy of src for dir = 1
    rgb_img_t img1(MAX_HEIGHT, MAX_WIDTH);  // copy of src for dir = 0
    rgb_img_t img2(MAX_HEIGHT, MAX_WIDTH);  // mysobel result for dir = 1
    rgb_img_t img3(MAX_HEIGHT, MAX_WIDTH);  // mysobel result for dir = 0

    // Each source image is read by exactly one consumer, so split it first.
    hls::Duplicate(src, img0, img1);

    mysobel(img0, img2, 1);
    mysobel(img1, img3, 0);

    hls::AddWeighted(img2, unit_weight, img3, unit_weight, no_offset, dst);
}
/**
 * Stream-to-stream Sobel pipeline.
 *
 * Steps, in order: AXI video stream -> image, RGB -> gray conversion,
 * mysobelxy(), gray -> RGB conversion, image -> AXI video stream.
 *
 * @param stream_in   Incoming AXI video stream.
 * @param stream_out  Outgoing AXI video stream carrying the filtered frame.
 */
void sobelfoo(stream_t &stream_in, stream_t &stream_out)
{
    rgb_img_t img0(MAX_HEIGHT, MAX_WIDTH);  // frame read from stream_in
    rgb_img_t img1(MAX_HEIGHT, MAX_WIDTH);  // result of the RGB2GRAY conversion
    rgb_img_t img2(MAX_HEIGHT, MAX_WIDTH);  // output of mysobelxy
    rgb_img_t img4(MAX_HEIGHT, MAX_WIDTH);  // result of the GRAY2RGB conversion

    hls::AXIvideo2Mat(stream_in, img0);

    // NOTE(review): img1/img2 use the 3-channel rgb_img_t type even though they
    // hold the gray-converted data; confirm this is intended.
    hls::CvtColor<HLS_RGB2GRAY>(img0, img1);
    mysobelxy(img1, img2);
    hls::CvtColor<HLS_GRAY2RGB>(img2, img4);

    hls::Mat2AXIvideo(img4, stream_out);
}
//
//void blurfoo(stream_t &stream_in, stream_t &stream_out)
//{
// int const rows = MAX_HEIGHT;
// int const cols = MAX_WIDTH;
//
// rgb_img_t img0(rows, cols);
// rgb_img_t img1(rows, cols);
//
// hls::AXIvideo2Mat(stream_in, img0);
//
// hls::GaussianBlur<5,5>(img0, img1, (double)5, (double)5);
//
// hls::Mat2AXIvideo(img1, stream_out);
//
//}
/**
 * Entry point declared in edge_detect.h: forwards both streams to sobelfoo().
 *
 * @param stream_in   Incoming AXI video stream.
 * @param stream_out  Outgoing AXI video stream.
 */
void edge_detect(stream_t &stream_in, stream_t &stream_out)
{
    // The former `rows`/`cols` locals were never read and have been removed.
    sobelfoo(stream_in, stream_out);
}

@ -0,0 +1,16 @@
#include "hls_video.h"
// One AXI-stream beat with a 24-bit data field (the three remaining template
// arguments are all 1; presumably the user/id/dest side-channel widths).
typedef ap_axiu<24,1,1,1> interface_t;
// 3-bit unsigned integer type; not referenced by the functions in edge_detect.cpp.
typedef ap_uint<3> interface_3_bits;
// Stream of AXI beats used for the video input and output ports.
typedef hls::stream<interface_t> stream_t;
// Top-level function: reads a frame from stream_in and writes the processed
// frame to stream_out.
void edge_detect(stream_t &stream_in, stream_t &stream_out);
// Maximum frame dimensions; also used as the template sizes of rgb_img_t.
#define MAX_WIDTH 1280
#define MAX_HEIGHT 720
// Image type used throughout the design (HLS_8UC3: by its OpenCV-style name,
// 8-bit unsigned with 3 channels).
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> rgb_img_t;
// Single pixel with three unsigned char components.
typedef hls::Scalar<3, unsigned char> rgb_pix_t;
// Image file names, presumably consumed by a test bench that is not shown here.
#define INPUT_IMAGE "rover.bmp"
#define OUTPUT_IMAGE "rover_out.bmp"

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

@ -0,0 +1,57 @@
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
# Raw execution-time measurements, one value per N_max in 50, 100, ..., 450.
# The series names below follow the labels used in the scatter plots.
Y = [661, 971, 1282, 1613, 1936, 2273, 2609, 2933, 3264]               # FPGA 50 MHz
Y2 = [205, 290, 395, 478, 562, 625, 718, 808, 899]                     # FPGA 200 MHz
Y_ARM = [424, 824 , 1224 , 1624, 2024, 2424 , 2824, 3224, 3624]        # ARM
Y_gcc = [2296, 5119, 6715, 9078, 10830, 12541, 15041, 16780, 18883]    # desktop, not optimised
Y_O3 = [165, 287, 380, 559, 900, 901, 776, 1122, 1574]                 # desktop, optimised

# Rescale every series in place and build the matching abscissa.
# The factors (3/100, plus an extra /4 for the desktop runs) are kept exactly
# as in the original script; their derivation is not documented here.
X = []
for i in range(9):
    Y[i] = Y[i]*3/100
    # Bug fix: this line used to read `Y2[i] = Y[i]*3/100`, which replaced the
    # 200 MHz measurements with a doubly-scaled copy of the 50 MHz ones.
    Y2[i] = Y2[i]*3/100
    Y_ARM[i] = Y_ARM[i]*3/100
    Y_gcc[i] = Y_gcc[i]*3/100/4
    Y_O3[i] = Y_O3[i]*3/100/4
    X.append(50*(1+i))

# Raw measurement points.
plt.scatter(X, Y, color="b", marker="x", label="FPGA 50 MHz")
plt.scatter(X, Y2, color="g", marker="x", label="FPGA 200 MHz")
plt.scatter(X, Y_ARM, color="g", label="mesures ARM")
plt.scatter(X, Y_gcc, color="y",marker="*", label="desktop non opti")
plt.scatter(X, Y_O3, color="pink", marker="*",label="desktop_opti")

# Linear fits of the two FPGA series (scikit-learn expects column vectors).
x = np.array(X).reshape(-1, 1)
y = np.array(Y).reshape(-1, 1)
y2 = np.array(Y2).reshape(-1, 1)

reg = LinearRegression().fit(x, y)
reg2 = LinearRegression().fit(x, y2)

print("score obtenu : " + str(reg.score(x, y)))
# Bug fix: the second score was computed with `reg` (the 50 MHz model) instead
# of `reg2`, so it did not describe the 200 MHz fit.
print("score obtenu : " + str(reg2.score(x, y2)))
print("attente à zéro : {}".format(reg.intercept_))
print("attente à zéro : {}".format(reg2.intercept_))

# Draw each regression line between x = 0 and the largest N_max.
x_lin = [0, max(X)]
x_ends = np.array(x_lin).reshape(-1, 1)
ends = reg.predict(x_ends)
ends2 = reg2.predict(x_ends)
y_lin = [ends[0][0], ends[1][0]]
y_lin2 = [ends2[0][0], ends2[1][0]]

plt.plot(x_lin, y_lin, color = "r", label="RegLin 50 score : {:.4f}".format(reg.score(x, y)))
plt.plot(x_lin, y_lin2, color = "r")

plt.xlim([0, 500])
plt.ylim([0, 100])
plt.legend()
plt.title("Temps d'exécution en fonction de n_max")
plt.ylabel("T (0.1 µs)")
plt.xlabel("N_max")
plt.savefig("M2_SETI/A2/fibonacci/linéaire.png")
plt.show()

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

@ -0,0 +1,35 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Combinational adder/subtractor for two nb_bits-wide unsigned operands.
--   Op = '0'        : S = A + B
--   any other value : S = A - B
-- Cout is the extra (nb_bits-th) bit of the extended result: the carry of an
-- addition, or '1' when a subtraction wraps around (A < B).
entity add_sub is
  generic (nb_bits : natural := 16);
  port (
    A, B : in  STD_LOGIC_VECTOR(nb_bits-1 downto 0);
    Op   : in  STD_LOGIC;                              -- operation select
    S    : out STD_LOGIC_VECTOR(nb_bits-1 downto 0);   -- result
    Cout : out STD_LOGIC);                             -- carry / borrow out
end add_sub;

architecture proced of add_sub is
  -- Operands and result are one bit wider than the ports so that the
  -- carry/borrow is not lost.
  signal Aint, Bint : unsigned(nb_bits downto 0);
  signal Sint       : unsigned(nb_bits downto 0);
begin
  -- Zero-extend both operands by one bit.
  Aint <= '0' & unsigned(A);
  Bint <= '0' & unsigned(B);

  -- Select the operation.
  Sint <= Aint + Bint when Op = '0' else
          Aint - Bint;

  -- Split the extended result into the data word and the extra bit.
  Cout <= Sint(nb_bits);
  S    <= std_logic_vector(Sint(nb_bits-1 downto 0));
end proced;

@ -0,0 +1,39 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Down-counter with asynchronous initialisation.
--   Init = '1'           : counter is loaded with nb_iter-1 (independent of clk)
--   rising clk, encount  : counter is decremented by one
-- ceqz is '0' while the counter is greater than zero and '1' otherwise.
entity decounter is
  generic (
    nb_bits : natural;    -- width of the counter
    nb_iter : natural);   -- number of iterations to count
  port (
    Init    : in  STD_LOGIC;    -- load command, active on '1'
    encount : in  STD_LOGIC;    -- count enable, active on '1'
    clk     : in  STD_LOGIC;    -- clock
    ceqz    : out STD_LOGIC);   -- '1' when the counter is not greater than zero
end decounter;

architecture proced of decounter is
  signal Sint    : unsigned(nb_bits-1 downto 0);   -- current counter value
  signal ceqzint : std_logic;                      -- internal copy of ceqz
begin
  -- Counter register.
  Ps : process(clk, Init)
  begin
    if Init = '1' then
      Sint <= to_unsigned(nb_iter-1, nb_bits);
    elsif clk'event and clk = '1' then
      if encount = '1' then
        Sint <= Sint - 1;
      end if;
    end if;
  end process;

  -- Zero detection, re-evaluated whenever the counter changes.
  ceqzint <= '0' when Sint > 0 else '1';
  ceqz    <= ceqzint;
end proced;

@ -0,0 +1,26 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Three-input multiplexer on nb_bits-wide buses.
--   sel = "00"      : S = I0
--   sel = "01"      : S = I1
--   any other value : S = I2
entity mux3_1 is
  generic (nb_bits : natural);
  port (
    I0, I1, I2 : in  STD_LOGIC_VECTOR(nb_bits-1 downto 0);
    sel        : in  STD_LOGIC_VECTOR(1 downto 0);            -- select input
    S          : out STD_LOGIC_VECTOR(nb_bits-1 downto 0));
end mux3_1;

architecture proced of mux3_1 is
  -- Internal copy of the output, driven by the selection process.
  signal Sint : STD_LOGIC_VECTOR(nb_bits-1 downto 0);
begin
  -- Fix: I2 was missing from the sensitivity list, so in simulation a change
  -- on I2 while it was selected did not propagate to S until sel, I0 or I1
  -- changed. Every signal read by the process is now listed.
  process(sel, I0, I1, I2)
  begin
    if (sel = "00") then
      Sint <= I0;
    elsif (sel = "01") then
      Sint <= I1;
    else
      Sint <= I2;
    end if;
  end process;

  S <= Sint;
end proced;

@ -0,0 +1,31 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- nb_bits-wide register with asynchronous initialisation and load enable.
--   Init = '1'      : the register takes init_value (independent of clk)
--   rising clk, ld  : the register takes E
-- S always reflects the stored value.
entity reg is
  generic (nb_bits : natural);
  port (
    Init       : in  STD_LOGIC;                              -- initialisation command
    init_value : in  STD_LOGIC_VECTOR(nb_bits-1 downto 0);   -- value loaded while Init = '1'
    ld         : in  STD_LOGIC;                              -- load command
    clk        : in  STD_LOGIC;
    E          : in  STD_LOGIC_VECTOR(nb_bits-1 downto 0);   -- data input
    S          : out STD_LOGIC_VECTOR(nb_bits-1 downto 0));  -- data output
end reg;

architecture proced of reg is
  signal Sint : unsigned(nb_bits-1 downto 0);   -- stored value
begin
  Ps : process(clk, Init)
  begin
    if Init = '1' then
      Sint <= unsigned(init_value);
    elsif clk'event and clk = '1' then
      if ld = '1' then
        Sint <= unsigned(E);
      end if;
    end if;
  end process;

  S <= std_logic_vector(Sint);
end proced;

@ -0,0 +1,89 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Port-less test bench entity; each architecture below exercises one component.
entity testbench is
end testbench;
-- Test bench for the decounter component (4-bit counter, 16 iterations).
architecture test_decounter of testbench is
signal clk, sig_init, sig_eqz, sig_en : std_logic;
begin
-- Initialisation command held at '1' for the first 20 ns.
sig_init <= '1', '0' after 20 ns;
-- Counting is enabled from 90 ns onwards. NOTE: this coincides with a rising
-- clock edge (see Gene_clk), so the first decrement relies on sig_en being
-- updated before clk within that time step.
sig_en <= '0', '1' after 90 ns;
DUT: entity work.decounter(proced)
generic map(nb_bits => 4, nb_iter => 16)
port map(encount => sig_en, clk => clk, init => sig_init, ceqz => sig_eqz);
-- Clock generator: low for 10 ns, then 30 periods of 20 ns (rising edges at
-- 10 ns, 30 ns, ..., 590 ns), after which the clock stays low forever.
Gene_clk: process
begin
clk <= '0';
wait for 10 ns;
for i in 1 to 30 loop
clk <= '1';
wait for 10 ns;
clk <= '0';
wait for 10 ns;
end loop;
-- Suspend the process so that the simulation can run out of events.
wait;
end process;
end test_decounter;
-- Test bench for a 16-bit two-input multiplexer (entity work.mux2_1, which is
-- defined outside the files shown here).
architecture test_mux2_1 of testbench is
  signal sig_sel             : std_logic;
  signal sig_A, sig_B, sig_S : std_logic_vector(15 downto 0);
begin
  -- Constant data inputs: I0 = 100, I1 = 0.
  sig_A <= x"0064";
  sig_B <= x"0000";

  -- Select starts at '1' and switches to '0' after 50 ns.
  sig_sel <= '1', '0' after 50 ns;

  DUT : entity work.mux2_1(proced)
    generic map (nb_bits => 16)
    port map (
      I0  => sig_A,
      I1  => sig_B,
      sel => sig_sel,
      S   => sig_S);
end test_mux2_1;
-- Test bench for the 16-bit add_sub component.
-- Stimulus: B is constantly 1; A is 1 and drops to 0 at 50 ns; the operation
-- is addition, then subtraction from 100 ns, then addition again from 200 ns.
architecture test_add_sub of testbench is
  signal sig_op, sig_cout      : std_logic;
  signal sig_A, sig_B, sig_res : std_logic_vector(15 downto 0);
begin
  -- Operands.
  sig_A <= x"0001", x"0000" after 50 ns;
  sig_B <= x"0001";

  -- Operation select: '0' = add, '1' = subtract.
  sig_op <= '0', '1' after 100 ns, '0' after 200 ns;

  DUT : entity work.add_sub(proced)
    generic map (nb_bits => 16)
    port map (
      A    => sig_A,
      B    => sig_B,
      op   => sig_op,
      S    => sig_res,
      cout => sig_cout);
end test_add_sub;
-- Test bench for the 16-bit mux3_1 component.
-- The select input steps through "00", "01", "10", "11" and back to "00",
-- changing every 50 ns, while the data inputs stay constant.
architecture test_mux3_1 of testbench is
  signal sig_sel                    : std_logic_vector(1 downto 0);
  signal sig_A, sig_B, sig_C, sig_S : std_logic_vector(15 downto 0);
begin
  -- Constant data inputs: I0 = 100, I1 = 0, I2 = 32.
  sig_A <= x"0064";
  sig_B <= x"0000";
  sig_C <= x"0020";

  sig_sel <= "00",
             "01" after 50 ns,
             "10" after 100 ns,
             "11" after 150 ns,
             "00" after 200 ns;

  DUT : entity work.mux3_1(proced)
    generic map (nb_bits => 16)
    port map (
      I0  => sig_A,
      I1  => sig_B,
      I2  => sig_C,
      sel => sig_sel,
      S   => sig_S);
end test_mux3_1;

@ -0,0 +1,93 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Clocked state machine computing an integer square root.
--
-- Sequence: attente (idle) -> init -> calcul (5 clock cycles) -> fin.
--   * START = '1' in attente starts a computation on m_in.
--   * In fin the result is written to m_out; the machine stays in fin while
--     START is '1' and returns to attente otherwise.
--   * RESET = '1' on a rising clock edge forces the state back to attente.
entity racine_machine is
  port (
    clock : in  std_logic;
    START : in  std_logic;
    RESET : in  std_logic;
    m_in  : in  std_logic_vector(3 downto 0);    -- operand, read as unsigned
    m_out : out std_logic_vector(3 downto 0)     -- result, written in state fin
  );
end racine_machine;

architecture behavior of racine_machine is
  type etat is (attente, init, calcul, fin);
  type cal is ('0', '1');
  signal state : etat := attente;
  signal done  : cal;               -- '1' only while in state fin
begin
  Racine : process(clock)
    variable X : integer;   -- remaining part of the operand
    variable V : integer;   -- current power of four: 256, 64, 16, 4, 1
    variable Z : integer;   -- result being built
    variable i : integer;   -- iterations left
  begin
    if rising_edge(clock) then
      if RESET = '1' then
        state <= attente;
      else
        case state is

          when attente =>
            done <= '0';
            if START = '1' then
              state <= init;
            else
              state <= attente;
            end if;

          when init =>
            -- Load the operand and reset the working variables.
            i := 5;
            Z := 0;
            V := 256;
            X := to_integer(unsigned(m_in));
            done  <= '0';
            state <= calcul;

          when calcul =>
            -- One digit-by-digit step: try to subtract the trial value
            -- (Z + V) and shift the result right by one bit.
            Z := Z + V;
            if X >= Z then
              X := X - Z;
              Z := (Z + V) / 2;
            else
              Z := (Z - V) / 2;
            end if;
            V := V / 4;
            i := i - 1;
            done <= '0';
            if i = 0 then
              state <= fin;
            else
              state <= calcul;
            end if;

          when fin =>
            m_out <= std_logic_vector(to_unsigned(Z, m_out'length));
            done  <= '1';
            -- Wait for START to be released before accepting a new request.
            if START /= '1' then
              state <= attente;
            else
              state <= fin;
            end if;

          when others =>
            state <= attente;

        end case;
      end if;
    end if;
  end process Racine;
end behavior;

@ -0,0 +1,93 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
-- Clocked state machine computing an integer square root of an 8-bit operand.
--
-- Sequence: attente (idle) -> init -> calcul (5 clock cycles) -> fin.
--   * START = '1' in attente starts a computation on INPUT.
--   * During calcul, count shows the number of iterations left.
--   * In fin the result is written to OUTPUT; the machine stays in fin while
--     START is '1' and returns to attente otherwise.
--   * RESET = '1' on a rising clock edge forces the state back to attente.
entity machine is
  port (
    clock  : in  std_logic;
    START  : in  std_logic;
    RESET  : in  std_logic;
    INPUT  : in  std_logic_vector(7 downto 0);    -- operand, read as unsigned
    OUTPUT : out std_logic_vector(7 downto 0);    -- result, written in state fin
    count  : out std_logic_vector(7 downto 0)     -- iterations left, updated in calcul
  );
end machine;

architecture behavior of machine is
  type etat is (attente, init, calcul, fin);
  type cal is ('0','1');
  signal state : etat := attente;
  signal done  : cal;               -- '1' only while in state fin
begin
  Racine : process(clock)
    variable X    : integer;   -- remaining part of the operand
    variable V    : integer;   -- current power of four: 256, 64, 16, 4, 1
    variable Z    : integer;   -- result being built
    variable cond : integer;   -- X minus the trial value
    variable i    : integer;   -- iterations left
  begin
    if rising_edge(clock) then
      -- Fix: the reset used to be a stand-alone `if` placed before the case
      -- statement. Every branch of the case assigns `state`, and the last
      -- assignment in a process wins, so RESET never had any effect. The case
      -- is now skipped entirely while RESET is '1'.
      if RESET = '1' then
        state <= attente;
      else
        case state is
          when attente =>
            if START = '1' then
              state <= init;
            else
              state <= attente;
            end if;
            done <= '0';
          when init =>
            -- Load the operand and reset the working variables.
            X := to_integer(unsigned(INPUT));
            V := 256;
            Z := 0;
            i := 5;
            done <= '0';
            state <= calcul;
          when calcul =>
            -- One digit-by-digit step: try to subtract the trial value
            -- (Z + V) and shift the result right by one bit.
            Z := Z+V;
            cond := X-Z;
            if cond >= 0 then
              X := X-Z;
              Z := (Z+V)/2;
            else
              Z := (Z-V)/2;
            end if;
            V := V/4;
            i := i-1;
            count <= std_logic_vector(to_unsigned(i, count'length));
            done <= '0';
            if i = 0 then
              state <= fin;
            else
              state <= calcul;
            end if;
          when fin =>
            done <= '1';
            OUTPUT <= std_logic_vector(to_unsigned(Z, OUTPUT'length));
            -- Wait for START to be released before accepting a new request.
            if START = '1' then
              state <= fin;
            else
              state <= attente;
            end if;
          when others =>
            state <= attente;
        end case;
      end if;
    end if;
  end process Racine;
end behavior;

@ -0,0 +1,10 @@
C:/Users/sradosa/Documents/VHDL/RacineCarre/MachineEtat.vhd {1 {vcom -work work -2002 -explicit C:/Users/sradosa/Documents/VHDL/RacineCarre/MachineEtat.vhd
Model Technology ModelSim ALTERA vcom 10.1d Compiler 2012.11 Nov 2 2012
-- Loading package STANDARD
-- Loading package TEXTIO
-- Loading package std_logic_1164
-- Loading package NUMERIC_STD
-- Compiling entity machine
-- Compiling architecture behavior of machine
} {} {}}

@ -0,0 +1,493 @@
; Copyright 1991-2009 Mentor Graphics Corporation
;
; All Rights Reserved.
;
; THIS WORK CONTAINS TRADE SECRET AND PROPRIETARY INFORMATION WHICH IS THE PROPERTY OF
; MENTOR GRAPHICS CORPORATION OR ITS LICENSORS AND IS SUBJECT TO LICENSE TERMS.
;
[Library]
std = $MODEL_TECH/../std
ieee = $MODEL_TECH/../ieee
verilog = $MODEL_TECH/../verilog
vital2000 = $MODEL_TECH/../vital2000
std_developerskit = $MODEL_TECH/../std_developerskit
synopsys = $MODEL_TECH/../synopsys
modelsim_lib = $MODEL_TECH/../modelsim_lib
sv_std = $MODEL_TECH/../sv_std
; Altera Primitive libraries
;
; VHDL Section
;
altera_mf = $MODEL_TECH/../altera/vhdl/altera_mf
altera = $MODEL_TECH/../altera/vhdl/altera
altera_lnsim = $MODEL_TECH/../altera/vhdl/altera_lnsim
lpm = $MODEL_TECH/../altera/vhdl/220model
220model = $MODEL_TECH/../altera/vhdl/220model
max = $MODEL_TECH/../altera/vhdl/max
maxii = $MODEL_TECH/../altera/vhdl/maxii
maxv = $MODEL_TECH/../altera/vhdl/maxv
stratix = $MODEL_TECH/../altera/vhdl/stratix
stratixii = $MODEL_TECH/../altera/vhdl/stratixii
stratixiigx = $MODEL_TECH/../altera/vhdl/stratixiigx
hardcopyii = $MODEL_TECH/../altera/vhdl/hardcopyii
hardcopyiii = $MODEL_TECH/../altera/vhdl/hardcopyiii
hardcopyiv = $MODEL_TECH/../altera/vhdl/hardcopyiv
cyclone = $MODEL_TECH/../altera/vhdl/cyclone
cycloneii = $MODEL_TECH/../altera/vhdl/cycloneii
cycloneiii = $MODEL_TECH/../altera/vhdl/cycloneiii
cycloneiiils = $MODEL_TECH/../altera/vhdl/cycloneiiils
sgate = $MODEL_TECH/../altera/vhdl/sgate
stratixgx = $MODEL_TECH/../altera/vhdl/stratixgx
altgxb = $MODEL_TECH/../altera/vhdl/altgxb
stratixgx_gxb = $MODEL_TECH/../altera/vhdl/stratixgx_gxb
stratixiigx_hssi = $MODEL_TECH/../altera/vhdl/stratixiigx_hssi
arriagx_hssi = $MODEL_TECH/../altera/vhdl/arriagx_hssi
arriaii = $MODEL_TECH/../altera/vhdl/arriaii
arriaii_hssi = $MODEL_TECH/../altera/vhdl/arriaii_hssi
arriaii_pcie_hip = $MODEL_TECH/../altera/vhdl/arriaii_pcie_hip
arriaiigz = $MODEL_TECH/../altera/vhdl/arriaiigz
arriaiigz_hssi = $MODEL_TECH/../altera/vhdl/arriaiigz_hssi
arriaiigz_pcie_hip = $MODEL_TECH/../altera/vhdl/arriaiigz_pcie_hip
arriagx = $MODEL_TECH/../altera/vhdl/arriagx
altgxb_lib = $MODEL_TECH/../altera/vhdl/altgxb
stratixiv = $MODEL_TECH/../altera/vhdl/stratixiv
stratixiv_hssi = $MODEL_TECH/../altera/vhdl/stratixiv_hssi
stratixiv_pcie_hip = $MODEL_TECH/../altera/vhdl/stratixiv_pcie_hip
cycloneiv = $MODEL_TECH/../altera/vhdl/cycloneiv
cycloneiv_hssi = $MODEL_TECH/../altera/vhdl/cycloneiv_hssi
cycloneiv_pcie_hip = $MODEL_TECH/../altera/vhdl/cycloneiv_pcie_hip
cycloneive = $MODEL_TECH/../altera/vhdl/cycloneive
hardcopyiv_hssi = $MODEL_TECH/../altera/vhdl/hardcopyiv_hssi
hardcopyiv_pcie_hip = $MODEL_TECH/../altera/vhdl/hardcopyiv_pcie_hip
stratixv = $MODEL_TECH/../altera/vhdl/stratixv
stratixv_hssi = $MODEL_TECH/../altera/vhdl/stratixv_hssi
stratixv_pcie_hip = $MODEL_TECH/../altera/vhdl/stratixv_pcie_hip
arriavgz = $MODEL_TECH/../altera/vhdl/arriavgz
arriavgz_hssi = $MODEL_TECH/../altera/vhdl/arriavgz_hssi
arriavgz_pcie_hip = $MODEL_TECH/../altera/vhdl/arriavgz_pcie_hip
arriav = $MODEL_TECH/../altera/vhdl/arriav
cyclonev = $MODEL_TECH/../altera/vhdl/cyclonev
;
; Verilog Section
;
altera_mf_ver = $MODEL_TECH/../altera/verilog/altera_mf
altera_ver = $MODEL_TECH/../altera/verilog/altera
altera_lnsim_ver = $MODEL_TECH/../altera/verilog/altera_lnsim
lpm_ver = $MODEL_TECH/../altera/verilog/220model
220model_ver = $MODEL_TECH/../altera/verilog/220model
max_ver = $MODEL_TECH/../altera/verilog/max
maxii_ver = $MODEL_TECH/../altera/verilog/maxii
maxv_ver = $MODEL_TECH/../altera/verilog/maxv
stratix_ver = $MODEL_TECH/../altera/verilog/stratix
stratixii_ver = $MODEL_TECH/../altera/verilog/stratixii
stratixiigx_ver = $MODEL_TECH/../altera/verilog/stratixiigx
arriagx_ver = $MODEL_TECH/../altera/verilog/arriagx
hardcopyii_ver = $MODEL_TECH/../altera/verilog/hardcopyii
hardcopyiii_ver = $MODEL_TECH/../altera/verilog/hardcopyiii
hardcopyiv_ver = $MODEL_TECH/../altera/verilog/hardcopyiv
cyclone_ver = $MODEL_TECH/../altera/verilog/cyclone
cycloneii_ver = $MODEL_TECH/../altera/verilog/cycloneii
cycloneiii_ver = $MODEL_TECH/../altera/verilog/cycloneiii
cycloneiiils_ver = $MODEL_TECH/../altera/verilog/cycloneiiils
sgate_ver = $MODEL_TECH/../altera/verilog/sgate
stratixgx_ver = $MODEL_TECH/../altera/verilog/stratixgx
altgxb_ver = $MODEL_TECH/../altera/verilog/altgxb
stratixgx_gxb_ver = $MODEL_TECH/../altera/verilog/stratixgx_gxb
stratixiigx_hssi_ver = $MODEL_TECH/../altera/verilog/stratixiigx_hssi
arriagx_hssi_ver = $MODEL_TECH/../altera/verilog/arriagx_hssi
arriaii_ver = $MODEL_TECH/../altera/verilog/arriaii
arriaii_hssi_ver = $MODEL_TECH/../altera/verilog/arriaii_hssi
arriaii_pcie_hip_ver = $MODEL_TECH/../altera/verilog/arriaii_pcie_hip
arriaiigz_ver = $MODEL_TECH/../altera/verilog/arriaiigz
arriaiigz_hssi_ver = $MODEL_TECH/../altera/verilog/arriaiigz_hssi
arriaiigz_pcie_hip_ver = $MODEL_TECH/../altera/verilog/arriaiigz_pcie_hip
stratixiii_ver = $MODEL_TECH/../altera/verilog/stratixiii
stratixiii = $MODEL_TECH/../altera/vhdl/stratixiii
stratixiv_ver = $MODEL_TECH/../altera/verilog/stratixiv
stratixiv_hssi_ver = $MODEL_TECH/../altera/verilog/stratixiv_hssi
stratixiv_pcie_hip_ver = $MODEL_TECH/../altera/verilog/stratixiv_pcie_hip
stratixv_ver = $MODEL_TECH/../altera/verilog/stratixv
stratixv_hssi_ver = $MODEL_TECH/../altera/verilog/stratixv_hssi
stratixv_pcie_hip_ver = $MODEL_TECH/../altera/verilog/stratixv_pcie_hip
arriavgz_ver = $MODEL_TECH/../altera/verilog/arriavgz
arriavgz_hssi_ver = $MODEL_TECH/../altera/verilog/arriavgz_hssi
arriavgz_pcie_hip_ver = $MODEL_TECH/../altera/verilog/arriavgz_pcie_hip
arriav_ver = $MODEL_TECH/../altera/verilog/arriav
arriav_hssi_ver = $MODEL_TECH/../altera/verilog/arriav_hssi
arriav_pcie_hip_ver = $MODEL_TECH/../altera/verilog/arriav_pcie_hip
cyclonev_ver = $MODEL_TECH/../altera/verilog/cyclonev
cyclonev_hssi_ver = $MODEL_TECH/../altera/verilog/cyclonev_hssi
cyclonev_pcie_hip_ver = $MODEL_TECH/../altera/verilog/cyclonev_pcie_hip
cycloneiv_ver = $MODEL_TECH/../altera/verilog/cycloneiv
cycloneiv_hssi_ver = $MODEL_TECH/../altera/verilog/cycloneiv_hssi
cycloneiv_pcie_hip_ver = $MODEL_TECH/../altera/verilog/cycloneiv_pcie_hip
cycloneive_ver = $MODEL_TECH/../altera/verilog/cycloneive
hardcopyiv_hssi_ver = $MODEL_TECH/../altera/verilog/hardcopyiv_hssi
hardcopyiv_pcie_hip_ver = $MODEL_TECH/../altera/verilog/hardcopyiv_pcie_hip
work = work
[vcom]
; VHDL93 variable selects language version as the default.
; Default is VHDL-2002.
; Value of 0 or 1987 for VHDL-1987.
; Value of 1 or 1993 for VHDL-1993.
; Default or value of 2 or 2002 for VHDL-2002.
; Default or value of 3 or 2008 for VHDL-2008.
VHDL93 = 2002
; Show source line containing error. Default is off.
; Show_source = 1
; Turn off unbound-component warnings. Default is on.
; Show_Warning1 = 0
; Turn off process-without-a-wait-statement warnings. Default is on.
; Show_Warning2 = 0
; Turn off null-range warnings. Default is on.
; Show_Warning3 = 0
; Turn off no-space-in-time-literal warnings. Default is on.
; Show_Warning4 = 0
; Turn off multiple-drivers-on-unresolved-signal warnings. Default is on.
; Show_Warning5 = 0
; Turn off optimization for IEEE std_logic_1164 package. Default is on.
; Optimize_1164 = 0
; Turn on resolving of ambiguous function overloading in favor of the
; "explicit" function declaration (not the one automatically created by
; the compiler for each type declaration). Default is off.
; The .ini file has Explicit enabled so that std_logic_signed/unsigned
; will match the behavior of synthesis tools.
Explicit = 1
; Turn off acceleration of the VITAL packages. Default is to accelerate.
; NoVital = 1
; Turn off VITAL compliance checking. Default is checking on.
; NoVitalCheck = 1
; Ignore VITAL compliance checking errors. Default is to not ignore.
; IgnoreVitalErrors = 1
; Turn off VITAL compliance checking warnings. Default is to show warnings.
; Show_VitalChecksWarnings = 0
; Keep silent about case statement static warnings.
; Default is to give a warning.
; NoCaseStaticError = 1
; Keep silent about warnings caused by aggregates that are not locally static.
; Default is to give a warning.
; NoOthersStaticError = 1
; Turn off inclusion of debugging info within design units.
; Default is to include debugging info.
; NoDebug = 1
; Turn off "Loading..." messages. Default is messages on.
; Quiet = 1
; Turn on some limited synthesis rule compliance checking. Checks only:
; -- signals used (read) by a process must be in the sensitivity list
; CheckSynthesis = 1
; Activate optimizations on expressions that do not involve signals,
; waits, or function/procedure/task invocations. Default is off.
; ScalarOpts = 1
; Require the user to specify a configuration for all bindings,
; and do not generate a compile time default binding for the
; component. This will result in an elaboration error of
; 'component not bound' if the user fails to do so. Avoids the rare
; issue of a false dependency upon the unused default binding.
; RequireConfigForAllDefaultBinding = 1
; Inhibit range checking on subscripts of arrays. Range checking on
; scalars defined with subtypes is inhibited by default.
; NoIndexCheck = 1
; Inhibit range checks on all (implicit and explicit) assignments to
; scalar objects defined with subtypes.
; NoRangeCheck = 1
[vlog]
; Turn off inclusion of debugging info within design units.
; Default is to include debugging info.
; NoDebug = 1
; Turn off "loading..." messages. Default is messages on.
; Quiet = 1
; Turn on Verilog hazard checking (order-dependent accessing of global vars).
; Default is off.
; Hazard = 1
; Turn on converting regular Verilog identifiers to uppercase. Allows case
; insensitivity for module names. Default is no conversion.
; UpCase = 1
; Turn on incremental compilation of modules. Default is off.
; Incremental = 1
; Turns on lint-style checking.
; Show_Lint = 1
[vsim]
; Simulator resolution
; Set to fs, ps, ns, us, ms, or sec with optional prefix of 1, 10, or 100.
Resolution = ps
; User time unit for run commands
; Set to default, fs, ps, ns, us, ms, or sec. The default is to use the
; unit specified for Resolution. For example, if Resolution is 100ps,
; then UserTimeUnit defaults to ps.
; Should generally be set to default.
UserTimeUnit = default
; Default run length
RunLength = 100 ps
; Maximum iterations that can be run without advancing simulation time
IterationLimit = 5000
; Directive to license manager:
; vhdl Immediately reserve a VHDL license
; vlog Immediately reserve a Verilog license
; plus Immediately reserve a VHDL and Verilog license
; nomgc Do not look for Mentor Graphics Licenses
; nomti Do not look for Model Technology Licenses
; noqueue Do not wait in the license queue when a license isn't available
; viewsim Try for viewer license but accept simulator license(s) instead
; of queuing for viewer license
; License = plus
; Stop the simulator after a VHDL/Verilog assertion message
; 0 = Note 1 = Warning 2 = Error 3 = Failure 4 = Fatal
BreakOnAssertion = 3
; Assertion Message Format
; %S - Severity Level
; %R - Report Message
; %T - Time of assertion
; %D - Delta
; %I - Instance or Region pathname (if available)
; %% - print '%' character
; AssertionFormat = "** %S: %R\n Time: %T Iteration: %D%I\n"
; Assertion File - alternate file for storing VHDL/Verilog assertion messages
; AssertFile = assert.log
; Default radix for all windows and commands...
; Set to symbolic, ascii, binary, octal, decimal, hex, unsigned
DefaultRadix = symbolic
; VSIM Startup command
; Startup = do startup.do
; File for saving command transcript
TranscriptFile = transcript
; File for saving command history
; CommandHistory = cmdhist.log
; Specify whether paths in simulator commands should be described
; in VHDL or Verilog format.
; For VHDL, PathSeparator = /
; For Verilog, PathSeparator = .
; Must not be the same character as DatasetSeparator.
PathSeparator = /
; Specify the dataset separator for fully rooted contexts.
; The default is ':'. For example, sim:/top
; Must not be the same character as PathSeparator.
DatasetSeparator = :
; Disable VHDL assertion messages
; IgnoreNote = 1
; IgnoreWarning = 1
; IgnoreError = 1
; IgnoreFailure = 1
; Default force kind. May be freeze, drive, deposit, or default
; or in other terms, fixed, wired, or charged.
; A value of "default" will use the signal kind to determine the
; force kind, drive for resolved signals, freeze for unresolved signals
; DefaultForceKind = freeze
; If zero, open files when elaborated; otherwise, open files on
; first read or write. Default is 0.
; DelayFileOpen = 1
; Control VHDL files opened for write.
; 0 = Buffered, 1 = Unbuffered
UnbufferedOutput = 0
; Control the number of VHDL files open concurrently.
; This number should always be less than the current ulimit
; setting for max file descriptors.
; 0 = unlimited
ConcurrentFileLimit = 40
; Control the number of hierarchical regions displayed as
; part of a signal name shown in the Wave window.
; A value of zero tells VSIM to display the full name.
; The default is 0.
; WaveSignalNameWidth = 0
; Turn off warnings from the std_logic_arith, std_logic_unsigned
; and std_logic_signed packages.
; StdArithNoWarnings = 1
; Turn off warnings from the IEEE numeric_std and numeric_bit packages.
; NumericStdNoWarnings = 1
; Control the format of the (VHDL) FOR generate statement label
; for each iteration. Do not quote it.
; The format string here must contain the conversion codes %s and %d,
; in that order, and no other conversion codes. The %s represents
; the generate_label; the %d represents the generate parameter value
; at a particular generate iteration (this is the position number if
; the generate parameter is of an enumeration type). Embedded whitespace
; is allowed (but discouraged); leading and trailing whitespace is ignored.
; Application of the format must result in a unique scope name over all
; such names in the design so that name lookup can function properly.
; GenerateFormat = %s__%d
; Specify whether checkpoint files should be compressed.
; The default is 1 (compressed).
; CheckpointCompressMode = 0
; List of dynamically loaded objects for Verilog PLI applications
; Veriuser = veriuser.sl
; Specify default options for the restart command. Options can be one
; or more of: -force -nobreakpoint -nolist -nolog -nowave
; DefaultRestartOptions = -force
; HP-UX 10.20 ONLY - Enable memory locking to speed up large designs
; (> 500 megabyte memory footprint). Default is disabled.
; Specify number of megabytes to lock.
; LockedMemory = 1000
; Turn on (1) or off (0) WLF file compression.
; The default is 1 (compress WLF file).
; WLFCompress = 0
; Specify whether to save all design hierarchy (1) in the WLF file
; or only regions containing logged signals (0).
; The default is 0 (save only regions with logged signals).
; WLFSaveAllRegions = 1
; WLF file time limit. Limit WLF file by time, as closely as possible,
; to the specified amount of simulation time. When the limit is exceeded
; the earliest times get truncated from the file.
; If both time and size limits are specified the most restrictive is used.
; UserTimeUnits are used if time units are not specified.
; The default is 0 (no limit). Example: WLFTimeLimit = {100 ms}
; WLFTimeLimit = 0
; WLF file size limit. Limit WLF file size, as closely as possible,
; to the specified number of megabytes. If both time and size limits
; are specified then the most restrictive is used.
; The default is 0 (no limit).
; WLFSizeLimit = 1000
; Specify whether or not a WLF file should be deleted when the
; simulation ends. A value of 1 will cause the WLF file to be deleted.
; The default is 0 (do not delete WLF file when simulation ends).
; WLFDeleteOnQuit = 1
; Automatic SDF compilation
; Disables automatic compilation of SDF files in flows that support it.
; Default is on, uncomment to turn off.
; NoAutoSDFCompile = 1
[lmc]
[msg_system]
; Change a message severity or suppress a message.
; The format is: <msg directive> = <msg number>[,<msg number>...]
; Examples:
; note = 3009
; warning = 3033
; error = 3010,3016
; fatal = 3016,3033
; suppress = 3009,3016,3043
; The command verror <msg number> can be used to get the complete
; description of a message.
; Control transcripting of elaboration/runtime messages.
; The default is to have messages appear in the transcript and
; recorded in the wlf file (messages that are recorded in the
; wlf file can be viewed in the MsgViewer). The other settings
; are to send messages only to the transcript or only to the
; wlf file. The valid values are
; both {default}
; tran {transcript only}
; wlf {wlf file only}
; msgmode = both
[Project]
; Warning -- Do not edit the project properties directly.
; Property names are dynamic in nature and property
; values have special syntax. Changing property data directly
; can result in a corrupt MPF file. All project properties
; can be modified through project window dialogs.
Project_Version = 6
Project_DefaultLib = work
Project_SortMethod = unused
Project_Files_Count = 1
Project_File_0 = C:/Users/sradosa/Documents/VHDL/RacineCarre/MachineEtat.vhd
Project_File_P_0 = vhdl_novitalcheck 0 file_type vhdl group_id 0 cover_nofec 0 vhdl_nodebug 0 vhdl_1164 1 vhdl_noload 0 vhdl_synth 0 vhdl_enable0In 0 folder {Top Level} last_compile 1672916485 vhdl_disableopt 0 vhdl_vital 0 cover_excludedefault 0 vhdl_warn1 1 vhdl_warn2 1 vhdl_explicit 1 vhdl_showsource 0 vhdl_warn3 1 cover_covercells 0 vhdl_0InOptions {} vhdl_warn4 1 voptflow 1 cover_optlevel 3 vhdl_options {} vhdl_warn5 1 toggle - ood 0 cover_noshort 0 compile_to work compile_order 0 cover_nosub 0 dont_compile 0 vhdl_use93 2002
Project_Sim_Count = 0
Project_Folder_Count = 0
Echo_Compile_Output = 0
Save_Compile_Report = 1
Project_Opt_Count = 0
ForceSoftPaths = 0
ProjectStatusDelay = 5000
VERILOG_DoubleClick = Edit
VERILOG_CustomDoubleClick =
SYSTEMVERILOG_DoubleClick = Edit
SYSTEMVERILOG_CustomDoubleClick =
VHDL_DoubleClick = Edit
VHDL_CustomDoubleClick =
PSL_DoubleClick = Edit
PSL_CustomDoubleClick =
TEXT_DoubleClick = Edit
TEXT_CustomDoubleClick =
SYSTEMC_DoubleClick = Edit
SYSTEMC_CustomDoubleClick =
TCL_DoubleClick = Edit
TCL_CustomDoubleClick =
MACRO_DoubleClick = Edit
MACRO_CustomDoubleClick =
VCD_DoubleClick = Edit
VCD_CustomDoubleClick =
SDF_DoubleClick = Edit
SDF_CustomDoubleClick =
XML_DoubleClick = Edit
XML_CustomDoubleClick =
LOGFILE_DoubleClick = Edit
LOGFILE_CustomDoubleClick =
UCDB_DoubleClick = Edit
UCDB_CustomDoubleClick =
UPF_DoubleClick = Edit
UPF_CustomDoubleClick =
PCF_DoubleClick = Edit
PCF_CustomDoubleClick =
PROJECT_DoubleClick = Edit
PROJECT_CustomDoubleClick =
VRM_DoubleClick = Edit
VRM_CustomDoubleClick =
DEBUGDATABASE_DoubleClick = Edit
DEBUGDATABASE_CustomDoubleClick =
DEBUGARCHIVE_DoubleClick = Edit
DEBUGARCHIVE_CustomDoubleClick =
Project_Major_Version = 10
Project_Minor_Version = 1

Binary file not shown.

@ -0,0 +1,42 @@
m255
K3
13
cModel Technology
dC:\Users\sradosa\Documents\VHDL
Emachine
Z0 w1672916485
Z1 DPx4 ieee 11 numeric_std 0 22 O3PF8EB`?j9=z7KT`fn941
Z2 DPx3 std 6 textio 0 22 5>J:;AW>W0[[dW0I6EN1Q0
Z3 DPx4 ieee 14 std_logic_1164 0 22 5=aWaoGZSMWIcH0i^f`XF1
Z4 dC:\Users\sradosa\Documents\VHDL\RacineCarre
Z5 8C:/Users/sradosa/Documents/VHDL/RacineCarre/MachineEtat.vhd
Z6 FC:/Users/sradosa/Documents/VHDL/RacineCarre/MachineEtat.vhd
l0
L6
V_5VhKfLk^85mfN;VW;;?n1
!s100 @0@SHm25b0h]:;L_]<W^J1
Z7 OV;C;10.1d;51
32
!i10b 1
Z8 !s108 1672916490.089000
Z9 !s90 -reportprogress|300|-work|work|-2002|-explicit|C:/Users/sradosa/Documents/VHDL/RacineCarre/MachineEtat.vhd|
Z10 !s107 C:/Users/sradosa/Documents/VHDL/RacineCarre/MachineEtat.vhd|
Z11 o-work work -2002 -explicit -O0
Z12 tExplicit 1
Abehavior
R1
R2
R3
Z13 DEx4 work 7 machine 0 22 _5VhKfLk^85mfN;VW;;?n1
l24
L17
VM<B]kKfzV0lFW1RQKJZI03
!s100 EzC9NIZUcm_kc9d2NNeMQ3
R7
32
!i10b 1
R8
R9
R10
R11
R12

@ -0,0 +1,3 @@
m255
K3
cModel Technology

@ -0,0 +1,20 @@
#include <stdio.h>
/*
 * Replace every element of data[0..n-1] by the inverse of its square,
 * i.e. data[i] becomes 1 / (data[i] * data[i]).
 *
 * data : array of at least n floats, updated in place
 * n    : number of elements to process; nothing happens when n <= 0
 */
void f(float * data, int n){
    int idx = 0;
    while (idx < n) {
        const float value = data[idx];
        data[idx] = 1.f / (value * value);
        ++idx;
    }
}
/*
 * Replace every element of data[0..n-1] by the inverse of its square and
 * return the sum of the new values.
 *
 * data : array of at least n floats, updated in place
 * n    : number of elements to process
 *
 * Returns the sum, accumulated in single precision in index order;
 * 0 when n <= 0.
 */
float fsum(float * data, int n){
    float total = 0;
    int idx = 0;
    while (idx < n) {
        const float inverse = 1.f / (data[idx] * data[idx]);
        data[idx] = inverse;
        total += inverse;
        ++idx;
    }
    return total;
}
/* Entry point: performs no work and reports success. */
int main(void)
{
    return 0;
}

@ -0,0 +1,93 @@
.file "toto.c"
.text
# f(data, n): replaces each of the n floats at data by 1/(x*x), one element
# per loop iteration. Register use below: %rdi = data, %esi = n.
.globl f
.type f, @function
f:
.LFB23:
.cfi_startproc
endbr64
# Nothing to do when n <= 0.
testl %esi, %esi
jle .L1
# %rax = cursor on the current element, %rdx = data + 4*n (one past the end).
movq %rdi, %rax
leal -1(%rsi), %edx
leaq 4(%rdi,%rdx,4), %rdx
# %xmm1 = constant stored at .LC0 (numerator of the division).
movss .LC0(%rip), %xmm1
.L3:
# %xmm0 = x*x, %xmm2 = %xmm1 / (x*x), written back in place.
movss (%rax), %xmm0
mulss %xmm0, %xmm0
movaps %xmm1, %xmm2
divss %xmm0, %xmm2
movss %xmm2, (%rax)
# Advance by one float and loop until the end pointer is reached.
addq $4, %rax
cmpq %rdx, %rax
jne .L3
.L1:
ret
.cfi_endproc
.LFE23:
.size f, .-f
# fsum(data, n): replaces each of the n floats at data by 1/(x*x) and returns
# their sum in %xmm0. Register use below: %rdi = data, %esi = n.
.globl fsum
.type fsum, @function
fsum:
.LFB24:
.cfi_startproc
endbr64
# n <= 0: go return a zero sum.
testl %esi, %esi
jle .L8
# %rax = cursor on the current element, %rdx = data + 4*n (one past the end).
movq %rdi, %rax
leal -1(%rsi), %edx
leaq 4(%rdi,%rdx,4), %rdx
# %xmm1 = running sum (cleared), %xmm2 = constant stored at .LC0.
pxor %xmm1, %xmm1
movss .LC0(%rip), %xmm2
.L7:
# %xmm0 = x*x, %xmm3 = %xmm2 / (x*x); store it and add it to the sum.
movss (%rax), %xmm0
mulss %xmm0, %xmm0
movaps %xmm2, %xmm3
divss %xmm0, %xmm3
movss %xmm3, (%rax)
addss %xmm3, %xmm1
addq $4, %rax
cmpq %rdx, %rax
jne .L7
.L5:
# Move the sum into the return register.
movaps %xmm1, %xmm0
ret
.L8:
# Empty input: the sum is zero.
pxor %xmm1, %xmm1
jmp .L5
.cfi_endproc
.LFE24:
.size fsum, .-fsum
# main(): does nothing and returns 0 in %eax.
.globl main
.type main, @function
main:
.LFB25:
.cfi_startproc
endbr64
movl $0, %eax
ret
.cfi_endproc
.LFE25:
.size main, .-main
.section .rodata.cst4,"aM",@progbits,4
.align 4
.LC0:
.long 1065353216
.ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:

@ -0,0 +1,207 @@
.file "toto.c"
.text
.p2align 4
# f(data, n): replaces each of the n floats at data by 1/(x*x).
# Vectorised version: 4 floats per iteration using an approximate reciprocal
# (rcpps) refined once, then up to 3 leftover elements handled with divss.
# Register use below: %rdi = data, %esi = n.
.globl f
.type f, @function
f:
.LFB23:
.cfi_startproc
endbr64
# Nothing to do when n <= 0.
testl %esi, %esi
jle .L1
# n - 1 <= 2 (n <= 3): too short for a vector iteration, go to the scalar tail.
leal -1(%rsi), %eax
cmpl $2, %eax
jbe .L8
# %rax = cursor, %rdx = data + 16*(n/4) = end of the part processed 4 at a time.
movl %esi, %edx
movq %rdi, %rax
shrl $2, %edx
salq $4, %rdx
addq %rdi, %rdx
.p2align 4,,10
.p2align 3
.L4:
# Load 4 floats (unaligned) and square them: %xmm0 = a = x*x.
movups (%rax), %xmm0
addq $16, %rax
mulps %xmm0, %xmm0
# %xmm1 = r, approximate 1/a.
rcpps %xmm0, %xmm1
# One refinement step: result = 2*r - a*r*r.
mulps %xmm1, %xmm0
mulps %xmm1, %xmm0
addps %xmm1, %xmm1
subps %xmm0, %xmm1
# Store the 4 results over the inputs (cursor was already advanced).
movups %xmm1, -16(%rax)
cmpq %rdx, %rax
jne .L4
# %eax = n rounded down to a multiple of 4 = index of the first leftover.
movl %esi, %eax
andl $-4, %eax
# n is a multiple of 4: no leftover elements.
testb $3, %sil
je .L11
.L3:
# Scalar tail, unrolled for at most 3 elements starting at index %eax.
# First leftover: data[i] = (.LC1 constant) / (x*x).
movslq %eax, %rdx
leaq (%rdi,%rdx,4), %rdx
movss (%rdx), %xmm0
movaps %xmm0, %xmm1
mulss %xmm0, %xmm1
movss .LC1(%rip), %xmm0
movaps %xmm0, %xmm3
divss %xmm1, %xmm3
movss %xmm3, (%rdx)
# Stop if i + 1 >= n.
leal 1(%rax), %edx
cmpl %edx, %esi
jle .L1
# Second leftover at index i + 1.
movslq %edx, %rdx
movaps %xmm0, %xmm4
addl $2, %eax
leaq (%rdi,%rdx,4), %rdx
movss (%rdx), %xmm1
mulss %xmm1, %xmm1
divss %xmm1, %xmm4
movss %xmm4, (%rdx)
# Stop if i + 2 >= n.
cmpl %eax, %esi
jle .L1
# Third leftover at index i + 2.
cltq
leaq (%rdi,%rax,4), %rax
movss (%rax), %xmm1
mulss %xmm1, %xmm1
divss %xmm1, %xmm0
movss %xmm0, (%rax)
.L1:
ret
.p2align 4,,10
.p2align 3
.L11:
ret
.L8:
# Short input: run the scalar tail from index 0.
xorl %eax, %eax
jmp .L3
.cfi_endproc
.LFE23:
.size f, .-f
.p2align 4
# fsum(data, n): replaces each of the n floats at data by 1/(x*x) and returns
# their sum in %xmm0.
# Vectorised version: 4 floats per iteration with 4 partial sums kept in
# %xmm2, reduced to one value after the loop; up to 3 leftover elements are
# then handled with divss. Register use below: %rdi = data, %esi = n.
.globl fsum
.type fsum, @function
fsum:
.LFB24:
.cfi_startproc
endbr64
# n <= 0: return 0.
testl %esi, %esi
jle .L18
# n - 1 <= 2 (n <= 3): too short for a vector iteration, go to the scalar tail.
leal -1(%rsi), %eax
cmpl $2, %eax
jbe .L19
# %rax = cursor, %xmm2 = 4 partial sums (cleared),
# %rdx = data + 16*(n/4) = end of the part processed 4 at a time.
movl %esi, %edx
movq %rdi, %rax
pxor %xmm2, %xmm2
shrl $2, %edx
salq $4, %rdx
addq %rdi, %rdx
.p2align 4,,10
.p2align 3
.L15:
# Load 4 floats (unaligned) and square them: %xmm1 = a = x*x.
movups (%rax), %xmm1
addq $16, %rax
mulps %xmm1, %xmm1
# %xmm0 = r, approximate 1/a, then one refinement step: 2*r - a*r*r.
rcpps %xmm1, %xmm0
mulps %xmm0, %xmm1
mulps %xmm0, %xmm1
addps %xmm0, %xmm0
subps %xmm1, %xmm0
# Store the 4 results and accumulate them lane by lane.
movups %xmm0, -16(%rax)
addps %xmm0, %xmm2
cmpq %rdx, %rax
jne .L15
# Horizontal reduction of the 4 partial sums into lane 0 of %xmm0:
# add the upper pair onto the lower pair, then add lane 1 onto lane 0.
# %eax = n rounded down to a multiple of 4 = index of the first leftover.
movaps %xmm2, %xmm0
movl %esi, %eax
movhlps %xmm2, %xmm0
andl $-4, %eax
addps %xmm0, %xmm2
movaps %xmm2, %xmm0
shufps $85, %xmm2, %xmm0
addps %xmm0, %xmm2
movaps %xmm2, %xmm0
# n is a multiple of 4: the sum in %xmm0 is final.
testb $3, %sil
je .L21
.L14:
# Scalar tail, unrolled for at most 3 elements starting at index %eax.
# First leftover: data[i] = (.LC1 constant) / (x*x), added to the sum.
movslq %eax, %rdx
leaq (%rdi,%rdx,4), %rdx
movss (%rdx), %xmm1
movaps %xmm1, %xmm2
mulss %xmm1, %xmm2
movss .LC1(%rip), %xmm1
movaps %xmm1, %xmm4
divss %xmm2, %xmm4
movss %xmm4, (%rdx)
leal 1(%rax), %edx
addss %xmm4, %xmm0
# Stop if i + 1 >= n.
cmpl %edx, %esi
jle .L12
# Second leftover at index i + 1.
movslq %edx, %rdx
movaps %xmm1, %xmm5
addl $2, %eax
leaq (%rdi,%rdx,4), %rdx
movss (%rdx), %xmm2
mulss %xmm2, %xmm2
divss %xmm2, %xmm5
addss %xmm5, %xmm0
movss %xmm5, (%rdx)
# Stop if i + 2 >= n.
cmpl %eax, %esi
jle .L12
# Third leftover at index i + 2.
cltq
leaq (%rdi,%rax,4), %rax
movss (%rax), %xmm2
mulss %xmm2, %xmm2
divss %xmm2, %xmm1
addss %xmm1, %xmm0
movss %xmm1, (%rax)
ret
.p2align 4,,10
.p2align 3
.L18:
# Empty input: the sum is zero.
pxor %xmm0, %xmm0
.L12:
ret
.p2align 4,,10
.p2align 3
.L21:
ret
.L19:
# Short input: run the scalar tail from index 0 with a zero sum.
xorl %eax, %eax
pxor %xmm0, %xmm0
jmp .L14
.cfi_endproc
.LFE24:
.size fsum, .-fsum
.section .text.startup,"ax",@progbits
.p2align 4
# main(): does nothing and returns 0 in %eax.
.globl main
.type main, @function
main:
.LFB25:
.cfi_startproc
endbr64
xorl %eax, %eax
ret
.cfi_endproc
.LFE25:
.size main, .-main
.section .rodata.cst4,"aM",@progbits,4
.align 4
.LC1:
.long 1065353216
.ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:

@ -0,0 +1,17 @@
#include <stdio.h>
#include <math.h>
/*
 * Scaled accumulation: adds a * x[i] to y[i] for every i in [0, n).
 *
 * x : input array of at least n floats (only read)
 * y : array of at least n floats, updated in place
 * a : scale factor applied to each x[i]
 * n : number of elements to process; nothing happens when n <= 0
 */
void f(float * x, float * y, float a, int n){
    int idx = 0;
    while (idx < n) {
        y[idx] = y[idx] + a * x[idx];
        ++idx;
    }
}
void f2(float * x, float * y, float a, int n){
for(int i = 0; i<n; i++){
y[i] += sqrt(a*x[i]);
}
}
/* Entry point: performs no work and reports success. */
int main(void)
{
    return 0;
}

@ -0,0 +1,86 @@
.file "toto2.c"
.text
# f(x, y, a, n): y[i] += a * x[i] for i in [0, n), one element per iteration.
# Register use below: %rdi = x, %rsi = y, %xmm0 = a, %edx = n.
.globl f
.type f, @function
f:
.LFB23:
.cfi_startproc
endbr64
# Nothing to do when n <= 0.
testl %edx, %edx
jle .L1
# %rcx = n - 1 (last index), %rax = current index i = 0.
leal -1(%rdx), %ecx
movl $0, %eax
.L3:
# %xmm1 = a * x[i] + y[i], stored back into y[i].
movaps %xmm0, %xmm1
mulss (%rdi,%rax,4), %xmm1
addss (%rsi,%rax,4), %xmm1
movss %xmm1, (%rsi,%rax,4)
# Keep the index just processed in %rdx, advance i, and loop until the
# processed index was the last one.
movq %rax, %rdx
addq $1, %rax
cmpq %rcx, %rdx
jne .L3
.L1:
ret
.cfi_endproc
.LFE23:
.size f, .-f
# f2(x, y, a, n): y[i] += sqrt(a * x[i]) for i in [0, n), one element per
# iteration. The square root and the addition are done in double precision,
# hence the float<->double conversions around them.
# Register use below: %rdi = x, %rsi = y, %xmm0 = a, %edx = n.
.globl f2
.type f2, @function
f2:
.LFB24:
.cfi_startproc
endbr64
# Nothing to do when n <= 0.
testl %edx, %edx
jle .L5
# %rax = cursor in y, %rdx = y + 4*n (one past the end); %rdi walks x.
movq %rsi, %rax
leal -1(%rdx), %edx
leaq 4(%rsi,%rdx,4), %rdx
.L7:
# %xmm1 = a * x[i] in single precision, then widened to double.
movaps %xmm0, %xmm1
mulss (%rdi), %xmm1
cvtss2sd %xmm1, %xmm1
# %xmm2 = double-precision square root of the product.
movapd %xmm1, %xmm2
sqrtsd %xmm2, %xmm2
# %xmm1 = (double) y[i] + root, narrowed back to float and stored.
pxor %xmm1, %xmm1
cvtss2sd (%rax), %xmm1
addsd %xmm2, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, (%rax)
# Advance both cursors by one float.
addq $4, %rax
addq $4, %rdi
cmpq %rdx, %rax
jne .L7
.L5:
ret
.cfi_endproc
.LFE24:
.size f2, .-f2
# main(): does nothing and returns 0 in %eax.
.globl main
.type main, @function
main:
.LFB25:
.cfi_startproc
endbr64
movl $0, %eax
ret
.cfi_endproc
.LFE25:
.size main, .-main
.ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:

@ -0,0 +1,231 @@
.file "toto2.c"
.text
.p2align 4
# f(x, y, a, n): y[i] += a * x[i] for i in [0, n).
# Vectorised version: 4 floats per iteration when x and y are at least
# 16 bytes apart and n > 3, followed by up to 3 leftover elements; otherwise
# a plain scalar loop is used.
# Register use below: %rdi = x, %rsi = y, %xmm0 = a, %edx = n.
.globl f
.type f, @function
f:
.LFB23:
.cfi_startproc
endbr64
# Nothing to do when n <= 0.
testl %edx, %edx
jle .L1
# %rcx = (y + 15) - x, %eax = n - 1.
leaq 15(%rsi), %rcx
leal -1(%rdx), %eax
subq %rdi, %rcx
# Unsigned (y + 15 - x) <= 30 means x and y are less than 16 bytes apart:
# a 16-byte vector access could overlap, so use the scalar loop.
cmpq $30, %rcx
jbe .L3
# n - 1 <= 2 (n <= 3): too short for a vector iteration, use the scalar loop.
cmpl $2, %eax
jbe .L3
# %xmm2 = a copied into all 4 lanes, %rax = byte offset 0,
# %rcx = 16*(n/4) = byte size of the part processed 4 at a time.
movl %edx, %ecx
movaps %xmm0, %xmm2
xorl %eax, %eax
shrl $2, %ecx
shufps $0, %xmm2, %xmm2
salq $4, %rcx
.p2align 4,,10
.p2align 3
.L4:
# Load 4 x and 4 y (unaligned), compute a*x + y, store into y.
movups (%rdi,%rax), %xmm1
movups (%rsi,%rax), %xmm3
mulps %xmm2, %xmm1
addps %xmm3, %xmm1
movups %xmm1, (%rsi,%rax)
addq $16, %rax
cmpq %rcx, %rax
jne .L4
# %eax = n rounded down to a multiple of 4 = index of the first leftover.
movl %edx, %eax
andl $-4, %eax
# n is a multiple of 4: done.
testb $3, %dl
je .L1
# Scalar tail, unrolled for at most 3 elements. First leftover at index i.
movl %eax, %r8d
movss (%rdi,%r8,4), %xmm1
leaq (%rsi,%r8,4), %rcx
mulss %xmm0, %xmm1
addss (%rcx), %xmm1
movss %xmm1, (%rcx)
# Stop if i + 1 >= n.
leal 1(%rax), %ecx
cmpl %ecx, %edx
jle .L1
# Second leftover at index i + 1.
movslq %ecx, %rcx
addl $2, %eax
movss (%rdi,%rcx,4), %xmm1
leaq (%rsi,%rcx,4), %r8
mulss %xmm0, %xmm1
addss (%r8), %xmm1
movss %xmm1, (%r8)
# Stop if i + 2 >= n.
cmpl %eax, %edx
jle .L1
# Third leftover at index i + 2.
cltq
mulss (%rdi,%rax,4), %xmm0
leaq (%rsi,%rax,4), %rdx
addss (%rdx), %xmm0
movss %xmm0, (%rdx)
ret
.p2align 4,,10
.p2align 3
.L3:
# Scalar fallback: %rdx = n - 1 (last index), %rax = index 0.
movl %eax, %edx
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L6:
# y[i] = a * x[i] + y[i]; loop until the index just processed was the last.
movss (%rdi,%rax,4), %xmm1
movq %rax, %rcx
mulss %xmm0, %xmm1
addss (%rsi,%rax,4), %xmm1
movss %xmm1, (%rsi,%rax,4)
addq $1, %rax
cmpq %rdx, %rcx
jne .L6
.L1:
ret
.cfi_endproc
.LFE23:
.size f, .-f
.p2align 4
# f2(x, y, a, n): y[i] += sqrt(a * x[i]) for i in [0, n).
# Vectorised version: 4 floats per iteration when x and y are at least
# 16 bytes apart and n > 3. The square root and the addition are done in
# double precision, so each group of 4 floats is split into two pairs of
# doubles (sqrtpd works on 2 doubles) and converted back afterwards.
# Register use below: %rdi = x, %rsi = y, %xmm0 = a, %edx = n.
.globl f2
.type f2, @function
f2:
.LFB24:
.cfi_startproc
endbr64
# Nothing to do when n <= 0.
testl %edx, %edx
jle .L17
# %rax = (y + 15) - x, %ecx = n - 1.
leaq 15(%rsi), %rax
leal -1(%rdx), %ecx
subq %rdi, %rax
# Unsigned (y + 15 - x) <= 30 means x and y are less than 16 bytes apart:
# a 16-byte vector access could overlap, so use the scalar loop.
cmpq $30, %rax
jbe .L19
# n - 1 <= 2 (n <= 3): too short for a vector iteration, use the scalar loop.
cmpl $2, %ecx
jbe .L19
# %xmm7 = a copied into all 4 lanes, %rax = byte offset 0,
# %rcx = 16*(n/4) = byte size of the part processed 4 at a time.
movl %edx, %ecx
movaps %xmm0, %xmm7
xorl %eax, %eax
shrl $2, %ecx
shufps $0, %xmm7, %xmm7
salq $4, %rcx
.p2align 4,,10
.p2align 3
.L20:
# %xmm2 = a * x for 4 floats; %xmm6 (low half) = upper two floats of y.
movups (%rdi,%rax), %xmm2
movlps 8(%rsi,%rax), %xmm6
mulps %xmm7, %xmm2
# %xmm5 (low half) = upper two products.
movhlps %xmm2, %xmm5
# Lower pair: widen products to double, sqrt, add the lower two y as doubles.
cvtps2pd %xmm2, %xmm1
sqrtpd %xmm1, %xmm4
cvtps2pd (%rsi,%rax), %xmm1
cvtps2pd %xmm5, %xmm2
addpd %xmm4, %xmm1
# Upper pair: same treatment.
sqrtpd %xmm2, %xmm3
cvtps2pd %xmm6, %xmm2
addpd %xmm3, %xmm2
# Narrow both pairs back to float, merge them into one 4-float vector, store.
cvtpd2ps %xmm1, %xmm1
cvtpd2ps %xmm2, %xmm2
movlhps %xmm2, %xmm1
movups %xmm1, (%rsi,%rax)
addq $16, %rax
cmpq %rcx, %rax
jne .L20
# %eax = n rounded down to a multiple of 4 = index of the first leftover.
movl %edx, %eax
andl $-4, %eax
# n is a multiple of 4: done.
testb $3, %dl
je .L17
# Scalar tail, unrolled for at most 3 elements. First leftover at index i:
# product in float, sqrt and addition in double, result narrowed to float.
movl %eax, %r8d
movss (%rdi,%r8,4), %xmm1
leaq (%rsi,%r8,4), %rcx
mulss %xmm0, %xmm1
cvtss2sd %xmm1, %xmm1
movapd %xmm1, %xmm2
pxor %xmm1, %xmm1
sqrtsd %xmm2, %xmm2
cvtss2sd (%rcx), %xmm1
addsd %xmm2, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, (%rcx)
# Stop if i + 1 >= n.
leal 1(%rax), %ecx
cmpl %ecx, %edx
jle .L17
# Second leftover at index i + 1.
movslq %ecx, %rcx
addl $2, %eax
movss (%rdi,%rcx,4), %xmm1
leaq (%rsi,%rcx,4), %r8
mulss %xmm0, %xmm1
cvtss2sd %xmm1, %xmm1
movapd %xmm1, %xmm2
pxor %xmm1, %xmm1
sqrtsd %xmm2, %xmm2
cvtss2sd (%r8), %xmm1
addsd %xmm2, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, (%r8)
# Stop if i + 2 >= n.
cmpl %eax, %edx
jle .L17
# Third leftover at index i + 2.
cltq
mulss (%rdi,%rax,4), %xmm0
leaq (%rsi,%rax,4), %rdx
cvtss2sd %xmm0, %xmm0
sqrtsd %xmm0, %xmm0
movapd %xmm0, %xmm1
pxor %xmm0, %xmm0
cvtss2sd (%rdx), %xmm0
addsd %xmm1, %xmm0
cvtsd2ss %xmm0, %xmm0
movss %xmm0, (%rdx)
ret
.p2align 4,,10
.p2align 3
.L19:
# Scalar fallback: %rax = y + 4*n (one past the end); %rsi and %rdi are
# advanced through y and x.
leaq 4(%rsi,%rcx,4), %rax
.p2align 4,,10
.p2align 3
.L22:
# One element: product in float, sqrt and addition in double, stored back
# as float at the y position just stepped over (-4(%rsi)).
movss (%rdi), %xmm1
addq $4, %rsi
addq $4, %rdi
mulss %xmm0, %xmm1
cvtss2sd %xmm1, %xmm1
movapd %xmm1, %xmm2
pxor %xmm1, %xmm1
sqrtsd %xmm2, %xmm2
cvtss2sd -4(%rsi), %xmm1
addsd %xmm2, %xmm1
cvtsd2ss %xmm1, %xmm1
movss %xmm1, -4(%rsi)
cmpq %rax, %rsi
jne .L22
.L17:
ret
.cfi_endproc
.LFE24:
.size f2, .-f2
.section .text.startup,"ax",@progbits
.p2align 4
# main(): does nothing and returns 0 in %eax.
.globl main
.type main, @function
main:
.LFB25:
.cfi_startproc
endbr64
xorl %eax, %eax
ret
.cfi_endproc
.LFE25:
.size main, .-main
.ident "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:

@ -0,0 +1,27 @@
# Cours A4
## 16/01 SIMD
-O1 pas d'opti
-O3 opti avec parallélisation, on transforme en a + b*c
-ffast-math, relâche le respect strict de la norme IEEE 754 (arrondis, NaN/Inf) pour autoriser davantage d'optimisations sur les flottants
https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html
<xmmintrin.h>
si la fonction est de type
for (i=0, i++)
f(d[i]...)
{+ - / *}
float / double
on ne touche pas à la fonction, le compilateur sait vectoriser.
sinon, on va aller chercher dans les fonctions simd
attention aux shuffles, la partie 1 ne contient que des données de a et la partie 2 que des données de b.
## 19/01 OpenMP
$P41
Attention aux clauses : si les temps d'exécution des itérations sont identiques, la demande de travail est plus coûteuse que le gain apporté.

Binary file not shown.

Binary file not shown.

@ -350,7 +350,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]"
},
"vscode": {
"interpreter": {

Loading…
Cancel
Save