1941 lines
93 KiB
C++
1941 lines
93 KiB
C++
|
/*****************************************************************************
|
||
|
* CACTI 7.0
|
||
|
* SOFTWARE LICENSE AGREEMENT
|
||
|
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||
|
* All Rights Reserved
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions are
|
||
|
* met: redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer;
|
||
|
* redistributions in binary form must reproduce the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer in the
|
||
|
* documentation and/or other materials provided with the distribution;
|
||
|
* neither the name of the copyright holders nor the names of its
|
||
|
* contributors may be used to endorse or promote products derived from
|
||
|
* this software without specific prior written permission.
|
||
|
|
||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*
|
||
|
***************************************************************************/
|
||
|
|
||
|
|
||
|
|
||
|
#include "mat.h"
|
||
|
#include <assert.h>
|
||
|
|
||
|
|
||
|
/**
 * Builds the model of one mat from the dynamic parameters `dyn_p`.
 *
 * The constructor
 *   - copies the organisation parameters into members (initializer list),
 *   - selects the port counts (RWP/ERP/EWP/SCHP) either from `dp` or from the
 *     global input `g_ip`, depending on `dp.use_inp_params`,
 *   - computes the wire/gate loads seen by the row decoder, the bit-line mux
 *     decoder and the two sense-amp mux decoders and allocates those decoders
 *     together with their predecode blocks and predecode block drivers,
 *   - allocates the subarray output wire and the bit-line precharge/equalise
 *     driver(s),
 *   - optionally allocates the SRAM sleep transistor (power gating), and
 *   - derives `area.h` / `area.w` of the mat.
 *
 * All objects allocated here with `new` are owned by the Mat and released in
 * ~Mat().
 */
Mat::Mat(const DynamicParameter & dyn_p)
 :dp(dyn_p),
  power_subarray_out_drv(),
  delay_fa_tag(0), delay_cam(0),
  delay_before_decoder(0), delay_bitline(0),
  delay_wl_reset(0), delay_bl_restore(0),
  delay_searchline(0), delay_matchchline(0),
  delay_cam_sl_restore(0), delay_cam_ml_reset(0),
  delay_fa_ram_wl(0),delay_hit_miss_reset(0),
  delay_hit_miss(0),
  subarray(dp, dp.fully_assoc),
  power_bitline(), per_bitline_read_energy(0),
  deg_bl_muxing(dp.deg_bl_muxing),
  num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
  delay_writeback(0),
  cell(subarray.cell), cam_cell(subarray.cam_cell),
  is_dram(dyn_p.is_dram),
  pure_cam(dyn_p.pure_cam),
  num_mats(dp.num_mats),
  power_sa(), delay_sa(0),
  leak_power_sense_amps_closed_page_state(0),
  leak_power_sense_amps_open_page_state(0),
  delay_subarray_out_drv(0),
  delay_comparator(0), power_comparator(),
  num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
  num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
  num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir),
  array_leakage(0),
  wl_leakage(0),
  cl_leakage(0)
{
  // Components that are only allocated under some configurations start out
  // null so that ~Mat() never sees an indeterminate pointer (deleting a null
  // pointer is a no-op).
  sram_sleep_tx           = 0;
  bl_precharge_eq_drv     = 0;
  cam_bl_precharge_eq_drv = 0;
  // The following four are not allocated in this constructor; presumably they
  // are created by the CAM delay computation -- TODO confirm.
  sl_precharge_eq_drv     = 0;
  sl_data_drv             = 0;
  ml_precharge_drv        = 0;
  ml_to_ram_wl_drv        = 0;

  assert(num_subarrays_per_mat <= 4);
  assert(num_subarrays_per_row <= 2);
  is_fa = (dp.fully_assoc) ? true : false;
  camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.

  if (is_fa || pure_cam)
    num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;

  // Port counts: per-array values from dp when requested, otherwise the global input.
  if (dp.use_inp_params == 1) {
    RWP  = dp.num_rw_ports;
    ERP  = dp.num_rd_ports;
    EWP  = dp.num_wr_ports;
    SCHP = dp.num_search_ports;
  }
  else {
    RWP  = g_ip->num_rw_ports;
    ERP  = g_ip->num_rd_ports;
    EWP  = g_ip->num_wr_ports;
    SCHP = g_ip->num_search_ports;
  }

  // Number of sense amplifiers per subarray for plain RAM / fully associative / pure CAM.
  double number_sa_subarray;

  if (!is_fa && !pure_cam)
  {
    number_sa_subarray = subarray.num_cols / deg_bl_muxing;
  }
  else if (is_fa && !pure_cam)
  {
    number_sa_subarray =  (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
  }
  else
  {
    number_sa_subarray =  (subarray.num_cols_fa_cam) / deg_bl_muxing;
  }

  int    num_dec_signals           = subarray.num_rows;
  double C_ld_bit_mux_dec_out      = 0;
  double C_ld_sa_mux_lev_1_dec_out = 0;
  double C_ld_sa_mux_lev_2_dec_out = 0;
  double R_wire_wl_drv_out;

  // Word-line wire resistance driven by the row decoder.
  if (!is_fa && !pure_cam)
  {
    R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
  }
  else if (is_fa && !pure_cam)
  {
    R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
  }
  else
  {
    R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
  }

  double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
  double R_wire_sa_mux_dec_out  = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;

  if (deg_bl_muxing > 1)
  {
    C_ld_bit_mux_dec_out =
      (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) +  // 2 transistor per cell
      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
  }

  if (dp.Ndsam_lev_1 > 1)
  {
    C_ld_sa_mux_lev_1_dec_out =
      (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
  }
  if (dp.Ndsam_lev_2 > 1)
  {
    C_ld_sa_mux_lev_2_dec_out =
      (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
  }

  if (num_subarrays_per_row >= 2)
  {
    // wire heads for both right and left side of a mat, so half the resistance
    R_wire_bit_mux_dec_out /= 2.0;
    R_wire_sa_mux_dec_out  /= 2.0;
  }

  row_dec = new Decoder(
      num_dec_signals,
      false,
      subarray.C_wl,
      R_wire_wl_drv_out,
      false/*is_fa*/,
      is_dram,
      true,
      camFlag? cam_cell:cell);

  row_dec->nodes_DSTN = subarray.num_rows;//TODO: this is not a good way for OOO programming
//  if (is_fa && (!dp.is_tag))
//  {
//    row_dec->exist = true;
//  }
  bit_mux_dec = new Decoder(
      deg_bl_muxing,// This number is 1 for FA or CAM
      false,
      C_ld_bit_mux_dec_out,
      R_wire_bit_mux_dec_out,
      false/*is_fa*/,
      is_dram,
      false,
      camFlag? cam_cell:cell);
  sa_mux_lev_1_dec = new Decoder(
      dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
      dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
      C_ld_sa_mux_lev_1_dec_out,
      R_wire_sa_mux_dec_out,
      false/*is_fa*/,
      is_dram,
      false,
      camFlag? cam_cell:cell);
  sa_mux_lev_2_dec = new Decoder(
      dp.Ndsam_lev_2, // This number is 1 for FA or CAM
      false,
      C_ld_sa_mux_lev_2_dec_out,
      R_wire_sa_mux_dec_out,
      false/*is_fa*/,
      is_dram,
      false,
      camFlag? cam_cell:cell);

  // Wire load at the output of the row predecode blocks.
  double C_wire_predec_blk_out;
  double R_wire_predec_blk_out;

  if (!is_fa && !pure_cam)
  {
    C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
    R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
  }
  else //for pre-decode block's load is same for both FA and CAM
  {
    C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
    R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
  }

  if (is_fa||pure_cam)
    num_dec_signals += _log2(num_subarrays_per_mat);

  PredecBlk * r_predec_blk1 = new PredecBlk(
      num_dec_signals,
      row_dec,
      C_wire_predec_blk_out,
      R_wire_predec_blk_out,
      num_subarrays_per_mat,
      is_dram,
      true);
  PredecBlk * r_predec_blk2 = new PredecBlk(
      num_dec_signals,
      row_dec,
      C_wire_predec_blk_out,
      R_wire_predec_blk_out,
      num_subarrays_per_mat,
      is_dram,
      false);
  PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
  PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
  PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
  PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
  PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
  PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
  dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
  dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);

  PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
  PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
  PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
  PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
  PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
  PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
  PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
  PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
  way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
  dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);

  // The blocks/drivers created above are reachable through these Predec
  // objects (blk1/blk2/drv1/drv2) and are released through them in ~Mat().
  r_predec            = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
  b_mux_predec        = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
  sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
  sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);

  subarray_out_wire   = new Wire(dp.wtype, g_ip->cl_vertical?subarray.area.w:subarray.area.h);//Bug should be subarray.area.w Owen and
  //subarray_out_wire   = new Wire(g_ip->wt, g_ip->cl_vertical?subarray.area.w:subarray.area.h);//Bug should be subarray.area.w Owen and

  // Bit-line precharge / equalise driver(s).
  double driver_c_gate_load;
  double driver_c_wire_load;
  double driver_r_wire_load;

  if (is_fa || pure_cam)
  { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
    driver_c_gate_load =  (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
    driver_c_wire_load =  subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
    driver_r_wire_load =  subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
    cam_bl_precharge_eq_drv = new Driver(
        driver_c_gate_load,
        driver_c_wire_load,
        driver_r_wire_load,
        is_dram);

    if (!pure_cam)
    {
      //This is only used for fully asso not pure CAM
      driver_c_gate_load =  (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
      driver_c_wire_load =  subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
      driver_r_wire_load =  subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
      bl_precharge_eq_drv = new Driver(
          driver_c_gate_load,
          driver_c_wire_load,
          driver_r_wire_load,
          is_dram);
    }
  }
  else
  {
    driver_c_gate_load =  subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
    driver_c_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
    driver_r_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
    bl_precharge_eq_drv = new Driver(
        driver_c_gate_load,
        driver_c_wire_load,
        driver_r_wire_load,
        is_dram);
  }

  // ---- Area computation ----
  double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
  double w_row_decoder    = area_row_decoder / subarray.area.get_h();

  double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
    compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();

  /* This means the subarray drivers are along the vertical direction since / subarray.area.get_w() is used;
   * so the subarray_out_wire (actually the drivers) are under the subarray and along the x direction.
   * So, as noted where subarray_out_wire is created above
   * (subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and),
   * changing the out_wire (driver) to run along the y direction needs careful rethinking
   * rather than just simply switching w with h.
   */
  double h_subarray_out_drv = subarray_out_wire->area.get_area() *
    (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();

  h_subarray_out_drv *= (RWP + ERP + SCHP);

  double h_comparators                = 0.0;
  double w_row_predecode_output_wires = 0.0;
  double h_bit_mux_dec_out_wires      = 0.0;
  double h_senseamp_mux_dec_out_wires = 0.0;

  if ((!is_fa)&&(dp.is_tag))
  {
    //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
    h_comparators  = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
    h_comparators *= (RWP + ERP);
  }

  //power-gating circuit
  bool is_footer = false;
  double Isat_subarray = 2* simplified_nmos_Isat(g_tp.sram.cell_nmos_w, is_dram, true);//only one wordline active in a subarray 2 means two inverters in an SRAM cell
  double detalV_array;//, deltaV_wl, deltaV_floatingBL;
  double c_wakeup_array;

  if (!(is_fa || pure_cam) && g_ip->power_gating)
  {//for SRAM only at this moment
    c_wakeup_array = drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true);//1 inv
    c_wakeup_array += 2*drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true)
      + drain_C_(g_tp.sram.cell_nmos_w, NCH, 1, 1, cell.h, is_dram, true);//1 inv
    c_wakeup_array *= subarray.num_rows;
    detalV_array = g_tp.sram_cell.Vdd-g_tp.sram_cell.Vcc_min;

    sram_sleep_tx = new Sleep_tx (g_ip->perfloss,
        Isat_subarray,
        is_footer,
        c_wakeup_array,
        detalV_array,
        1,
        cell);

    // The sleep transistor adds to the subarray height.
    subarray.area.set_h(subarray.area.h+ sram_sleep_tx->area.h);

    //TODO: add the sleep tx in the wl driver and
  }

  int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
  int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
  w_row_predecode_output_wires   = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
    g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);

  double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
    (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
     h_subarray_out_drv + h_comparators);

  double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);

  if (deg_bl_muxing > 1)
  {
    h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
  }
  if (dp.Ndsam_lev_1 > 1)
  {
    h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
  }
  if (dp.Ndsam_lev_2 > 1)
  {
    h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
  }

  double h_addr_datain_wires;
  if (!g_ip->ver_htree_wires_over_array)
  {
    h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
        (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
      g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);

    if (is_fa || pure_cam)
    {
      h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +     //TODO: revisit
          (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
        g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
        (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
    }
    //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
    //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
    h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
        h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
      h_addr_datain_wires +
      h_bit_mux_dec_out_wires +
      h_senseamp_mux_dec_out_wires;
  }

  // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
  double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
      b_mux_predec_blk_drv1->area.get_area() +
      sa_mux_lev_1_predec_blk_drv1->area.get_area() +
      sa_mux_lev_2_predec_blk_drv1->area.get_area() +
      way_sel_drv1->area.get_area() +
      r_predec_blk_drv2->area.get_area() +
      b_mux_predec_blk_drv2->area.get_area() +
      sa_mux_lev_1_predec_blk_drv2->area.get_area() +
      sa_mux_lev_2_predec_blk_drv2->area.get_area() +
      r_predec_blk1->area.get_area() +
      b_mux_predec_blk1->area.get_area() +
      sa_mux_lev_1_predec_blk1->area.get_area() +
      sa_mux_lev_2_predec_blk1->area.get_area() +
      r_predec_blk2->area.get_area() +
      b_mux_predec_blk2->area.get_area() +
      sa_mux_lev_1_predec_blk2->area.get_area() +
      sa_mux_lev_2_predec_blk2->area.get_area() +
      bit_mux_dec->area.get_area() +
      sa_mux_lev_1_dec->area.get_area() +
      sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);

  assert(num_subarrays_per_mat/num_subarrays_per_row>0);
  area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
  area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
  // Widen the mat so that the centre circuitry area is absorbed at constant height.
  area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;

  if (g_ip->is_3d_mem)
  {
    // 3D memory: a mat is a single subarray plus its column circuitry.
    h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
        h_subarray_out_drv);
    area.h = subarray.area.h + h_non_cell_area;
    area.w = subarray.area.w;
    if (g_ip->print_detail_debug)
      cout << "actual subarray width: " << cell.w * subarray.num_cols /1e3 << " mm" << endl;
  }

  if (g_ip->print_detail_debug)
  {
    cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
    cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
    cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
    cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
  }

  assert(area.h>0);
  assert(area.w>0);
}



/**
 * Releases every component allocated by the Mat.
 *
 * The predecode blocks and their drivers are reached through the Predec
 * objects (blk1/blk2/drv1/drv2), so they are deleted before the Predec
 * objects themselves.
 */
Mat::~Mat()
{
  delete row_dec;
  delete bit_mux_dec;
  delete sa_mux_lev_1_dec;
  delete sa_mux_lev_2_dec;

  delete r_predec->blk1;
  delete r_predec->blk2;
  delete b_mux_predec->blk1;
  delete b_mux_predec->blk2;
  delete sa_mux_lev_1_predec->blk1;
  delete sa_mux_lev_1_predec->blk2;
  delete sa_mux_lev_2_predec->blk1;
  delete sa_mux_lev_2_predec->blk2;
  delete dummy_way_sel_predec_blk1;
  delete dummy_way_sel_predec_blk2;

  delete r_predec->drv1;
  delete r_predec->drv2;
  delete b_mux_predec->drv1;
  delete b_mux_predec->drv2;
  delete sa_mux_lev_1_predec->drv1;
  delete sa_mux_lev_1_predec->drv2;
  delete sa_mux_lev_2_predec->drv1;
  delete sa_mux_lev_2_predec->drv2;
  delete way_sel_drv1;
  delete dummy_way_sel_predec_blk_drv2;

  delete r_predec;
  delete b_mux_predec;
  delete sa_mux_lev_1_predec;
  delete sa_mux_lev_2_predec;

  delete subarray_out_wire;
  if (!pure_cam)
    delete bl_precharge_eq_drv;

  if (is_fa || pure_cam)
  {
    delete sl_precharge_eq_drv ;
    delete sl_data_drv ;
    delete cam_bl_precharge_eq_drv;
    delete ml_precharge_drv;
    delete ml_to_ram_wl_drv;
  }

  // The constructor sets sram_sleep_tx to null unless power gating allocated
  // it, so an unconditional delete is correct. (The previous
  // `if (!sram_sleep_tx)` test was inverted and leaked the object.)
  delete sram_sleep_tx;
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_delays(double inrisetime)
|
||
|
{
|
||
|
int k;
|
||
|
double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
|
||
|
double outrisetime_search, outrisetime, row_dec_outrisetime;
|
||
|
// delay calculation for tags of fully associative cache
|
||
|
if (is_fa || pure_cam)
|
||
|
{
|
||
|
//Compute search access time
|
||
|
outrisetime_search = compute_cam_delay(inrisetime);
|
||
|
if (is_fa)
|
||
|
{
|
||
|
bl_precharge_eq_drv->compute_delay(0);
|
||
|
k = ml_to_ram_wl_drv->number_gates - 1;
|
||
|
rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
|
||
|
C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
|
||
|
drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
|
||
|
C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
|
||
|
tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
|
||
|
delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
|
||
|
|
||
|
R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
|
||
|
r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
|
||
|
R_bl = subarray.num_rows * r_b_metal;
|
||
|
C_bl = subarray.C_bl;
|
||
|
delay_bl_restore = bl_precharge_eq_drv->delay +
|
||
|
log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
|
||
|
(R_bl_precharge * C_bl + R_bl * C_bl / 2);
|
||
|
|
||
|
|
||
|
outrisetime_search = compute_bitline_delay(outrisetime_search);
|
||
|
outrisetime_search = compute_sa_delay(outrisetime_search);
|
||
|
}
|
||
|
outrisetime_search = compute_subarray_out_drv(outrisetime_search);
|
||
|
subarray_out_wire->set_in_rise_time(outrisetime_search);
|
||
|
outrisetime_search = subarray_out_wire->signal_rise_time();
|
||
|
delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
|
||
|
|
||
|
|
||
|
//TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
|
||
|
outrisetime = r_predec->compute_delays(inrisetime);
|
||
|
row_dec_outrisetime = row_dec->compute_delays(outrisetime);
|
||
|
|
||
|
outrisetime = b_mux_predec->compute_delays(inrisetime);
|
||
|
bit_mux_dec->compute_delays(outrisetime);
|
||
|
|
||
|
outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
|
||
|
sa_mux_lev_1_dec->compute_delays(outrisetime);
|
||
|
|
||
|
outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
|
||
|
sa_mux_lev_2_dec->compute_delays(outrisetime);
|
||
|
|
||
|
if (pure_cam)
|
||
|
{
|
||
|
outrisetime = compute_bitline_delay(row_dec_outrisetime);
|
||
|
outrisetime = compute_sa_delay(outrisetime);
|
||
|
}
|
||
|
return outrisetime_search;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
bl_precharge_eq_drv->compute_delay(0);
|
||
|
if (row_dec->exist == true)
|
||
|
{
|
||
|
int k = row_dec->num_gates - 1;
|
||
|
double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
|
||
|
// TODO: this 4*cell.h number must be revisited
|
||
|
double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
|
||
|
drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
|
||
|
double C_ld = row_dec->C_ld_dec_out;
|
||
|
double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
|
||
|
delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
|
||
|
}
|
||
|
double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
|
||
|
double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
|
||
|
double R_bl = subarray.num_rows * r_b_metal;
|
||
|
double C_bl = subarray.C_bl;
|
||
|
|
||
|
if (is_dram)
|
||
|
{
|
||
|
delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
delay_bl_restore = bl_precharge_eq_drv->delay +
|
||
|
log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
|
||
|
(R_bl_precharge * C_bl + R_bl * C_bl / 2);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
outrisetime = r_predec->compute_delays(inrisetime);
|
||
|
row_dec_outrisetime = row_dec->compute_delays(outrisetime);
|
||
|
|
||
|
outrisetime = b_mux_predec->compute_delays(inrisetime);
|
||
|
bit_mux_dec->compute_delays(outrisetime);
|
||
|
|
||
|
outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
|
||
|
sa_mux_lev_1_dec->compute_delays(outrisetime);
|
||
|
|
||
|
outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
|
||
|
sa_mux_lev_2_dec->compute_delays(outrisetime);
|
||
|
|
||
|
//CACTI3DD
|
||
|
if(g_ip->is_3d_mem)
|
||
|
{
|
||
|
row_dec_outrisetime = inrisetime;
|
||
|
}
|
||
|
|
||
|
outrisetime = compute_bitline_delay(row_dec_outrisetime);
|
||
|
outrisetime = compute_sa_delay(outrisetime);
|
||
|
outrisetime = compute_subarray_out_drv(outrisetime);
|
||
|
subarray_out_wire->set_in_rise_time(outrisetime);
|
||
|
outrisetime = subarray_out_wire->signal_rise_time();
|
||
|
|
||
|
delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
|
||
|
|
||
|
if (dp.is_tag == true && dp.fully_assoc == false)
|
||
|
{
|
||
|
compute_comparator_delay(0);
|
||
|
}
|
||
|
|
||
|
if (row_dec->exist == false)
|
||
|
{
|
||
|
delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
|
||
|
}
|
||
|
return outrisetime;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
|
||
|
{
|
||
|
|
||
|
double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
|
||
|
compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
|
||
|
|
||
|
if (deg_bl_muxing > 1)
|
||
|
{
|
||
|
height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
|
||
|
// height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
|
||
|
}
|
||
|
|
||
|
height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
|
||
|
|
||
|
if (dp.Ndsam_lev_1 > 1)
|
||
|
{
|
||
|
height += compute_tr_width_after_folding(
|
||
|
g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
|
||
|
//height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
|
||
|
}
|
||
|
|
||
|
if (dp.Ndsam_lev_2 > 1)
|
||
|
{
|
||
|
height += compute_tr_width_after_folding(
|
||
|
g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
|
||
|
//height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
|
||
|
|
||
|
// add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
|
||
|
height += 2 * compute_tr_width_after_folding(
|
||
|
pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
|
||
|
height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
|
||
|
}
|
||
|
|
||
|
// TODO: this should be uncommented...
|
||
|
/*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
|
||
|
{
|
||
|
//height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
|
||
|
double width_write_driver_write_mux = width_write_driver_or_write_mux();
|
||
|
double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
|
||
|
cell.w *
|
||
|
// deg_bl_muxing *
|
||
|
dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
|
||
|
height += height_write_driver_write_mux;
|
||
|
}*/
|
||
|
|
||
|
if (g_ip->is_3d_mem)
|
||
|
{
|
||
|
//height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
|
||
|
double width_write_driver_write_mux = width_write_driver_or_write_mux();
|
||
|
double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, cell.w);
|
||
|
height += height_write_driver_write_mux;
|
||
|
}
|
||
|
|
||
|
return height;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_cam_delay(double inrisetime)
|
||
|
{
|
||
|
|
||
|
double out_time_ramp, this_delay;
|
||
|
double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
|
||
|
|
||
|
|
||
|
double Wfaprechp, Wdummyn, Wdummyinvn, Wdummyinvp, Waddrnandn, Waddrnandp,
|
||
|
Wfanorn, Wfanorp, W_hit_miss_n, W_hit_miss_p;
|
||
|
|
||
|
/**
|
||
|
double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
|
||
|
Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
|
||
|
Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
|
||
|
Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
|
||
|
**/
|
||
|
|
||
|
double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
|
||
|
int Htagbits;
|
||
|
|
||
|
double driver_c_gate_load;
|
||
|
double driver_c_wire_load;
|
||
|
double driver_r_wire_load;
|
||
|
//double searchline_precharge_time;
|
||
|
|
||
|
double leak_power_cc_inverters_sram_cell = 0;
|
||
|
double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
|
||
|
double leak_power_RD_port_sram_cell = 0;
|
||
|
double leak_power_SCHP_port_sram_cell = 0;
|
||
|
double leak_comparator_cam_cell =0;
|
||
|
|
||
|
double gate_leak_comparator_cam_cell = 0;
|
||
|
double gate_leak_power_cc_inverters_sram_cell = 0;
|
||
|
double gate_leak_power_RD_port_sram_cell = 0;
|
||
|
double gate_leak_power_SCHP_port_sram_cell = 0;
|
||
|
|
||
|
c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
|
||
|
c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
|
||
|
r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
|
||
|
r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
|
||
|
|
||
|
dynSearchEng = 0.0;
|
||
|
delay_matchchline = 0.0;
|
||
|
double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
|
||
|
bool linear_scaling = false;
|
||
|
|
||
|
if (linear_scaling)
|
||
|
{
|
||
|
/// Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
|
||
|
/// Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
|
||
|
/// Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
/// Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
|
||
|
/// Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
|
||
|
/// Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
|
||
|
/// Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
/// Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
|
||
|
/// Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
|
||
|
/// Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
/// Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
|
||
|
/// Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
|
||
|
/// Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
|
||
|
/// Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
/// Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
|
||
|
|
||
|
Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
|
||
|
Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
|
||
|
Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
|
||
|
Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
W_hit_miss_n = Wdummyn;
|
||
|
W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
|
||
|
//TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/// Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
|
||
|
/// Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
|
||
|
/// Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
/// Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
|
||
|
/// Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
|
||
|
/// Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
|
||
|
/// Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
|
||
|
/// Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
/// Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
|
||
|
/// Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
|
||
|
/// Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
/// Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
|
||
|
/// Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
|
||
|
/// Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
|
||
|
/// Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
/// Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
|
||
|
|
||
|
Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
|
||
|
Wdummyn = g_tp.cam.cell_nmos_w;
|
||
|
Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
|
||
|
Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
|
||
|
Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
|
||
|
Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
|
||
|
Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
|
||
|
W_hit_miss_n = Wdummyn;
|
||
|
W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
|
||
|
}
|
||
|
|
||
|
Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
|
||
|
|
||
|
/* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
|
||
|
search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
|
||
|
From the driver(am and an) to the comparators in all the rows including the dummy row,
|
||
|
Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
|
||
|
|
||
|
//Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
|
||
|
//Searchline precharge routes horizontally
|
||
|
driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
|
||
|
driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
|
||
|
driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
|
||
|
|
||
|
sl_precharge_eq_drv = new Driver(
|
||
|
driver_c_gate_load,
|
||
|
driver_c_wire_load,
|
||
|
driver_r_wire_load,
|
||
|
is_dram);
|
||
|
|
||
|
//searchline data driver ; subarray.num_rows + 1 is because of the dummy row
|
||
|
//data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
|
||
|
driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
|
||
|
driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
|
||
|
driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
|
||
|
sl_data_drv = new Driver(
|
||
|
driver_c_gate_load,
|
||
|
driver_c_wire_load,
|
||
|
driver_r_wire_load,
|
||
|
is_dram);
|
||
|
|
||
|
sl_precharge_eq_drv->compute_delay(0);
|
||
|
double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
|
||
|
double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
|
||
|
double R_bl = (subarray.num_rows + 1) * r_b_metal;
|
||
|
double C_bl = subarray.C_bl_cam;
|
||
|
delay_cam_sl_restore = sl_precharge_eq_drv->delay
|
||
|
+ log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
|
||
|
|
||
|
out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
|
||
|
|
||
|
//matchline ops delay
|
||
|
delay_matchchline += sl_data_drv->delay;
|
||
|
|
||
|
/* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
|
||
|
//matchline delay, matchline power, matchline_reset for cycle time computation,
|
||
|
|
||
|
////matchline precharge circuitry routes vertically
|
||
|
//There are two matchline precharge driver chains per subarray.
|
||
|
driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
|
||
|
driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
|
||
|
driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
|
||
|
|
||
|
ml_precharge_drv = new Driver(
|
||
|
driver_c_gate_load,
|
||
|
driver_c_wire_load,
|
||
|
driver_r_wire_load,
|
||
|
is_dram);
|
||
|
|
||
|
ml_precharge_drv->compute_delay(0);
|
||
|
|
||
|
|
||
|
rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
|
||
|
c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
|
||
|
+ drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
|
||
|
|
||
|
Cwire = c_matchline_metal * Htagbits;
|
||
|
Rwire = r_matchline_metal * Htagbits;
|
||
|
c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
|
||
|
|
||
|
double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
|
||
|
//double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
|
||
|
double R_ml = Rwire;
|
||
|
double C_ml = Cwire + c_intrinsic;
|
||
|
delay_cam_ml_reset = ml_precharge_drv->delay
|
||
|
+ log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
|
||
|
|
||
|
//matchline ops delay
|
||
|
tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
|
||
|
this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
|
||
|
delay_matchchline += this_delay;
|
||
|
out_time_ramp = this_delay / VTHFA3;
|
||
|
|
||
|
dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
|
||
|
* g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
|
||
|
|
||
|
/* third stage, from the NAND2 gates to the drivers in the dummy row */
|
||
|
rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
|
||
|
c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
|
||
|
c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
|
||
|
tf = rd * (c_intrinsic + c_gate_load);
|
||
|
this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
|
||
|
out_time_ramp = this_delay / (1 - VTHFA4);
|
||
|
delay_matchchline += this_delay;
|
||
|
|
||
|
//only the dummy row has the extra inverter between NAND and NOR gates
|
||
|
dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
|
||
|
|
||
|
/* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
|
||
|
rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
|
||
|
c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
|
||
|
Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
|
||
|
Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
|
||
|
c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
|
||
|
tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
|
||
|
this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
|
||
|
out_time_ramp = this_delay / VTHFA5;
|
||
|
delay_matchchline += this_delay;
|
||
|
|
||
|
dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
|
||
|
|
||
|
/*final statge from the NOR gate to drive the wordline of the data portion */
|
||
|
|
||
|
//searchline data driver There are two matchline precharge driver chains per subarray.
|
||
|
driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
|
||
|
driver_c_wire_load = subarray.C_wl_ram;
|
||
|
driver_r_wire_load = subarray.R_wl_ram;
|
||
|
|
||
|
ml_to_ram_wl_drv = new Driver(
|
||
|
driver_c_gate_load,
|
||
|
driver_c_wire_load,
|
||
|
driver_r_wire_load,
|
||
|
is_dram);
|
||
|
|
||
|
|
||
|
|
||
|
rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
|
||
|
c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
|
||
|
c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
|
||
|
tf = rd * (c_intrinsic + c_gate_load);
|
||
|
this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
|
||
|
out_time_ramp = this_delay / (1-0.5);
|
||
|
delay_matchchline += this_delay;
|
||
|
|
||
|
out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
|
||
|
|
||
|
//c_gate_load energy is computed in ml_to_ram_wl_drv
|
||
|
dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
|
||
|
|
||
|
|
||
|
/* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
|
||
|
/*Precharge the hitting logic */
|
||
|
c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
|
||
|
Cwire = c_searchline_metal * subarray.num_rows;
|
||
|
Rwire = r_searchline_metal * subarray.num_rows;
|
||
|
c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
|
||
|
|
||
|
rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
|
||
|
//double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
|
||
|
double R_hit_miss = Rwire;
|
||
|
double C_hit_miss = Cwire + c_intrinsic;
|
||
|
delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
|
||
|
dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||
|
|
||
|
/*hitting logic evaluation */
|
||
|
c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
|
||
|
Cwire = c_searchline_metal * subarray.num_rows;
|
||
|
Rwire = r_searchline_metal * subarray.num_rows;
|
||
|
c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
|
||
|
|
||
|
rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
|
||
|
tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
|
||
|
|
||
|
delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
|
||
|
|
||
|
if (is_fa)
|
||
|
delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
|
||
|
|
||
|
dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||
|
|
||
|
/* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
|
||
|
|
||
|
power_matchline.searchOp.dynamic = dynSearchEng;
|
||
|
|
||
|
//leakage in one subarray
|
||
|
double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
|
||
|
double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
|
||
|
double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
|
||
|
double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
|
||
|
|
||
|
leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
|
||
|
leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
|
||
|
leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
|
||
|
leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
|
||
|
leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
|
||
|
|
||
|
power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
|
||
|
leak_comparator_cam_cell +
|
||
|
leak_power_acc_tr_RW_or_WR_port_sram_cell +
|
||
|
leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
|
||
|
leak_power_RD_port_sram_cell * ERP +
|
||
|
leak_power_SCHP_port_sram_cell*SCHP;
|
||
|
// power_matchline.searchOp.leakage += leak_comparator_cam_cell;
|
||
|
power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
|
||
|
power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
|
||
|
power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
|
||
|
power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
|
||
|
//In idle states, the hit/miss txs are closed (on) therefore no Isub
|
||
|
power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
|
||
|
// + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
|
||
|
|
||
|
//in idle state, Ig_on only possibly exist in access transistors of read only ports
|
||
|
double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
|
||
|
double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
|
||
|
double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
|
||
|
|
||
|
gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
|
||
|
gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
|
||
|
gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
|
||
|
gate_leak_power_SCHP_port_sram_cell = 0;
|
||
|
|
||
|
//cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
|
||
|
|
||
|
power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
|
||
|
power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
|
||
|
power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
|
||
|
power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
|
||
|
power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
|
||
|
power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
|
||
|
power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
|
||
|
power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
|
||
|
+ cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
|
||
|
|
||
|
|
||
|
return out_time_ramp;
|
||
|
}
|
||
|
|
||
|
|
||
|
double Mat::width_write_driver_or_write_mux()
|
||
|
{
|
||
|
// calculate resistance of SRAM cell pull-up PMOS transistor
|
||
|
// cam and sram have same cell trasistor properties
|
||
|
double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
|
||
|
double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
|
||
|
double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
|
||
|
double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
|
||
|
|
||
|
return width_write_driver_nmos;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_comparators_height(
|
||
|
int tagbits,
|
||
|
int number_ways_in_mat,
|
||
|
double subarray_mem_cell_area_width)
|
||
|
{
|
||
|
double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
|
||
|
double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
|
||
|
return cumulative_area / subarray_mem_cell_area_width;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_bitline_delay(double inrisetime)
|
||
|
{
|
||
|
double V_b_pre, v_th_mem_cell, V_wl;
|
||
|
double tstep;
|
||
|
double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
|
||
|
double blfloating_c =0.0;
|
||
|
double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
|
||
|
int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
|
||
|
|
||
|
double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
|
||
|
double R_bl = subarray.num_rows * R_b_metal;
|
||
|
double C_bl = subarray.C_bl;
|
||
|
|
||
|
// TODO: no leakage for DRAMs?
|
||
|
double leak_power_cc_inverters_sram_cell = 0;
|
||
|
double gate_leak_power_cc_inverters_sram_cell = 0;
|
||
|
double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
|
||
|
double leak_power_RD_port_sram_cell = 0;
|
||
|
double gate_leak_power_RD_port_sram_cell = 0;
|
||
|
|
||
|
// double leak_power_cc_inverters_sram_cell_gated = 0;
|
||
|
// double leak_power_acc_tr_RW_or_WR_port_sram_cell_floating = 0;
|
||
|
// double leak_power_RD_port_sram_cell_floating = 0;
|
||
|
|
||
|
|
||
|
if (is_dram == true)
|
||
|
{
|
||
|
V_b_pre = g_tp.dram.Vbitpre;
|
||
|
v_th_mem_cell = g_tp.dram_acc.Vth;
|
||
|
V_wl = g_tp.vpp;
|
||
|
//The access transistor is not folded. So we just need to specify a threshold value for the
|
||
|
//folding width that is equal to or greater than Wmemcella.
|
||
|
R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
|
||
|
r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
|
||
|
}
|
||
|
else
|
||
|
{ //SRAM
|
||
|
V_b_pre = g_tp.sram.Vbitpre;
|
||
|
v_th_mem_cell = g_tp.sram_cell.Vth;
|
||
|
V_wl = g_tp.sram_cell.Vdd;
|
||
|
R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
|
||
|
R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
|
||
|
|
||
|
//Leakage current of an SRAM cell
|
||
|
double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
|
||
|
double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
|
||
|
double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
|
||
|
|
||
|
// leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
|
||
|
// leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
|
||
|
// leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
|
||
|
|
||
|
|
||
|
leak_power_cc_inverters_sram_cell = Icell * (g_ip->array_power_gated? g_tp.sram_cell.Vcc_min : g_tp.sram_cell.Vdd);
|
||
|
leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * (g_ip->bitline_floating? g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd);
|
||
|
leak_power_RD_port_sram_cell = Iport_erp * (g_ip->bitline_floating? g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd);
|
||
|
//
|
||
|
// leak_power_cc_inverters_sram_cell_gated = leak_power_cc_inverters_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram_cell.Vcc_min;
|
||
|
// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating = leak_power_acc_tr_RW_or_WR_port_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating;
|
||
|
// leak_power_RD_port_sram_cell_floating = leak_power_RD_port_sram_cell_floating/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating;
|
||
|
//
|
||
|
|
||
|
|
||
|
//in idle state, Ig_on only possibly exist in access transistors of read only ports
|
||
|
double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
|
||
|
double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
|
||
|
|
||
|
gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
|
||
|
gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
|
||
|
}
|
||
|
|
||
|
|
||
|
double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
|
||
|
double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
|
||
|
double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
|
||
|
double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
|
||
|
double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
|
||
|
drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
|
||
|
drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
|
||
|
double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
|
||
|
|
||
|
if (is_dram)
|
||
|
{
|
||
|
double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
|
||
|
//tstep = 2.3 * fraction * r_dev *
|
||
|
tstep = fraction * r_dev * (g_ip->is_3d_mem==1?1:2.3) *
|
||
|
(g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
|
||
|
(g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
|
||
|
delay_writeback = tstep;
|
||
|
dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
|
||
|
(g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
|
||
|
dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
|
||
|
(g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
|
||
|
per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
|
||
|
(g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
double tau;
|
||
|
|
||
|
if (deg_bl_muxing > 1)
|
||
|
{
|
||
|
tau = (R_cell_pull_down + R_cell_acc) *
|
||
|
(C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
|
||
|
R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
|
||
|
R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
|
||
|
R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
|
||
|
dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
|
||
|
subarray.num_cols * num_subarrays_per_mat*/;
|
||
|
blfloating_c += (C_bl + 2 * C_drain_bit_mux) * 2;
|
||
|
dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
|
||
|
2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
|
||
|
blfloating_c += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *2;
|
||
|
dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
|
||
|
num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
|
||
|
//Write Ops are differential for SRAM
|
||
|
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
tau = (R_cell_pull_down + R_cell_acc) *
|
||
|
(C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
|
||
|
R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
|
||
|
dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
|
||
|
2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
|
||
|
|
||
|
blfloating_c += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 2;
|
||
|
dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
|
||
|
num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
|
||
|
|
||
|
}
|
||
|
tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
|
||
|
|
||
|
|
||
|
// if (g_ip->array_power_gated)
|
||
|
// power_bitline.readOp.leakage =
|
||
|
// leak_power_cc_inverters_sram_cell_gated +
|
||
|
// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating +
|
||
|
// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating * (RWP + EWP - 1) +
|
||
|
// leak_power_RD_port_sram_cell_floating * ERP;
|
||
|
// else
|
||
|
power_bitline.readOp.leakage =
|
||
|
leak_power_cc_inverters_sram_cell +
|
||
|
leak_power_acc_tr_RW_or_WR_port_sram_cell +
|
||
|
leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
|
||
|
leak_power_RD_port_sram_cell * ERP;
|
||
|
|
||
|
power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
|
||
|
gate_leak_power_RD_port_sram_cell * ERP;
|
||
|
|
||
|
}
|
||
|
|
||
|
// cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
|
||
|
// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
|
||
|
// cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
|
||
|
// cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
|
||
|
|
||
|
|
||
|
/* take input rise time into account */
|
||
|
double m = V_wl / inrisetime;
|
||
|
if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
|
||
|
{
|
||
|
delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
|
||
|
}
|
||
|
|
||
|
bool is_fa = (dp.fully_assoc) ? true : false;
|
||
|
|
||
|
if (dp.is_tag == false || is_fa == false)
|
||
|
{
|
||
|
power_bitline.readOp.dynamic = dynRdEnergy;
|
||
|
power_bitline.writeOp.dynamic = dynWriteEnergy;
|
||
|
}
|
||
|
|
||
|
//bitfloating
|
||
|
blfloating_wakeup_t = blfloating_c * (g_tp.sram_cell.Vdd-g_tp.sram.Vbitfloating) / (simplified_pmos_Isat(g_tp.w_pmos_bl_precharge)/Ilinear_to_Isat_ratio) ;
|
||
|
blfloating_wakeup_e.readOp.dynamic = dynRdEnergy/dp.V_b_sense*(g_tp.sram_cell.Vdd-g_tp.sram.Vbitfloating)*subarray.num_rows * num_subarrays_per_mat * dp.num_act_mats_hor_dir;
|
||
|
|
||
|
|
||
|
double outrisetime = 0;
|
||
|
return outrisetime;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_sa_delay(double inrisetime)
|
||
|
{
|
||
|
//int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
|
||
|
|
||
|
//Bitline circuitry leakage.
|
||
|
double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
|
||
|
double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
|
||
|
double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
|
||
|
double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
|
||
|
|
||
|
double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
|
||
|
//double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
|
||
|
double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
|
||
|
//double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
|
||
|
// lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
|
||
|
double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
|
||
|
leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
|
||
|
leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
|
||
|
|
||
|
// sense amplifier has to drive logic in "data out driver" and sense precharge load.
|
||
|
// load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
|
||
|
//constant as well as the magnitude of input differential voltage.
|
||
|
double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
|
||
|
drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
|
||
|
drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
|
||
|
drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
|
||
|
drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
|
||
|
double tau = C_ld / g_tp.gm_sense_amp_latch;
|
||
|
delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
|
||
|
power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
|
||
|
num_subarrays_per_mat * num_act_mats_hor_dir*/;
|
||
|
power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
|
||
|
|
||
|
double outrisetime = 0;
|
||
|
return outrisetime;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_subarray_out_drv(double inrisetime)
|
||
|
{
|
||
|
double C_ld, rd, tf, this_delay;
|
||
|
double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
|
||
|
|
||
|
// delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
|
||
|
rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
|
||
|
C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
|
||
|
gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
|
||
|
tf = rd * C_ld;
|
||
|
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||
|
delay_subarray_out_drv += this_delay;
|
||
|
inrisetime = this_delay/(1.0 - 0.5);
|
||
|
power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||
|
power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
|
||
|
power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
|
||
|
// delay of signal through inverter-buffer to second level of sense-amp mux.
|
||
|
// internal delay of buffer
|
||
|
rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
|
||
|
C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
|
||
|
tf = rd * C_ld;
|
||
|
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||
|
delay_subarray_out_drv += this_delay;
|
||
|
inrisetime = this_delay/(1.0 - 0.5);
|
||
|
power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||
|
power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
|
||
|
power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
|
||
|
|
||
|
// inverter driving drain of pass transistor of second level of sense-amp mux.
|
||
|
rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
|
||
|
C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
|
||
|
tf = rd * C_ld;
|
||
|
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||
|
delay_subarray_out_drv += this_delay;
|
||
|
inrisetime = this_delay/(1.0 - 0.5);
|
||
|
power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||
|
power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
|
||
|
power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
|
||
|
|
||
|
|
||
|
// delay of signal through pass-transistor to input of subarray output driver.
|
||
|
rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
|
||
|
C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
|
||
|
//gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
|
||
|
gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
|
||
|
tf = rd * C_ld;
|
||
|
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||
|
delay_subarray_out_drv += this_delay;
|
||
|
inrisetime = this_delay/(1.0 - 0.5);
|
||
|
power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||
|
power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
|
||
|
power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
|
||
|
|
||
|
|
||
|
return inrisetime;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
double Mat::compute_comparator_delay(double inrisetime)
|
||
|
{
|
||
|
int A = g_ip->tag_assoc;
|
||
|
|
||
|
int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
|
||
|
// a multiple of 4.
|
||
|
|
||
|
/* First Inverter */
|
||
|
double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
|
||
|
double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
|
||
|
double tf = Req*Ceq;
|
||
|
double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
|
||
|
double nextinputtime = st1del/VTHCOMPINV;
|
||
|
power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
|
||
|
|
||
|
//For each degree of associativity
|
||
|
//there are 4 such quarter comparators
|
||
|
double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
|
||
|
double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
|
||
|
/* Second Inverter */
|
||
|
Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
|
||
|
Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
|
||
|
tf = Req*Ceq;
|
||
|
double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
|
||
|
nextinputtime = st2del/(1.0-VTHCOMPINV);
|
||
|
power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
|
||
|
lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
|
||
|
gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
|
||
|
|
||
|
/* Third Inverter */
|
||
|
Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
|
||
|
Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
|
||
|
tf = Req*Ceq;
|
||
|
double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
|
||
|
nextinputtime = st3del/(VTHEVALINV);
|
||
|
power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
|
||
|
lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
|
||
|
gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
|
||
|
|
||
|
/* Final Inverter (virtual ground driver) discharging compare part */
|
||
|
double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
|
||
|
double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
|
||
|
double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
|
||
|
drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
|
||
|
double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
|
||
|
drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
|
||
|
gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
|
||
|
power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
|
||
|
power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
|
||
|
lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
|
||
|
lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
|
||
|
|
||
|
gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
|
||
|
gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
|
||
|
|
||
|
/* time to go to threshold of mux driver */
|
||
|
double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
|
||
|
/* take into account non-zero input rise time */
|
||
|
double m = g_tp.peri_global.Vdd/nextinputtime;
|
||
|
double Tcomparatorni;
|
||
|
|
||
|
if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
|
||
|
{
|
||
|
double a = m;
|
||
|
double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
|
||
|
double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
|
||
|
Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
|
||
|
}
|
||
|
delay_comparator = Tcomparatorni+st1del+st2del+st3del;
|
||
|
power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
|
||
|
power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
|
||
|
|
||
|
return Tcomparatorni / (1.0 - VTHMUXNAND);;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
void Mat::compute_power_energy()
|
||
|
{
|
||
|
//for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
|
||
|
//when search all subarrays and all mats are fully active
|
||
|
//when plain read/write only one subarray in a single mat is active.
|
||
|
|
||
|
// add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
|
||
|
// FIXME
|
||
|
//CACTI3DD
|
||
|
if (g_ip->is_3d_mem)
|
||
|
{
|
||
|
if (g_ip->print_detail_debug)
|
||
|
cout << "mat.cc: subarray.num_cols = " << subarray.num_cols << endl;
|
||
|
power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
|
||
|
//power_bl_precharge_eq_drv = num_subarrays_per_mat;
|
||
|
|
||
|
power_sa.readOp.dynamic *= subarray.num_cols;
|
||
|
|
||
|
power_bitline.readOp.dynamic *= subarray.num_cols;
|
||
|
|
||
|
power_subarray_out_drv.readOp.dynamic = power_subarray_out_drv.readOp.dynamic * g_ip->io_width * g_ip->burst_depth;//* subarray.num_cols;
|
||
|
|
||
|
if (g_ip->print_detail_debug)
|
||
|
{
|
||
|
//cout<<"mat.cc: g_ip->burst_len = "<< g_ip->burst_len << endl;
|
||
|
cout<<"mat.cc: power_bl_precharge_eq_drv.readOp.dynamic = "<< power_bl_precharge_eq_drv.readOp.dynamic * 1e9 << " nJ" <<endl;
|
||
|
cout<<"mat.cc: power_sa.readOp.dynamic = "<< power_sa.readOp.dynamic * 1e9 << " nJ" <<endl;
|
||
|
cout<<"mat.cc: power_bitline.readOp.dynamic = "<< power_bitline.readOp.dynamic * 1e9 << " nJ" <<endl;
|
||
|
cout<<"mat.cc: power_subarray_out_drv.readOp.dynamic = "<< power_subarray_out_drv.readOp.dynamic * 1e9 << " nJ" <<endl;
|
||
|
}
|
||
|
//What is the original power?
|
||
|
power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
|
||
|
power_sa.readOp.dynamic +
|
||
|
power_bitline.readOp.dynamic +
|
||
|
power_subarray_out_drv.readOp.dynamic;
|
||
|
}
|
||
|
else
|
||
|
{ //is_3d_mem
|
||
|
power.readOp.dynamic += r_predec->power.readOp.dynamic +
|
||
|
b_mux_predec->power.readOp.dynamic +
|
||
|
sa_mux_lev_1_predec->power.readOp.dynamic +
|
||
|
sa_mux_lev_2_predec->power.readOp.dynamic;
|
||
|
|
||
|
// add energy consumed in decoders
|
||
|
power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
|
||
|
if (!(is_fa||pure_cam))
|
||
|
power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
|
||
|
|
||
|
// add energy consumed in bitline prechagers, SAs, and bitlines
|
||
|
if (!(is_fa||pure_cam))
|
||
|
{
|
||
|
// add energy consumed in bitline prechagers
|
||
|
power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
|
||
|
power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
|
||
|
|
||
|
//Add sense amps energy
|
||
|
num_sa_subarray = subarray.num_cols / deg_bl_muxing;
|
||
|
power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
|
||
|
|
||
|
// add energy consumed in bitlines
|
||
|
//cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
|
||
|
power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
|
||
|
power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
|
||
|
//cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
|
||
|
//Add subarray output energy
|
||
|
power_subarray_out_drv.readOp.dynamic =
|
||
|
(power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
|
||
|
|
||
|
power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
|
||
|
power_sa.readOp.dynamic +
|
||
|
power_bitline.readOp.dynamic +
|
||
|
power_subarray_out_drv.readOp.dynamic;
|
||
|
|
||
|
power.readOp.dynamic += power_row_decoders.readOp.dynamic +
|
||
|
bit_mux_dec->power.readOp.dynamic +
|
||
|
sa_mux_lev_1_dec->power.readOp.dynamic +
|
||
|
sa_mux_lev_2_dec->power.readOp.dynamic +
|
||
|
power_comparator.readOp.dynamic;
|
||
|
}
|
||
|
|
||
|
else if (is_fa)
|
||
|
{
|
||
|
//for plain read/write only one subarray in a mat is active
|
||
|
// add energy consumed in bitline prechagers
|
||
|
power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
|
||
|
+ cam_bl_precharge_eq_drv->power.readOp.dynamic;
|
||
|
power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
|
||
|
|
||
|
//Add sense amps energy
|
||
|
num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
|
||
|
num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
|
||
|
power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
|
||
|
power_sa.readOp.dynamic *= num_sa_subarray;
|
||
|
|
||
|
|
||
|
// add energy consumed in bitlines
|
||
|
power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
|
||
|
power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
|
||
|
power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
|
||
|
power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
|
||
|
|
||
|
//Add subarray output energy
|
||
|
power_subarray_out_drv.searchOp.dynamic =
|
||
|
(power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
|
||
|
power_subarray_out_drv.readOp.dynamic =
|
||
|
(power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
|
||
|
|
||
|
|
||
|
power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
|
||
|
power_sa.readOp.dynamic +
|
||
|
power_bitline.readOp.dynamic +
|
||
|
power_subarray_out_drv.readOp.dynamic;
|
||
|
|
||
|
power.readOp.dynamic += power_row_decoders.readOp.dynamic +
|
||
|
bit_mux_dec->power.readOp.dynamic +
|
||
|
sa_mux_lev_1_dec->power.readOp.dynamic +
|
||
|
sa_mux_lev_2_dec->power.readOp.dynamic +
|
||
|
power_comparator.readOp.dynamic;
|
||
|
|
||
|
//add energy consumed inside cam
|
||
|
power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
|
||
|
power_searchline_precharge = sl_precharge_eq_drv->power;
|
||
|
power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
|
||
|
power_searchline = sl_data_drv->power;
|
||
|
power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
|
||
|
power_matchline_precharge = ml_precharge_drv->power;
|
||
|
power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
|
||
|
power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
|
||
|
power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
|
||
|
|
||
|
power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
|
||
|
power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
|
||
|
power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
|
||
|
power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
|
||
|
|
||
|
power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
|
||
|
//power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
|
||
|
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// add energy consumed in bitline prechagers
|
||
|
power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
|
||
|
//power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
|
||
|
//power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
|
||
|
//power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
|
||
|
|
||
|
//Add sense amps energy
|
||
|
num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
|
||
|
power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
|
||
|
power_sa.searchOp.dynamic = 0;
|
||
|
|
||
|
power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
|
||
|
power_bitline.searchOp.dynamic = 0;
|
||
|
power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
|
||
|
|
||
|
power_subarray_out_drv.searchOp.dynamic =
|
||
|
(power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
|
||
|
power_subarray_out_drv.readOp.dynamic =
|
||
|
(power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
|
||
|
|
||
|
power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
|
||
|
power_sa.readOp.dynamic +
|
||
|
power_bitline.readOp.dynamic +
|
||
|
power_subarray_out_drv.readOp.dynamic;
|
||
|
|
||
|
power.readOp.dynamic += power_row_decoders.readOp.dynamic +
|
||
|
bit_mux_dec->power.readOp.dynamic +
|
||
|
sa_mux_lev_1_dec->power.readOp.dynamic +
|
||
|
sa_mux_lev_2_dec->power.readOp.dynamic +
|
||
|
power_comparator.readOp.dynamic;
|
||
|
|
||
|
|
||
|
////add energy consumed inside cam
|
||
|
power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
|
||
|
power_searchline_precharge = sl_precharge_eq_drv->power;
|
||
|
power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
|
||
|
power_searchline = sl_data_drv->power;
|
||
|
power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
|
||
|
power_matchline_precharge = ml_precharge_drv->power;
|
||
|
power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
|
||
|
power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
|
||
|
power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
|
||
|
|
||
|
power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
|
||
|
power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
|
||
|
power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
|
||
|
power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
|
||
|
|
||
|
power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
|
||
|
//power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
|
||
|
|
||
|
}
|
||
|
|
||
|
}//CACTI3DD
|
||
|
|
||
|
int number_output_drivers_subarray;
|
||
|
|
||
|
|
||
|
// // calculate leakage power
|
||
|
if (!(is_fa || pure_cam))
|
||
|
{
|
||
|
number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
|
||
|
|
||
|
power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
|
||
|
power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
|
||
|
power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
|
||
|
|
||
|
//num_sa_subarray = subarray.num_cols / deg_bl_muxing;
|
||
|
power_subarray_out_drv.readOp.leakage =
|
||
|
(power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
|
||
|
number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
|
||
|
|
||
|
power.readOp.leakage += power_bitline.readOp.leakage +
|
||
|
power_bl_precharge_eq_drv.readOp.leakage +
|
||
|
power_sa.readOp.leakage +
|
||
|
power_subarray_out_drv.readOp.leakage;
|
||
|
|
||
|
power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
|
||
|
power.readOp.leakage += power_comparator.readOp.leakage;
|
||
|
|
||
|
array_leakage = power_bitline.readOp.leakage;
|
||
|
|
||
|
cl_leakage =
|
||
|
power_bl_precharge_eq_drv.readOp.leakage +
|
||
|
power_sa.readOp.leakage +
|
||
|
power_subarray_out_drv.readOp.leakage +
|
||
|
power_comparator.readOp.leakage;
|
||
|
|
||
|
|
||
|
|
||
|
//Decoder blocks
|
||
|
power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
|
||
|
power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
|
||
|
power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
|
||
|
power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
|
||
|
|
||
|
if (!g_ip->wl_power_gated)
|
||
|
power.readOp.leakage += r_predec->power.readOp.leakage +
|
||
|
b_mux_predec->power.readOp.leakage +
|
||
|
sa_mux_lev_1_predec->power.readOp.leakage +
|
||
|
sa_mux_lev_2_predec->power.readOp.leakage +
|
||
|
power_row_decoders.readOp.leakage +
|
||
|
power_bit_mux_decoders.readOp.leakage +
|
||
|
power_sa_mux_lev_1_decoders.readOp.leakage +
|
||
|
power_sa_mux_lev_2_decoders.readOp.leakage;
|
||
|
else
|
||
|
power.readOp.leakage += (r_predec->power.readOp.leakage +
|
||
|
b_mux_predec->power.readOp.leakage +
|
||
|
sa_mux_lev_1_predec->power.readOp.leakage +
|
||
|
sa_mux_lev_2_predec->power.readOp.leakage +
|
||
|
power_row_decoders.readOp.leakage +
|
||
|
power_bit_mux_decoders.readOp.leakage +
|
||
|
power_sa_mux_lev_1_decoders.readOp.leakage +
|
||
|
power_sa_mux_lev_2_decoders.readOp.leakage)/g_tp.peri_global.Vdd*g_tp.peri_global.Vcc_min;
|
||
|
|
||
|
wl_leakage = r_predec->power.readOp.leakage +
|
||
|
b_mux_predec->power.readOp.leakage +
|
||
|
sa_mux_lev_1_predec->power.readOp.leakage +
|
||
|
sa_mux_lev_2_predec->power.readOp.leakage +
|
||
|
power_row_decoders.readOp.leakage +
|
||
|
power_bit_mux_decoders.readOp.leakage +
|
||
|
power_sa_mux_lev_1_decoders.readOp.leakage +
|
||
|
power_sa_mux_lev_2_decoders.readOp.leakage;
|
||
|
|
||
|
//++++Below is gate leakage
|
||
|
power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
|
||
|
power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
|
||
|
power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
|
||
|
|
||
|
//num_sa_subarray = subarray.num_cols / deg_bl_muxing;
|
||
|
power_subarray_out_drv.readOp.gate_leakage =
|
||
|
(power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
|
||
|
number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
|
||
|
|
||
|
power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
|
||
|
power_bl_precharge_eq_drv.readOp.gate_leakage +
|
||
|
power_sa.readOp.gate_leakage +
|
||
|
power_subarray_out_drv.readOp.gate_leakage;
|
||
|
//cout<<"leakage"<<power.readOp.leakage<<endl;
|
||
|
|
||
|
power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
|
||
|
power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
|
||
|
|
||
|
if (g_ip->power_gating)
|
||
|
{
|
||
|
|
||
|
//cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
|
||
|
|
||
|
//Power gating data summary
|
||
|
array_sleep_tx_area = sram_sleep_tx->area.get_area()*subarray.num_cols * num_subarrays_per_mat*dp.num_mats;
|
||
|
array_wakeup_e.readOp.dynamic = sram_sleep_tx->wakeup_power.readOp.dynamic * num_subarrays_per_mat*subarray.num_cols*dp.num_act_mats_hor_dir;
|
||
|
array_wakeup_t = sram_sleep_tx->wakeup_delay;
|
||
|
|
||
|
wl_sleep_tx_area = row_dec->sleeptx->area.get_area()*subarray.num_rows * num_subarrays_per_mat*dp.num_mats;
|
||
|
wl_wakeup_e.readOp.dynamic = row_dec->sleeptx->wakeup_power.readOp.dynamic * num_subarrays_per_mat*subarray.num_rows*dp.num_act_mats_hor_dir;
|
||
|
wl_wakeup_t = row_dec->sleeptx->wakeup_delay;
|
||
|
|
||
|
}
|
||
|
|
||
|
// gate_leakage power
|
||
|
power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
|
||
|
power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
|
||
|
power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
|
||
|
power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
|
||
|
|
||
|
power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
|
||
|
b_mux_predec->power.readOp.gate_leakage +
|
||
|
sa_mux_lev_1_predec->power.readOp.gate_leakage +
|
||
|
sa_mux_lev_2_predec->power.readOp.gate_leakage +
|
||
|
power_row_decoders.readOp.gate_leakage +
|
||
|
power_bit_mux_decoders.readOp.gate_leakage +
|
||
|
power_sa_mux_lev_1_decoders.readOp.gate_leakage +
|
||
|
power_sa_mux_lev_2_decoders.readOp.gate_leakage;
|
||
|
}
|
||
|
else if (is_fa)
|
||
|
{
|
||
|
int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
|
||
|
|
||
|
power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
|
||
|
power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
|
||
|
power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
|
||
|
power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
|
||
|
|
||
|
//cout<<"leakage3"<<power.readOp.leakage<<endl;
|
||
|
|
||
|
|
||
|
power_subarray_out_drv.readOp.leakage =
|
||
|
(power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
|
||
|
number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
|
||
|
|
||
|
power.readOp.leakage += power_bitline.readOp.leakage +
|
||
|
power_bl_precharge_eq_drv.readOp.leakage +
|
||
|
power_bl_precharge_eq_drv.searchOp.leakage +
|
||
|
power_sa.readOp.leakage +
|
||
|
power_subarray_out_drv.readOp.leakage;
|
||
|
|
||
|
//cout<<"leakage4"<<power.readOp.leakage<<endl;
|
||
|
|
||
|
// leakage power
|
||
|
power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
|
||
|
power.readOp.leakage += r_predec->power.readOp.leakage +
|
||
|
power_row_decoders.readOp.leakage;
|
||
|
|
||
|
//cout<<"leakage5"<<power.readOp.leakage<<endl;
|
||
|
|
||
|
//inside cam
|
||
|
power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
|
||
|
power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
|
||
|
power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
|
||
|
power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
|
||
|
power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
|
||
|
|
||
|
power.readOp.leakage += power_cam_all_active.searchOp.leakage;
|
||
|
|
||
|
// cout<<"leakage6"<<power.readOp.leakage<<endl;
|
||
|
|
||
|
//+++Below is gate leakage
|
||
|
power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
|
||
|
power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
|
||
|
power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
|
||
|
power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
|
||
|
|
||
|
//cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
|
||
|
|
||
|
|
||
|
power_subarray_out_drv.readOp.gate_leakage =
|
||
|
(power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
|
||
|
number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
|
||
|
|
||
|
power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
|
||
|
power_bl_precharge_eq_drv.readOp.gate_leakage +
|
||
|
power_bl_precharge_eq_drv.searchOp.gate_leakage +
|
||
|
power_sa.readOp.gate_leakage +
|
||
|
power_subarray_out_drv.readOp.gate_leakage;
|
||
|
|
||
|
//cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
|
||
|
|
||
|
// gate_leakage power
|
||
|
power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
|
||
|
power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
|
||
|
power_row_decoders.readOp.gate_leakage;
|
||
|
|
||
|
//cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
|
||
|
|
||
|
//inside cam
|
||
|
power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
|
||
|
power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
|
||
|
power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
|
||
|
power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
|
||
|
power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
|
||
|
|
||
|
power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
|
||
|
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
|
||
|
|
||
|
//power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
|
||
|
//power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
|
||
|
power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
|
||
|
power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
|
||
|
|
||
|
|
||
|
power_subarray_out_drv.readOp.leakage =
|
||
|
(power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
|
||
|
number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
|
||
|
|
||
|
power.readOp.leakage += //power_bitline.readOp.leakage +
|
||
|
//power_bl_precharge_eq_drv.readOp.leakage +
|
||
|
power_bl_precharge_eq_drv.searchOp.leakage +
|
||
|
power_sa.readOp.leakage +
|
||
|
power_subarray_out_drv.readOp.leakage;
|
||
|
|
||
|
// leakage power
|
||
|
power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
|
||
|
power.readOp.leakage += r_predec->power.readOp.leakage +
|
||
|
power_row_decoders.readOp.leakage;
|
||
|
|
||
|
//inside cam
|
||
|
power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
|
||
|
power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
|
||
|
power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
|
||
|
power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
|
||
|
power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
|
||
|
|
||
|
power.readOp.leakage += power_cam_all_active.searchOp.leakage;
|
||
|
|
||
|
//+++Below is gate leakage
|
||
|
power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
|
||
|
power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
|
||
|
|
||
|
|
||
|
power_subarray_out_drv.readOp.gate_leakage =
|
||
|
(power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
|
||
|
number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
|
||
|
|
||
|
power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
|
||
|
//power_bl_precharge_eq_drv.readOp.gate_leakage +
|
||
|
power_bl_precharge_eq_drv.searchOp.gate_leakage +
|
||
|
power_sa.readOp.gate_leakage +
|
||
|
power_subarray_out_drv.readOp.gate_leakage;
|
||
|
|
||
|
// gate_leakage power
|
||
|
power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
|
||
|
power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
|
||
|
power_row_decoders.readOp.gate_leakage;
|
||
|
|
||
|
//inside cam
|
||
|
power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
|
||
|
power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
|
||
|
power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
|
||
|
power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
|
||
|
power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
|
||
|
|
||
|
power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
|
||
|
}
|
||
|
}
|
||
|
|