/*****************************************************************************
 *                                CACTI 7.0
 *                      SOFTWARE LICENSE AGREEMENT
 *            Copyright 2015 Hewlett-Packard Development Company, L.P.
 *                          All Rights Reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
 *
 ***************************************************************************/

#include "area.h"
#include "decoder.h"
#include "parameter.h"

// NOTE(review): the names of the three system headers below were missing from
// the directives (bare "#include"). They are restored to what this file needs
// (assert(), stream/utility types, math routines) -- confirm against upstream.
#include <assert.h>
#include <iostream>
#include <math.h>
#include <utility>

using namespace std;


/**
 * Builds the row-decoder model: a NAND2/NAND3 gate followed by an inverter
 * chain that drives the decoder output load.
 *
 * @param _num_dec_signals  number of decoded output signals
 * @param flag_way_select   true when a way-select signal is an extra gate input
 * @param _C_ld_dec_out     capacitive load on the decoder output
 * @param _R_wire_dec_out   resistance of the wire on the decoder output
 * @param fully_assoc_      stored in fully_assoc; forces NAND2 first-gate sizing
 * @param is_dram_          forwarded to the device/circuit helper routines
 * @param is_wl_tr_         forwarded to the device/circuit helper routines
 * @param cell_             memory cell area; its height sets the decoder height
 */
Decoder::Decoder(
    int    _num_dec_signals,
    bool   flag_way_select,
    double _C_ld_dec_out,
    double _R_wire_dec_out,
    bool   fully_assoc_,
    bool   is_dram_,
    bool   is_wl_tr_,
    const  Area & cell_)
:exist(false),
  C_ld_dec_out(_C_ld_dec_out),
  R_wire_dec_out(_R_wire_dec_out),
  num_gates(0), num_gates_min(2),
  delay(0),
  //power(),
  fully_assoc(fully_assoc_), is_dram(is_dram_),
  is_wl_tr(is_wl_tr_),
  total_driver_nwidth(0),
  total_driver_pwidth(0),
  cell(cell_),
  nodes_DSTN(1)
{
  // Start from an all-zero sizing table; compute_widths() fills the used slots.
  for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
  {
    w_dec_n[i] = 0;
    w_dec_p[i] = 0;
  }

  /*
   * _num_dec_signals is the number of decoded signals produced as output;
   * num_addr_bits_dec is the number of signals to be decoded at the
   * decoder's input.
   */
  int num_addr_bits_dec = _log2(_num_dec_signals);

  if (num_addr_bits_dec < 4)
  {
    // Fewer than 4 address bits: the decoder gate only exists to merge in the
    // way-select signal.
    if (flag_way_select)
    {
      exist = true;
      num_in_signals = 2;
    }
    else
    {
      num_in_signals = 0;
    }
  }
  else
  {
    exist = true;
    num_in_signals = flag_way_select ? 3 : 2;
  }

  assert(cell.h>0);
  assert(cell.w>0);
  // the height of a row-decoder-driver cell used to be fixed at 4 * cell.h;
  // it is now g_tp.h_dec cell heights.
  //area.h = 4 * cell.h;
  area.h = g_tp.h_dec * cell.h;

  compute_widths();
  compute_area();
}



/**
 * Sizes the first decoder gate and the inverter chain behind it.
 * Fills w_dec_n / w_dec_p and sets num_gates; does nothing when the decoder
 * does not exist.
 */
void Decoder::compute_widths()
{
  double F;
  double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
  double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
  double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);

  if (!exist)
  {
    return;
  }

  // First gate: NAND2 when there are two inputs (or fully associative),
  // NAND3 otherwise. The PMOS width is the same in both cases.
  if (num_in_signals == 2 || fully_assoc)
  {
    w_dec_n[0] = 2 * g_tp.min_w_nmos_;
    F = gnand2;
  }
  else
  {
    w_dec_n[0] = 3 * g_tp.min_w_nmos_;
    F = gnand3;
  }
  w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;

  // Path effort = gate effort * (output load / input capacitance of gate 0).
  F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
                       gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));

  // NOTE(review): the effort passed here depends only on num_in_signals and
  // ignores fully_assoc, unlike the first-gate sizing above -- kept as is.
  num_gates = logical_effort(
      num_gates_min,
      num_in_signals == 2 ? gnand2 : gnand3,
      F,
      w_dec_n,
      w_dec_p,
      C_ld_dec_out,
      p_to_n_sz_ratio,
      is_dram,
      is_wl_tr,
      g_tp.max_w_nmos_dec);
}



/**
 * Computes the decoder width (area.w) for the fixed height area.h, and the
 * subthreshold / gate leakage power of the NAND gate plus inverter chain.
 * Does nothing when the decoder does not exist.
 */
void Decoder::compute_area()
{
  double cumulative_area = 0;
  double cumulative_curr = 0;     // cumulative subthreshold leakage current
  double cumulative_curr_Ig = 0;  // cumulative gate leakage current

  if (!exist)
  { // no decoder, nothing to account for
    return;
  }

  // First gate is a NAND2 or NAND3 depending on the number of inputs.
  if (num_in_signals == 2 || num_in_signals == 3)
  {
    const int fanin = num_in_signals;
    cumulative_area    = compute_gate_area(NAND, fanin, w_dec_p[0], w_dec_n[0], area.h);
    cumulative_curr    = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], fanin, nand, is_dram);
    cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], fanin, nand, is_dram);
  }

  // Remaining gates are inverters.
  for (int i = 1; i < num_gates; i++)
  {
    cumulative_area    += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
    cumulative_curr    += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
    // Accumulate (was a plain assignment, which kept only the last inverter's
    // gate leakage and discarded the NAND gate and all earlier inverters).
    cumulative_curr_Ig += cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
  }

  power.readOp.leakage      = cumulative_curr * g_tp.peri_global.Vdd;
  power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;

  area.w = (cumulative_area / area.h);
}



/**
 * Sums the widths of the driver inverters and, when power gating is enabled
 * in g_ip, allocates the sleep transistor model (sleeptx) for them.
 *
 * NOTE(review): the width totals are accumulated into members and sleeptx is
 * re-allocated on every call; compute_delays() calls this each time it runs.
 * Confirm callers invoke it once per Decoder.
 */
void Decoder::compute_power_gating()
{
  //For all driver change there is only one sleep transistors to save area
  //Total transistor width for sleep tx calculation
  // Gates 1 .. num_gates-1 are the inverters. The bound used to be
  // i <= num_gates, which read one slot past the last sized gate (and past the
  // array when num_gates == MAX_NUMBER_GATES_STAGE).
  for (int i = 1; i < num_gates; i++)
  {
    total_driver_nwidth += w_dec_n[i];
    total_driver_pwidth += w_dec_p[i];
  }

  //compute sleep tx
  bool   is_footer     = false;
  double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth);
  double c_wakeup      = drain_C_(total_driver_pwidth, PCH, 1, 1, cell.h); //Psleep tx
  double detalV        = g_tp.peri_global.Vdd - g_tp.peri_global.Vcc_min;

  if (g_ip->power_gating)
  {
    sleeptx = new Sleep_tx(g_ip->perfloss,
                           Isat_subarray,
                           is_footer,
                           c_wakeup,
                           detalV,
                           nodes_DSTN,
                           area);
  }
}
double Decoder::compute_delays(double inrisetime) { if (exist) { double ret_val = 0; // outrisetime int i; double rd, tf, this_delay, c_load, c_intrinsic, Vpp; double Vdd = g_tp.peri_global.Vdd; if ((is_wl_tr) && (is_dram)) { Vpp = g_tp.vpp; } else if (is_wl_tr) { Vpp = g_tp.sram_cell.Vdd; } else { Vpp = g_tp.peri_global.Vdd; } // first check whether a decoder is required at all rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; inrisetime = this_delay / (1.0 - 0.5); power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; for (i = 1; i < num_gates - 1; ++i) { rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; inrisetime = this_delay / (1.0 - 0.5); power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; } // add delay of final inverter that drives the wordline i = num_gates - 1; c_load = C_ld_dec_out; rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; ret_val = this_delay / (1.0 - 0.5); power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * 
Vdd; compute_power_gating(); return ret_val; } else { return 0.0; } } void Decoder::leakage_feedback(double temperature) { double cumulative_curr = 0; // cumulative leakage current double cumulative_curr_Ig = 0; // cumulative leakage current if (exist) { // First check if this decoder exists if (num_in_signals == 2) { cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); } else if (num_in_signals == 3) { cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); } for (int i = 1; i < num_gates; i++) { cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); } power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; } } PredecBlk::PredecBlk( int num_dec_signals, Decoder * dec_, double C_wire_predec_blk_out, double R_wire_predec_blk_out_, int num_dec_per_predec, bool is_dram, bool is_blk1) :dec(dec_), exist(false), number_input_addr_bits(0), C_ld_predec_blk_out(0), R_wire_predec_blk_out(0), branch_effort_nand2_gate_output(1), branch_effort_nand3_gate_output(1), flag_two_unique_paths(false), flag_L2_gate(0), number_inputs_L1_gate(0), number_gates_L1_nand2_path(0), number_gates_L1_nand3_path(0), number_gates_L2(0), min_number_gates_L1(2), min_number_gates_L2(2), num_L1_active_nand2_path(0), num_L1_active_nand3_path(0), delay_nand2_path(0), delay_nand3_path(0), power_nand2_path(), power_nand3_path(), power_L2(), is_dram_(is_dram) { int branch_effort_predec_out; double C_ld_dec_gate; int num_addr_bits_dec = _log2(num_dec_signals); int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; w_L1_nand2_n[0] = 0; 
w_L1_nand2_p[0] = 0; w_L1_nand3_n[0] = 0; w_L1_nand3_p[0] = 0; if (is_blk1 == true) { if (num_addr_bits_dec <= 0) { return; } else if (num_addr_bits_dec < 4) { // Just one predecoder block is required with NAND2 gates. No decoder required. // The first level of predecoding directly drives the decoder output load exist = true; number_input_addr_bits = num_addr_bits_dec; R_wire_predec_blk_out = dec->R_wire_dec_out; C_ld_predec_blk_out = dec->C_ld_dec_out; } else { exist = true; number_input_addr_bits = blk1_num_input_addr_bits; branch_effort_predec_out = (1 << blk2_num_input_addr_bits); C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); R_wire_predec_blk_out = R_wire_predec_blk_out_; C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; } } else { if (num_addr_bits_dec >= 4) { exist = true; number_input_addr_bits = blk2_num_input_addr_bits; branch_effort_predec_out = (1 << blk1_num_input_addr_bits); C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); R_wire_predec_blk_out = R_wire_predec_blk_out_; C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; } } compute_widths(); compute_area(); } void PredecBlk::compute_widths() { double F, c_load_nand3_path, c_load_nand2_path; double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); if (exist == false) return; switch (number_input_addr_bits) { case 1: flag_two_unique_paths = false; number_inputs_L1_gate = 2; flag_L2_gate = 0; break; case 2: flag_two_unique_paths = false; number_inputs_L1_gate = 2; flag_L2_gate = 0; break; case 3: flag_two_unique_paths = false; number_inputs_L1_gate = 3; flag_L2_gate = 0; break; case 4: flag_two_unique_paths = false; number_inputs_L1_gate = 2; flag_L2_gate = 2; 
branch_effort_nand2_gate_output = 4; break; case 5: flag_two_unique_paths = true; flag_L2_gate = 2; branch_effort_nand2_gate_output = 8; branch_effort_nand3_gate_output = 4; break; case 6: flag_two_unique_paths = false; number_inputs_L1_gate = 3; flag_L2_gate = 2; branch_effort_nand3_gate_output = 8; break; case 7: flag_two_unique_paths = true; flag_L2_gate = 3; branch_effort_nand2_gate_output = 32; branch_effort_nand3_gate_output = 16; break; case 8: flag_two_unique_paths = true; flag_L2_gate = 3; branch_effort_nand2_gate_output = 64; branch_effort_nand3_gate_output = 32; break; case 9: flag_two_unique_paths = false; number_inputs_L1_gate = 3; flag_L2_gate = 3; branch_effort_nand3_gate_output = 64; break; default: assert(0); break; } // find the number of gates and sizing in second level of predecoder (if there is a second level) if (flag_L2_gate) { if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate w_L2_n[0] = 2 * g_tp.min_w_nmos_; F = gnand2; } else { // 2nd level is a NAND3 gate w_L2_n[0] = 3 * g_tp.min_w_nmos_; F = gnand3; } w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); number_gates_L2 = logical_effort( min_number_gates_L2, flag_L2_gate == 2 ? gnand2 : gnand3, F, w_L2_n, w_L2_p, C_ld_predec_blk_out, p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); // Now find the number of gates and widths in first level of predecoder if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2)) { // Whenever flag_two_unique_paths is true, it means first level of decoder employs // both NAND2 and NAND3 gates. 
Or when number_inputs_L1_gate is 2, it means // a NAND2 gate is used in the first level of the predecoder c_load_nand2_path = branch_effort_nand2_gate_output * (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand2 * c_load_nand2_path / (gate_C(w_L1_nand2_n[0], 0, is_dram_) + gate_C(w_L1_nand2_p[0], 0, is_dram_)); number_gates_L1_nand2_path = logical_effort( min_number_gates_L1, gnand2, F, w_L1_nand2_n, w_L1_nand2_p, c_load_nand2_path, p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); } //Now find widths of gates along path in which first gate is a NAND3 if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means // a NAND3 gate is used in the first level of the predecoder c_load_nand3_path = branch_effort_nand3_gate_output * (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand3 * c_load_nand3_path / (gate_C(w_L1_nand3_n[0], 0, is_dram_) + gate_C(w_L1_nand3_p[0], 0, is_dram_)); number_gates_L1_nand3_path = logical_effort( min_number_gates_L1, gnand3, F, w_L1_nand3_n, w_L1_nand3_p, c_load_nand3_path, p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); } } else { // find number of gates and widths in first level of predecoder block when there is no second level if (number_inputs_L1_gate == 2) { w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand2*C_ld_predec_blk_out / (gate_C(w_L1_nand2_n[0], 0, is_dram_) + gate_C(w_L1_nand2_p[0], 0, is_dram_)); number_gates_L1_nand2_path = logical_effort( min_number_gates_L1, gnand2, F, w_L1_nand2_n, w_L1_nand2_p, C_ld_predec_blk_out, p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); } 
else if (number_inputs_L1_gate == 3) { w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; F = gnand3*C_ld_predec_blk_out / (gate_C(w_L1_nand3_n[0], 0, is_dram_) + gate_C(w_L1_nand3_p[0], 0, is_dram_)); number_gates_L1_nand3_path = logical_effort( min_number_gates_L1, gnand3, F, w_L1_nand3_n, w_L1_nand3_p, C_ld_predec_blk_out, p_to_n_sz_ratio, is_dram_, false, g_tp.max_w_nmos_); } } } void PredecBlk::compute_area() { if (exist) { // First check whether a predecoder block is needed int num_L1_nand2 = 0; int num_L1_nand3 = 0; int num_L2 = 0; double tot_area_L1_nand3 =0; double leak_L1_nand3 =0; double gate_leak_L1_nand3 =0; double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); if (number_inputs_L1_gate != 3) { tot_area_L1_nand3 = 0; leak_L1_nand3 = 0; gate_leak_L1_nand3 =0; } else { tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); } switch (number_input_addr_bits) { case 1: //2 NAND2 gates num_L1_nand2 = 2; num_L2 = 0; num_L1_active_nand2_path =1; num_L1_active_nand3_path =0; break; case 2: //4 NAND2 gates num_L1_nand2 = 4; num_L2 = 0; num_L1_active_nand2_path =1; num_L1_active_nand3_path =0; break; case 3: //8 NAND3 gates num_L1_nand3 = 8; num_L2 = 0; num_L1_active_nand2_path =0; num_L1_active_nand3_path =1; break; case 4: //4 + 4 NAND2 gates num_L1_nand2 = 8; num_L2 = 16; num_L1_active_nand2_path =2; num_L1_active_nand3_path =0; break; case 5: //4 NAND2 gates, 8 NAND3 gates num_L1_nand2 = 4; num_L1_nand3 = 8; num_L2 = 32; num_L1_active_nand2_path =1; num_L1_active_nand3_path =1; 
break; case 6: //8 + 8 NAND3 gates num_L1_nand3 = 16; num_L2 = 64; num_L1_active_nand2_path =0; num_L1_active_nand3_path =2; break; case 7: //4 + 4 NAND2 gates, 8 NAND3 gates num_L1_nand2 = 8; num_L1_nand3 = 8; num_L2 = 128; num_L1_active_nand2_path =2; num_L1_active_nand3_path =1; break; case 8: //4 NAND2 gates, 8 + 8 NAND3 gates num_L1_nand2 = 4; num_L1_nand3 = 16; num_L2 = 256; num_L1_active_nand2_path =2; num_L1_active_nand3_path =2; break; case 9: //8 + 8 + 8 NAND3 gates num_L1_nand3 = 24; num_L2 = 512; num_L1_active_nand2_path =0; num_L1_active_nand3_path =3; break; default: break; } for (int i = 1; i < number_gates_L1_nand2_path; ++i) { tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); } tot_area_L1_nand2 *= num_L1_nand2; leak_L1_nand2 *= num_L1_nand2; gate_leak_L1_nand2 *= num_L1_nand2; for (int i = 1; i < number_gates_L1_nand3_path; ++i) { tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); } tot_area_L1_nand3 *= num_L1_nand3; leak_L1_nand3 *= num_L1_nand3; gate_leak_L1_nand3 *= num_L1_nand3; double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; double cumulative_area_L2 = 0.0; double leakage_L2 = 0.0; double gate_leakage_L2 = 0.0; if (flag_L2_gate == 2) { cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); } else if (flag_L2_gate == 3) { cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], 
g_tp.cell_h_def); leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); } for (int i = 1; i < number_gates_L2; ++i) { cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); } cumulative_area_L2 *= num_L2; leakage_L2 *= num_L2; gate_leakage_L2 *= num_L2; power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; area.set_area(cumulative_area_L1 + cumulative_area_L2); power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; } } pair PredecBlk::compute_delays( pair inrisetime) // { pair ret_val; ret_val.first = 0; // outrisetime_nand2_path ret_val.second = 0; // outrisetime_nand3_path double inrisetime_nand2_path = inrisetime.first; double inrisetime_nand3_path = inrisetime.second; int i; double rd, c_load, c_intrinsic, tf, this_delay; double Vdd = g_tp.peri_global.Vdd; // TODO: following delay calculation part can be greatly simplified. 
// first check whether a predecoder block is required if (exist) { //Find delay in first level of predecoder block //First find delay in path if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { //First gate is a NAND2 gate rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; inrisetime_nand2_path = this_delay / (1.0 - 0.5); power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; //Add delays of all but the last inverter in the chain for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) { rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; inrisetime_nand2_path = this_delay / (1.0 - 0.5); power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } //Add delay of the last inverter i = number_gates_L1_nand2_path - 1; rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); if (flag_L2_gate) { c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; inrisetime_nand2_path = this_delay / (1.0 - 0.5); power_nand2_path.readOp.dynamic += 
(c_intrinsic + c_load) * Vdd * Vdd; } else { //First level directly drives decoder output load c_load = C_ld_predec_blk_out; c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; ret_val.first = this_delay / (1.0 - 0.5); power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } } if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { //Check if the number of gates in the first level is more than 1. //First gate is a NAND3 gate rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); delay_nand3_path += this_delay; inrisetime_nand3_path = this_delay / (1.0 - 0.5); power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; //Add delays of all but the last inverter in the chain for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) { rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); delay_nand3_path += this_delay; inrisetime_nand3_path = this_delay / (1.0 - 0.5); power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } //Add delay of the last inverter i = number_gates_L1_nand3_path - 1; rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); if (flag_L2_gate) { 
c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); delay_nand3_path += this_delay; inrisetime_nand3_path = this_delay / (1.0 - 0.5); power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } else { //First level directly drives decoder output load c_load = C_ld_predec_blk_out; c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); delay_nand3_path += this_delay; ret_val.second = this_delay / (1.0 - 0.5); power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } } // Find delay through second level if (flag_L2_gate) { if (flag_L2_gate == 2) { rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; inrisetime_nand2_path = this_delay / (1.0 - 0.5); power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } else { // flag_L2_gate = 3 rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); delay_nand3_path += this_delay; 
inrisetime_nand3_path = this_delay / (1.0 - 0.5); power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } for (i = 1; i < number_gates_L2 - 1; ++i) { rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; inrisetime_nand2_path = this_delay / (1.0 - 0.5); this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); delay_nand3_path += this_delay; inrisetime_nand3_path = this_delay / (1.0 - 0.5); power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } //Add delay of final inverter that drives the wordline decoders i = number_gates_L2 - 1; c_load = C_ld_predec_blk_out; rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; ret_val.first = this_delay / (1.0 - 0.5); this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); delay_nand3_path += this_delay; ret_val.second = this_delay / (1.0 - 0.5); power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; } } delay = (ret_val.first > ret_val.second) ? 
ret_val.first : ret_val.second; return ret_val; } void PredecBlk::leakage_feedback(double temperature) { if (exist) { // First check whether a predecoder block is needed int num_L1_nand2 = 0; int num_L1_nand3 = 0; int num_L2 = 0; double leak_L1_nand3 =0; double gate_leak_L1_nand3 =0; double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); if (number_inputs_L1_gate != 3) { leak_L1_nand3 = 0; gate_leak_L1_nand3 =0; } else { leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); } switch (number_input_addr_bits) { case 1: //2 NAND2 gates num_L1_nand2 = 2; num_L2 = 0; num_L1_active_nand2_path =1; num_L1_active_nand3_path =0; break; case 2: //4 NAND2 gates num_L1_nand2 = 4; num_L2 = 0; num_L1_active_nand2_path =1; num_L1_active_nand3_path =0; break; case 3: //8 NAND3 gates num_L1_nand3 = 8; num_L2 = 0; num_L1_active_nand2_path =0; num_L1_active_nand3_path =1; break; case 4: //4 + 4 NAND2 gates num_L1_nand2 = 8; num_L2 = 16; num_L1_active_nand2_path =2; num_L1_active_nand3_path =0; break; case 5: //4 NAND2 gates, 8 NAND3 gates num_L1_nand2 = 4; num_L1_nand3 = 8; num_L2 = 32; num_L1_active_nand2_path =1; num_L1_active_nand3_path =1; break; case 6: //8 + 8 NAND3 gates num_L1_nand3 = 16; num_L2 = 64; num_L1_active_nand2_path =0; num_L1_active_nand3_path =2; break; case 7: //4 + 4 NAND2 gates, 8 NAND3 gates num_L1_nand2 = 8; num_L1_nand3 = 8; num_L2 = 128; num_L1_active_nand2_path =2; num_L1_active_nand3_path =1; break; case 8: //4 NAND2 gates, 8 + 8 NAND3 gates num_L1_nand2 = 4; num_L1_nand3 = 16; num_L2 = 256; num_L1_active_nand2_path =2; num_L1_active_nand3_path =2; break; case 9: //8 + 8 + 8 NAND3 gates num_L1_nand3 = 24; num_L2 = 512; num_L1_active_nand2_path =0; num_L1_active_nand3_path =3; break; default: break; } for (int i = 1; i 
< number_gates_L1_nand2_path; ++i) { leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); } leak_L1_nand2 *= num_L1_nand2; gate_leak_L1_nand2 *= num_L1_nand2; for (int i = 1; i < number_gates_L1_nand3_path; ++i) { leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); } leak_L1_nand3 *= num_L1_nand3; gate_leak_L1_nand3 *= num_L1_nand3; double leakage_L2 = 0.0; double gate_leakage_L2 = 0.0; if (flag_L2_gate == 2) { leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); } else if (flag_L2_gate == 3) { leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); } for (int i = 1; i < number_gates_L2; ++i) { leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); } leakage_L2 *= num_L2; gate_leakage_L2 *= num_L2; power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; } } PredecBlkDrv::PredecBlkDrv( int way_select_, PredecBlk * blk_, bool is_dram) :flag_driver_exists(0), number_gates_nand2_path(0), number_gates_nand3_path(0), min_number_gates(2), num_buffers_driving_1_nand2_load(0), num_buffers_driving_2_nand2_load(0), num_buffers_driving_4_nand2_load(0), 
num_buffers_driving_2_nand3_load(0), num_buffers_driving_8_nand3_load(0), num_buffers_nand3_path(0), c_load_nand2_path_out(0), c_load_nand3_path_out(0), r_load_nand2_path_out(0), r_load_nand3_path_out(0), delay_nand2_path(0), delay_nand3_path(0), power_nand2_path(), power_nand3_path(), blk(blk_), dec(blk->dec), is_dram_(is_dram), way_select(way_select_) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { width_nand2_path_n[i] = 0; width_nand2_path_p[i] = 0; width_nand3_path_n[i] = 0; width_nand3_path_p[i] = 0; } number_input_addr_bits = blk->number_input_addr_bits; if (way_select > 1) { flag_driver_exists = 1; number_input_addr_bits = way_select; if (dec->num_in_signals == 2) { c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); num_buffers_driving_2_nand2_load = number_input_addr_bits; } else if (dec->num_in_signals == 3) { c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); num_buffers_driving_2_nand3_load = number_input_addr_bits; } } else if (way_select == 0) { if (blk->exist) { flag_driver_exists = 1; } } compute_widths(); compute_area(); } void PredecBlkDrv::compute_widths() { // The predecode block driver accepts as input the address bits from the h-tree network. For // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of // inversion to generate addrbar and simply treat addrbar as addr. 
// Body of PredecBlkDrv::compute_widths(), continued: for way_select == 0, pick
// the buffer counts and the load on each path from the block's address-bit count.
  double F;
  double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);

  if (flag_driver_exists) {
    // gate_C of the summed n+p widths of entry 0 of the block's L1 NAND2 / NAND3
    // path; used below as the load contributed by one such gate.
    double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
    double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);

    if (way_select == 0) {
      // One case per address-bit count (1..9). Each case sets how many buffers
      // there are of each kind and the load seen at one buffer output
      // (2 or 4 NAND2 gates, or 8 NAND3 gates).
      if (blk->number_input_addr_bits == 1)
      { //2 NAND2 gates
        num_buffers_driving_2_nand2_load = 1;
        c_load_nand2_path_out = 2 * C_nand2_gate_blk;
      }
      else if (blk->number_input_addr_bits == 2)
      { //4 NAND2 gates one 2-4 decoder
        num_buffers_driving_4_nand2_load = 2;
        c_load_nand2_path_out = 4 * C_nand2_gate_blk;
      }
      else if (blk->number_input_addr_bits == 3)
      { //8 NAND3 gates one 3-8 decoder
        num_buffers_driving_8_nand3_load = 3;
        c_load_nand3_path_out = 8 * C_nand3_gate_blk;
      }
      else if (blk->number_input_addr_bits == 4)
      { //4 + 4 NAND2 gates two 2-4 decoder
        num_buffers_driving_4_nand2_load = 4;
        c_load_nand2_path_out = 4 * C_nand2_gate_blk;
      }
      else if (blk->number_input_addr_bits == 5)
      { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
        num_buffers_driving_4_nand2_load = 2;
        num_buffers_driving_8_nand3_load = 3;
        c_load_nand2_path_out = 4 * C_nand2_gate_blk;
        c_load_nand3_path_out = 8 * C_nand3_gate_blk;
      }
      else if (blk->number_input_addr_bits == 6)
      { //8 + 8 NAND3 gates two 3-8 decoder
        num_buffers_driving_8_nand3_load = 6;
        c_load_nand3_path_out = 8 * C_nand3_gate_blk;
      }
      else if (blk->number_input_addr_bits == 7)
      { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
        num_buffers_driving_4_nand2_load = 4;
        num_buffers_driving_8_nand3_load = 3;
        c_load_nand2_path_out = 4 * C_nand2_gate_blk;
        c_load_nand3_path_out = 8 * C_nand3_gate_blk;
      }
      else if (blk->number_input_addr_bits == 8)
      { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
        num_buffers_driving_4_nand2_load = 2;
        num_buffers_driving_8_nand3_load = 6;
        c_load_nand2_path_out = 4 * C_nand2_gate_blk;
        c_load_nand3_path_out = 8 * C_nand3_gate_blk;
      }
      else if (blk->number_input_addr_bits == 9)
{ //8 + 8 + 8 NAND3 gates three 3-8 decoder
        num_buffers_driving_8_nand3_load = 9;
        c_load_nand3_path_out = 8 * C_nand3_gate_blk;
      }
    }

    // NAND2 path: stage 0 is a minimum-width inverter, F is the ratio of the
    // path's output load to that stage's gate_C, and logical_effort() is handed
    // the width arrays and returns the number of gates in the chain.
    if ((blk->flag_two_unique_paths) ||
        (blk->number_inputs_L1_gate == 2) ||
        (number_input_addr_bits == 0) ||
        ((way_select)&&(dec->num_in_signals == 2)))
    { //this means that way_select is driving NAND2 in decoder.
      width_nand2_path_n[0] = g_tp.min_w_nmos_;
      width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
      F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
      number_gates_nand2_path = logical_effort(
          min_number_gates,
          1,
          F,
          width_nand2_path_n,
          width_nand2_path_p,
          c_load_nand2_path_out,
          p_to_n_sz_ratio,
          is_dram_, false, g_tp.max_w_nmos_);
    }

    // NAND3 path: same sizing procedure against the NAND3 output load.
    if ((blk->flag_two_unique_paths) ||
        (blk->number_inputs_L1_gate == 3) ||
        ((way_select)&&(dec->num_in_signals == 3)))
    { //this means that way_select is driving NAND3 in decoder.
      width_nand3_path_n[0] = g_tp.min_w_nmos_;
      width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
      F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
      number_gates_nand3_path = logical_effort(
          min_number_gates,
          1,
          F,
          width_nand3_path_n,
          width_nand3_path_p,
          c_load_nand3_path_out,
          p_to_n_sz_ratio,
          is_dram_, false, g_tp.max_w_nmos_);
    }
  }
}



/*
 * Computes the driver's area and its leakage / gate-leakage figures.
 *
 * For each path the per-gate inverter area and leakage currents are summed over
 * the gates of one chain and then multiplied by the number of buffers on that
 * path. Leakage is stored as current times Vdd in power_nand{2,3}_path.readOp,
 * and the combined area is handed to area.set_area().
 * Nothing is written when flag_driver_exists is 0.
 */
void PredecBlkDrv::compute_area()
{
  double area_nand2_path = 0;
  double area_nand3_path = 0;
  double leak_nand2_path = 0;
  double leak_nand3_path = 0;
  double gate_leak_nand2_path = 0;
  double gate_leak_nand3_path = 0;

  if (flag_driver_exists)
  { // first check whether a predecoder block driver is needed
    for (int i = 0; i < number_gates_nand2_path; ++i)
    {
      area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
      leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
      gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
    }
    // One chain per NAND2-path buffer.
    area_nand2_path *= (num_buffers_driving_1_nand2_load +
                        num_buffers_driving_2_nand2_load +
                        num_buffers_driving_4_nand2_load);
    leak_nand2_path *= (num_buffers_driving_1_nand2_load +
                        num_buffers_driving_2_nand2_load +
                        num_buffers_driving_4_nand2_load);
    gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
                             num_buffers_driving_2_nand2_load +
                             num_buffers_driving_4_nand2_load);

    for (int i = 0; i < number_gates_nand3_path; ++i)
    {
      area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
      leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
      gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
    }
    // One chain per NAND3-path buffer.
    area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
    leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
    gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);

    power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
    power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
    power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
    power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
    area.set_area(area_nand2_path + area_nand3_path);
  }
}



// NOTE(review): `pair` appears here without template arguments (return type and
// ret_val); they look like they were lost from this copy of the file -- confirm
// against the class declaration.
pair PredecBlkDrv::compute_delays(
    double inrisetime_nand2_path,
    double inrisetime_nand3_path)
{
  pair ret_val;
  ret_val.first = 0; // outrisetime_nand2_path
  ret_val.second = 0; // outrisetime_nand3_path
  int i;
  double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
  double Vdd = g_tp.peri_global.Vdd;

  if (flag_driver_exists)
  {
    // Every NAND2-path stage except the last drives the gate of the next stage.
    for (i = 0; i < number_gates_nand2_path - 1; ++i)
    {
      rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
      c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
      c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_gate_load); this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; inrisetime_nand2_path = this_delay / (1.0 - 0.5); power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; } // Final inverter drives the predecoder block or the decoder output load if (number_gates_nand2_path != 0) { i = number_gates_nand2_path - 1; rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); c_load = c_load_nand2_path_out; tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2; this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); delay_nand2_path += this_delay; ret_val.first = this_delay / (1.0 - 0.5); power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; // cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + drv1->power_nand3_path.readOp.leakage + drv2->power_nand2_path.readOp.leakage + drv2->power_nand3_path.readOp.leakage; block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + blk2->power_nand2_path.readOp.leakage + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + drv1->power_nand3_path.readOp.gate_leakage + drv2->power_nand2_path.readOp.gate_leakage + drv2->power_nand3_path.readOp.gate_leakage; block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + blk1->power_nand3_path.readOp.gate_leakage + 
blk1->power_L2.readOp.gate_leakage + blk2->power_nand2_path.readOp.gate_leakage + blk2->power_nand3_path.readOp.gate_leakage + blk2->power_L2.readOp.gate_leakage; power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } void PredecBlkDrv::leakage_feedback(double temperature) { double leak_nand2_path = 0; double leak_nand3_path = 0; double gate_leak_nand2_path = 0; double gate_leak_nand3_path = 0; if (flag_driver_exists) { // first check whether a predecoder block driver is needed for (int i = 0; i < number_gates_nand2_path; ++i) { leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); } leak_nand2_path *= (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + num_buffers_driving_4_nand2_load); gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + num_buffers_driving_4_nand2_load); for (int i = 0; i < number_gates_nand3_path; ++i) { leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); } leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; } } double Predec::compute_delays(double inrisetime) { // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. 
pair tmp_pair1, tmp_pair2; tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); tmp_pair1 = blk1->compute_delays(tmp_pair1); tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); tmp_pair2 = blk2->compute_delays(tmp_pair2); tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); driver_power.readOp.dynamic = drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; block_power.readOp.dynamic = blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + blk1->power_L2.readOp.dynamic + blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + blk2->power_L2.readOp.dynamic; power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; delay = tmp_pair1.first; return tmp_pair1.second; } void Predec::leakage_feedback(double temperature) { drv1->leakage_feedback(temperature); drv2->leakage_feedback(temperature); blk1->leakage_feedback(temperature); blk2->leakage_feedback(temperature); driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + drv1->power_nand3_path.readOp.leakage + drv2->power_nand2_path.readOp.leakage + drv2->power_nand3_path.readOp.leakage; block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + blk1->power_nand3_path.readOp.leakage + blk1->power_L2.readOp.leakage + blk2->power_nand2_path.readOp.leakage + blk2->power_nand3_path.readOp.leakage + blk2->power_L2.readOp.leakage; power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + drv1->power_nand3_path.readOp.gate_leakage + 
drv2->power_nand2_path.readOp.gate_leakage + drv2->power_nand3_path.readOp.gate_leakage; block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + blk1->power_nand3_path.readOp.gate_leakage + blk1->power_L2.readOp.gate_leakage + blk2->power_nand2_path.readOp.gate_leakage + blk2->power_nand3_path.readOp.gate_leakage + blk2->power_L2.readOp.gate_leakage; power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } // returns pair Predec::get_max_delay_before_decoder( pair input_pair1, pair input_pair2) { pair ret_val; double delay; delay = drv1->delay_nand2_path + blk1->delay_nand2_path; ret_val.first = delay; ret_val.second = input_pair1.first; delay = drv1->delay_nand3_path + blk1->delay_nand3_path; if (ret_val.first < delay) { ret_val.first = delay; ret_val.second = input_pair1.second; } delay = drv2->delay_nand2_path + blk2->delay_nand2_path; if (ret_val.first < delay) { ret_val.first = delay; ret_val.second = input_pair2.first; } delay = drv2->delay_nand3_path + blk2->delay_nand3_path; if (ret_val.first < delay) { ret_val.first = delay; ret_val.second = input_pair2.second; } return ret_val; } Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram) :number_gates(0), min_number_gates(2), c_gate_load(c_gate_load_), c_wire_load(c_wire_load_), r_wire_load(r_wire_load_), delay(0), // power(), is_dram_(is_dram), total_driver_nwidth(0), total_driver_pwidth(0) { for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { width_n[i] = 0; width_p[i] = 0; } compute_widths(); compute_area(); } void Driver::compute_widths() { double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); double c_load = c_gate_load + c_wire_load; width_n[0] = g_tp.min_w_nmos_; width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); number_gates = logical_effort( min_number_gates, 1, F, width_n, width_p, c_load, p_to_n_sz_ratio, is_dram_, false, 
g_tp.max_w_nmos_); } void Driver::compute_area() { double cumulative_area = 0; ///double cumulative_curr = 0; // cumulative leakage current ///double cumulative_curr_Ig = 0; // cumulative leakage current area.h = g_tp.cell_h_def; for (int i = 0; i < number_gates; i++) { cumulative_area += compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); ///cumulative_curr += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_); ///cumulative_curr_Ig = cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_); } area.w = (cumulative_area / area.h); } void Driver::compute_power_gating() { //For all driver change there is only one sleep transistors to save area //Total transistor width for sleep tx calculation for (int i = 0; i <=number_gates; i++) { total_driver_nwidth += width_n[i]; total_driver_pwidth += width_p[i]; } //compute sleep tx bool is_footer = false; double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); double detalV; double c_wakeup; c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, area.h);//Psleep tx detalV = g_tp.peri_global.Vdd-g_tp.peri_global.Vcc_min; if (g_ip->power_gating) sleeptx = new Sleep_tx (g_ip->perfloss, Isat_subarray, is_footer, c_wakeup, detalV, 1, area); } double Driver::compute_delay(double inrisetime) { int i; double rd, c_load, c_intrinsic, tf; double this_delay = 0; for (i = 0; i < number_gates - 1; ++i) { rd = tr_R_on(width_n[i], NCH, 1, is_dram_); c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; inrisetime = this_delay / (1.0 - 0.5); power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; power.readOp.gate_leakage += 
cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; } i = number_gates - 1; c_load = c_gate_load + c_wire_load; rd = tr_R_on(width_n[i], NCH, 1, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; return this_delay / (1.0 - 0.5); } /* void Driver::compute_area() { double cumulative_area = 0; double cumulative_curr = 0; // cumulative leakage current double cumulative_curr_Ig = 0; // cumulative leakage current area.h = g_tp.h_dec * g_tp.dram.b_h; for (int i = 1; i < number_gates; i++) { cumulative_area += compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); cumulative_curr += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_); cumulative_curr_Ig = cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_); } area.w = (cumulative_area / area.h); } */