diff --git a/Anglais/CV/CV_EN - Copie.docx b/Anglais/CV/CV_EN - Copie.docx new file mode 100644 index 0000000..7c7475a Binary files /dev/null and b/Anglais/CV/CV_EN - Copie.docx differ diff --git a/Anglais/CV/CV_EN_2.docx b/Anglais/CV/CV_EN_2.docx new file mode 100644 index 0000000..5f91af9 Binary files /dev/null and b/Anglais/CV/CV_EN_2.docx differ diff --git a/Anglais/CV/CV_bon/CV_EN_Sasa_Radosavljevic.pdf b/Anglais/CV/CV_bon/CV_EN_Sasa_Radosavljevic.pdf new file mode 100644 index 0000000..181b36b Binary files /dev/null and b/Anglais/CV/CV_bon/CV_EN_Sasa_Radosavljevic.pdf differ diff --git a/Anglais/CV/CV_bon/CV_Sasa_Radosavljevic.pdf b/Anglais/CV/CV_bon/CV_Sasa_Radosavljevic.pdf new file mode 100644 index 0000000..c8ca543 Binary files /dev/null and b/Anglais/CV/CV_bon/CV_Sasa_Radosavljevic.pdf differ diff --git a/Anglais/CV/CV_en_Sasa_Radosavljevic_ver_2.pdf b/Anglais/CV/CV_en_Sasa_Radosavljevic_ver_2.pdf new file mode 100644 index 0000000..1cf038c Binary files /dev/null and b/Anglais/CV/CV_en_Sasa_Radosavljevic_ver_2.pdf differ diff --git a/Anglais/CV/Sasa_Radosaljveic_Cover_Letter.docx b/Anglais/CV/Sasa_Radosaljveic_Cover_Letter.docx new file mode 100644 index 0000000..4bf59a9 Binary files /dev/null and b/Anglais/CV/Sasa_Radosaljveic_Cover_Letter.docx differ diff --git a/Anglais/CV/Sasa_Radosaljveic_Cover_Letter.pdf b/Anglais/CV/Sasa_Radosaljveic_Cover_Letter.pdf new file mode 100644 index 0000000..e1bdb38 Binary files /dev/null and b/Anglais/CV/Sasa_Radosaljveic_Cover_Letter.pdf differ diff --git a/Anglais/elevator_pitch.odt b/Anglais/elevator_pitch.odt new file mode 100644 index 0000000..0cc0552 Binary files /dev/null and b/Anglais/elevator_pitch.odt differ diff --git a/Anglais/elevator_pitch.pdf b/Anglais/elevator_pitch.pdf new file mode 100644 index 0000000..6ccb682 Binary files /dev/null and b/Anglais/elevator_pitch.pdf differ diff --git a/T1/TP1/7179-Arm_Cortex-R_Comparison_Table_V8.pdf b/T1/TP1/7179-Arm_Cortex-R_Comparison_Table_V8.pdf new file mode 
100644 index 0000000..7924bad Binary files /dev/null and b/T1/TP1/7179-Arm_Cortex-R_Comparison_Table_V8.pdf differ diff --git a/T1/TP1/Architecting_Efficient_Interconnects_for_Large_Caches_with_CACTI_6.0.pdf b/T1/TP1/Architecting_Efficient_Interconnects_for_Large_Caches_with_CACTI_6.0.pdf new file mode 100644 index 0000000..77462e2 Binary files /dev/null and b/T1/TP1/Architecting_Efficient_Interconnects_for_Large_Caches_with_CACTI_6.0.pdf differ diff --git a/T1/TP1/Arm_Cortex-A_Processor_Comparison_Table.pdf b/T1/TP1/Arm_Cortex-A_Processor_Comparison_Table.pdf new file mode 100644 index 0000000..61a9a86 Binary files /dev/null and b/T1/TP1/Arm_Cortex-A_Processor_Comparison_Table.pdf differ diff --git a/T1/TP1/HPL-2009-85.pdf b/T1/TP1/HPL-2009-85.pdf new file mode 100644 index 0000000..9610632 Binary files /dev/null and b/T1/TP1/HPL-2009-85.pdf differ diff --git a/T1/TP1/Master SETI-2022-TP1-v1[8705].pdf b/T1/TP1/Master SETI-2022-TP1-v1[8705].pdf new file mode 100644 index 0000000..8f690a4 Binary files /dev/null and b/T1/TP1/Master SETI-2022-TP1-v1[8705].pdf differ diff --git a/T1/TP1/cacti-master/2DDRAM_Samsung2GbDDR2.cfg b/T1/TP1/cacti-master/2DDRAM_Samsung2GbDDR2.cfg new file mode 100644 index 0000000..d035eae --- /dev/null +++ b/T1/TP1/cacti-master/2DDRAM_Samsung2GbDDR2.cfg @@ -0,0 +1,194 @@ +# Cache size +//-size (bytes) 528 +//-size (bytes) 4096 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +//-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 268435456 +//-size (bytes) 536870912 +//-size (bytes) 67108864 +//-size (bytes) 536870912 +//-size (bytes) 1073741824 +# For 3D DRAM memory please use Gb as units +-size (Gb) 2 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 128 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +-associativity 1 +//-associativity 16 + 
+-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 16 +//-technology (u) 0.032 +//-technology (u) 0.040 +//-technology (u) 0.065 +//-technology (u) 0.078 +-technology (u) 0.080 + +# following three parameters are meaningful only for main memories + +//-page size (bits) 8192 +-burst length 4 +-internal prefetch width 1 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +//-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" +-Data array cell type - "comm-dram" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +//-Data array peripheral type - "itrs-hp" +-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +//-output/input bus width 64 +-output/input bus width 64 + +// 300-400 in steps of 10 +-operating temperature (K) 350 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +//-cache type "cache" +//-cache type "ram" +//-cache type "main memory" +-cache type "3D memory or 2D main memory" + +# Parameters for 3D DRAM +//-page size (bits) 16384 +-page size (bits) 8192 +//-page size (bits) 4096 +-burst depth 4 +-IO width 4 +-system frequency (MHz) 266 + +-stacked die count 1 +-partitioning 
granularity 0 // 0: coarse-grained rank-level; 1: fine-grained rank-level +//-TSV projection 1 // 0: ITRS aggressive; 1: industrial conservative + +## End of parameters for 3D DRAM + +# to model special structure like branch target buffers, directory, etc. +# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 45 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +-access mode (normal, sequential, fast) - "fast" +//-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 50:100000:100000:100000:1000000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_5" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +-Wire outside mat - "global" +//-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "false" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +-Force cache config - "true" +//-Force cache config 
- "false" +-Ndwl 128 +-Ndbl 32 +-Nspd 1 +-Ndcm 1 +-Ndsam1 1 +-Ndsam2 1 + diff --git a/T1/TP1/cacti-master/2DDRAM_micron1Gb.cfg b/T1/TP1/cacti-master/2DDRAM_micron1Gb.cfg new file mode 100644 index 0000000..4b94de4 --- /dev/null +++ b/T1/TP1/cacti-master/2DDRAM_micron1Gb.cfg @@ -0,0 +1,194 @@ +# Cache size +//-size (bytes) 528 +//-size (bytes) 4096 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +//-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 268435456 +//-size (bytes) 536870912 +//-size (bytes) 67108864 +//-size (bytes) 536870912 +//-size (bytes) 1073741824 +# For 3D DRAM memory please use Gb as units +-size (Gb) 1 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 128 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +-associativity 1 +//-associativity 16 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 8 +//-technology (u) 0.032 +//-technology (u) 0.040 +//-technology (u) 0.065 +-technology (u) 0.078 +//-technology (u) 0.080 + +# following three parameters are meaningful only for main memories + +//-page size (bits) 8192 +-burst length 4 +-internal prefetch width 1 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +//-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" +-Data array cell type - "comm-dram" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +//-Data array peripheral type - "itrs-hp" +-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array 
cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +//-output/input bus width 64 +-output/input bus width 64 + +// 300-400 in steps of 10 +-operating temperature (K) 350 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +//-cache type "cache" +//-cache type "ram" +//-cache type "main memory" +-cache type "3D memory or 2D main memory" + +## Parameters for 3D DRAM +-page size (bits) 16384 +//-page size (bits) 8192 +-burst depth 8 +-IO width 4 +-system frequency (MHz) 533 + +-stacked die count 1 +-partitioning granularity 0 // 0: coarse-grained rank-level; 1: fine-grained rank-level +//-TSV projection 1 // 0: ITRS aggressive; 1: industrial conservative + +## End of parameters for 3D DRAM + + +# to model special structure like branch target buffers, directory, etc. 
+# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 45 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +-access mode (normal, sequential, fast) - "fast" +//-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:10 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 50:100000:100000:100000:1000000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +-Wire outside mat - "global" +//-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +-Force cache config - "true" +//-Force cache config 
- "false" +-Ndwl 16 +-Ndbl 16 +-Nspd 1 +-Ndcm 1 +-Ndsam1 1 +-Ndsam2 1 + diff --git a/T1/TP1/cacti-master/3DDRAM_Samsung3D8Gb_extened.cfg b/T1/TP1/cacti-master/3DDRAM_Samsung3D8Gb_extened.cfg new file mode 100644 index 0000000..197bc21 --- /dev/null +++ b/T1/TP1/cacti-master/3DDRAM_Samsung3D8Gb_extened.cfg @@ -0,0 +1,197 @@ +# Cache size +//-size (bytes) 528 +//-size (bytes) 4096 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +//-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 268435456 +//-size (bytes) 536870912 +//-size (bytes) 67108864 +//-size (bytes) 536870912 +//-size (bytes) 1073741824 +# For 3D DRAM memory please use Gb as units +-size (Gb) 8 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 128 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +-associativity 1 +//-associativity 16 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 8 +//-technology (u) 0.032 +//-technology (u) 0.040 +//-technology (u) 0.065 +//-technology (u) 0.078 +//-technology (u) 0.080 +//-technology (u) 0.090 +-technology (u) 0.050 + +# following three parameters are meaningful only for main memories + +//-page size (bits) 8192 +-burst length 4 +-internal prefetch width 1 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +//-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" +-Data array cell type - "comm-dram" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +//-Data array peripheral type - "itrs-hp" +-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one 
of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +//-output/input bus width 64 +-output/input bus width 64 + +// 300-400 in steps of 10 +-operating temperature (K) 350 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +//-cache type "cache" +//-cache type "ram" +//-cache type "main memory" # old main memory model, in fact, it is eDRAM model. +-cache type "3D memory or 2D main memory" # once this parameter is used, the new parameter section below of will override the same parameter above + +# +//-page size (bits) 16384 +-page size (bits) 8192 +//-page size (bits) 4096 +-burst depth 8 // for 3D DRAM, IO per bank equals the product of burst depth and IO width +-IO width 4 +-system frequency (MHz) 677 + +-stacked die count 4 +-partitioning granularity 0 // 0: coarse-grained rank-level; 1: fine-grained rank-level +-TSV projection 1 // 0: ITRS aggressive; 1: industrial conservative + +## End of parameters for 3D DRAM + +# to model special structure like branch target buffers, directory, etc. 
+# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 45 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +-access mode (normal, sequential, fast) - "fast" +//-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 50:100000:100000:100000:1000000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +-Wire outside mat - "global" +//-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +-Force cache config - "true" +//-Force cache 
config - "false" +-Ndwl 16 +-Ndbl 32 +-Nspd 1 +-Ndcm 1 +-Ndsam1 1 +-Ndsam2 1 + diff --git a/T1/TP1/cacti-master/README b/T1/TP1/cacti-master/README new file mode 100644 index 0000000..0dc88f5 --- /dev/null +++ b/T1/TP1/cacti-master/README @@ -0,0 +1,122 @@ +----------------------------------------------------------- + + + ____ __ ________ __ + /\ _`\ /\ \__ __ /\_____ \ /'__`\ + \ \ \/\_\ __ ___\ \ ,_\/\_\ \/___//'/'/\ \/\ \ + \ \ \/_/_ /'__`\ /'___\ \ \/\/\ \ /' /' \ \ \ \ \ + \ \ \L\ \/\ \L\.\_/\ \__/\ \ \_\ \ \ /' /'__ \ \ \_\ \ + \ \____/\ \__/.\_\ \____\\ \__\\ \_\ /\_/ /\_\ \ \____/ + \/___/ \/__/\/_/\/____/ \/__/ \/_/ \// \/_/ \/___/ + + +A Tool to Model Caches/Memories, 3D stacking, and off-chip IO +----------------------------------------------------------- + +CACTI is an analytical tool that takes a set of cache/memory para- +meters as input and calculates its access time, power, cycle +time, and area. +CACTI was originally developed by Dr. Jouppi and Dr. Wilton +in 1993 and since then it has undergone six major +revisions. + +List of features (version 1-7): +=============================== +The following is the list of features supported by the tool. + +* Power, delay, area, and cycle time model for + direct mapped caches + set-associative caches + fully associative caches + Embedded DRAM memories + Commodity DRAM memories + +* Support for modeling multi-ported uniform cache access (UCA) + and multi-banked, multi-ported non-uniform cache access (NUCA). + +* Leakage power calculation that also considers the operating + temperature of the cache. + +* Router power model. + +* Interconnect model with different delay, power, and area + properties including low-swing wire model. + +* An interface to perform trade-off analysis involving power, delay, + area, and bandwidth. + +* All process specific values used by the tool are obtained + from ITRS and currently, the tool supports 90nm, 65nm, 45nm, + and 32nm technology nodes. 
+ +* Chip IO model to calculate latency and energy for DDR bus. Users can model + different loads (fan-outs) and evaluate the impact on frequency and energy. + This model can be used to study LR-DIMMs, R-DIMMs, etc. + +Version 7.0 is derived from 6.5 and merged with CACTI 3D. +It has many new additions apart from code refinements and +bug fixes: new IO model, 3D memory model, and power gating models. +Ref: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models + MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4 + CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory + +-------------------------------------------------------------------------- +Version 6.5 has a new c++ code base and includes numerous bug fixes. +CACTI 5.3 and 6.0 activate an entire row of mats to read/write a single +block of data. This technique improves reliability at the cost of +power. CACTI 6.5 activates the minimum number of mats just enough to retrieve +a block to minimize power. + +How to use the tool? +==================== +Prior versions of CACTI take input parameters such as cache +size and technology node as a set of command line arguments. +To avoid a long list of command line arguments, +CACTI 6.5 & 7 let users specify their cache model in a more +detailed manner by using a config file (cache.cfg). + +-> define the cache model using cache.cfg +-> run the "cacti" binary <./cacti -infile cache.cfg> + +CACTI also provides a command line interface similar to earlier versions.
The command line interface can be used as + +./cacti cache_size line_size associativity rw_ports excl_read_ports excl_write_ports + single_ended_read_ports search_ports banks tech_node output_width specific_tag tag_width + access_mode cache main_mem obj_func_delay obj_func_dynamic_power obj_func_leakage_power + obj_func_cycle_time obj_func_area dev_func_delay dev_func_dynamic_power dev_func_leakage_power + dev_func_area dev_func_cycle_time ed_ed2_none temp wt data_arr_ram_cell_tech_flavor_in + data_arr_peri_global_tech_flavor_in tag_arr_ram_cell_tech_flavor_in tag_arr_peri_global_tech_flavor_in + interconnect_projection_type_in wire_inside_mat_type_in wire_outside_mat_type_in + REPEATERS_IN_HTREE_SEGMENTS_in VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in + BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in PAGE_SIZE_BITS_in BURST_LENGTH_in + INTERNAL_PREFETCH_WIDTH_in force_wiretype wiretype force_config ndwl ndbl nspd ndcm + ndsam1 ndsam2 ecc + +For complete documentation of the tool, please refer +to the following publications and reports. + +CACTI-5.3 & 6 reports - Details on Memory/cache organizations and tradeoffs. + +Latency/Energy tradeoffs for large caches and NUCA design: + "Optimizing NUCA Organizations and Wiring Alternatives for Large Caches With CACTI 6.0", that appears in MICRO 2007. + +Memory IO design: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models, + MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4 + CACTI-IO Technical Report - http://www.hpl.hp.com/techreports/2013/HPL-2013-79.pdf + +3D model: + CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory + +We are still improving the tool and refining the code. If you +have any comments, questions, or suggestions please write to +us.
+ +Naveen Muralimanohar +naveen.muralimanohar@hpe.com + +Ali Shafiee +shafiee@cs.utah.edu + +Vaishnav Srinivas +vaishnav.srinivas@gmail.com + diff --git a/T1/TP1/cacti-master/TSV.cc b/T1/TP1/cacti-master/TSV.cc new file mode 100644 index 0000000..2821d4b --- /dev/null +++ b/T1/TP1/cacti-master/TSV.cc @@ -0,0 +1,242 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "TSV.h" + +TSV::TSV(enum TSV_type tsv_type, + /*TechnologyParameter::*/DeviceType *dt)://TSV driver's device type set to be peri_global + deviceType(dt), tsv_type(tsv_type) +{ + num_gates = 1; + num_gates_min = 1;//Is there a minimum number of stages? + min_w_pmos = deviceType -> n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + + switch (tsv_type) + { + case Fine: + cap = g_tp.tsv_parasitic_capacitance_fine; + res = g_tp.tsv_parasitic_resistance_fine; + min_area = g_tp.tsv_minimum_area_fine; + break; + case Coarse: + cap = g_tp.tsv_parasitic_capacitance_coarse; + res = g_tp.tsv_parasitic_resistance_coarse; + min_area = g_tp.tsv_minimum_area_coarse; + break; + default: + break; + } + + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) + { + w_TSV_n[i] = 0; + w_TSV_p[i] = 0; + } + + double first_buf_stg_coef = 5; // To tune the total buffer delay. + w_TSV_n[0] = g_tp.min_w_nmos_*first_buf_stg_coef; + w_TSV_p[0] = min_w_pmos *first_buf_stg_coef; + + is_dram = 0; + is_wl_tr = 0; + + //What does the function assert() mean? Should I put the function here? + compute_buffer_stage(); + compute_area(); + compute_delay(); +} + +TSV::~TSV() +{ +} + +void TSV::compute_buffer_stage() +{ + double p_to_n_sz_ratio = deviceType->n_to_p_eff_curr_drv_ratio; + + //BEOL parasitics in Katti's E modeling and charac. of TSV. Needs further detailed values. 
+ //double res_beol = 0.1;//inaccurate + //double cap_beol = 1e-15; + + //C_load_TSV = cap_beol + cap + cap_beol + gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); + C_load_TSV = cap + gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); //+ 57.5e-15; + if(g_ip->print_detail_debug) + { + cout << " The input cap of 1st buffer: " << gate_C(w_TSV_n[0] + w_TSV_p[0], 0) * 1e15 << " fF"; + } + double F = C_load_TSV / gate_C(w_TSV_n[0] + w_TSV_p[0], 0); + if(g_ip->print_detail_debug) + { + cout<<"\nF is "<Vdd; + double cumulative_area = 0; + double cumulative_curr = 0; // cumulative leakage current + double cumulative_curr_Ig = 0; // cumulative leakage current + Buffer_area.h = g_tp.cell_h_def;//cell_h_def is the assigned height for memory cell (5um), is it correct to use it here? + + //logic_effort() didn't give the size of w_n[0] and w_p[0], which is min size inverter + //w_TSV_n[0] = g_tp.min_w_nmos_; + //w_TSV_p[0] = min_w_pmos; + + int i; + for (i = 0; i < num_gates; i++) + { + cumulative_area += compute_gate_area(INV, 1, w_TSV_p[i], w_TSV_n[i], Buffer_area.h); + if(g_ip->print_detail_debug) + { + cout << "\n\tArea up to the " << i+1 << " stages is: " << cumulative_area << " um2"; + } + cumulative_curr += cmos_Isub_leakage(w_TSV_n[i], w_TSV_p[i], 1, inv, is_dram); + cumulative_curr_Ig += cmos_Ig_leakage(w_TSV_n[i], w_TSV_p[i], 1, inv, is_dram);// The operator += is mistakenly put as = in decoder.cc + } + power.readOp.leakage = cumulative_curr * Vdd; + power.readOp.gate_leakage = cumulative_curr_Ig * Vdd; + + Buffer_area.set_area(cumulative_area); + Buffer_area.w = (cumulative_area / Buffer_area.h); + + TSV_metal_area.set_area(min_area * 3.1416/16); + + if( Buffer_area.get_area() < min_area - TSV_metal_area.get_area() ) + area.set_area(min_area); + else + area.set_area(Buffer_area.get_area() + TSV_metal_area.get_area()); + +} + +void TSV::compute_delay() +{ + //Buffer chain delay and Dynamic Power + double rd, tf, this_delay, c_load, c_intrinsic, inrisetime = 0/*The initial time*/; + 
//is_dram, is_wl_tr are declared to be false in the constructor + rd = tr_R_on(w_TSV_n[0], NCH, 1, is_dram, false, is_wl_tr); + c_load = gate_C(w_TSV_n[1] + w_TSV_p[1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_TSV_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_TSV_n[0], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + //Refer to horowitz function definition + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + + double Vdd = deviceType -> Vdd; + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + + int i; + for (i = 1; i < num_gates - 1; ++i) + { + rd = tr_R_on(w_TSV_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_load = gate_C(w_TSV_p[i+1] + w_TSV_n[i+1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_TSV_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_TSV_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + } + + // add delay of final inverter that drives the TSV + i = num_gates - 1; + c_load = C_load_TSV; + rd = tr_R_on(w_TSV_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_TSV_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_TSV_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + //The delay method for the last stage of buffer chain in Decoder.cc + + //double res_beol = 0.1;//inaccurate + //double R_TSV_out = res_beol + res + res_beol; + double R_TSV_out = res; + tf = rd * (c_intrinsic + c_load) + R_TSV_out * c_load / 2; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; //Dynamic power done + + //Is the delay actually delay/(1.0-0.5)?? 
+ //ret_val = this_delay / (1.0 - 0.5); + //return ret_val;//Originally for decoder.cc to get outrise time + + + /* This part is to obtain delay in the TSV path, refer to Katti's paper. + * It can be used alternatively as the step to get the final-stage delay + double C_ext = c_intrinsic; + R_dr = rd; + double C_int = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0.0, is_dram, false, is_wl_tr); + delay_TSV_path = 0.693 * (R_dr * C_ext + (R_dr + res_beol) * cap_beol + (R_dr + res_beol + 0.5 * res) * cap + + (R_dr + res_beol + res + res_beol) * (cap_beol + C_int); + delay += delay_TSV_path; + */ +} + +void TSV::print_TSV() +{ + + cout << "\nTSV Properties:\n\n"; + cout << " Delay Optimal - "<< + " \n\tTSV Cap: " << cap * 1e15 << " fF" << + " \n\tTSV Res: " << res * 1e3 << " mOhm"<< + " \n\tNumber of Buffer Chain stages - " << num_gates << + " \n\tDelay - " << delay * 1e9 << " (ns) " + " \n\tPowerD - " << power.readOp.dynamic * 1e9<< " (nJ)" + " \n\tPowerL - " << power.readOp.leakage * 1e3<< " (mW)" + " \n\tPowerLgate - " << power.readOp.gate_leakage * 1e3<< " (mW)\n" << + " \n\tBuffer Area: " << Buffer_area.get_area() << " um2" << + " \n\tBuffer Height: " << Buffer_area.h << " um" << + " \n\tBuffer Width: " << Buffer_area.w << " um" << + " \n\tTSV metal area: " << TSV_metal_area.get_area() << " um2" << + " \n\tTSV minimum occupied area: " < +#include +#include + + +class TSV : public Component +{ + public: + TSV(enum TSV_type tsv_type, + /*TechnologyParameter::*/DeviceType * dt = &(g_tp.peri_global));//Should change peri_global to TSV in technology.cc + //TSV():len(20),rad(2.5),pitch(50){} + ~TSV(); + + double res;//TSV resistance + double cap;//TSV capacitance + double C_load_TSV;//The intrinsic load plus the load TSV is driving, needs changes? + double min_area; + + //int num_IO;//number of I/O + int num_gates; + int num_gates_min;//Necessary? 
+ double w_TSV_n[MAX_NUMBER_GATES_STAGE]; + double w_TSV_p[MAX_NUMBER_GATES_STAGE]; + + //double delay_TSV_path;//Delay of TSV path including the parasitics + + double is_dram;//two external arguments, defaulted to be false in constructor + double is_wl_tr; + + void compute_buffer_stage(); + void compute_area(); + void compute_delay(); + void print_TSV(); + + Area TSV_metal_area; + Area Buffer_area; + + /*//Herigated from Component + double delay; + Area area; + powerDef power, rt_power; + double delay; + double cycle_time; + + int logical_effort();*/ + + private: + double min_w_pmos; + /*TechnologyParameter::*/DeviceType * deviceType; + unsigned int tsv_type; + +}; + + +#endif /* TSV_H_ */ diff --git a/T1/TP1/cacti-master/Ucache.cc b/T1/TP1/cacti-master/Ucache.cc new file mode 100644 index 0000000..7df0207 --- /dev/null +++ b/T1/TP1/cacti-master/Ucache.cc @@ -0,0 +1,1073 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + +#include +#include + + +#include "area.h" +#include "bank.h" +#include "basic_circuit.h" +#include "component.h" +#include "const.h" +#include "decoder.h" +#include "parameter.h" +#include "Ucache.h" +#include "subarray.h" +#include "uca.h" + +#include +#include +#include +#include + +using namespace std; + +const uint32_t nthreads = NTHREADS; + + +void min_values_t::update_min_values(const min_values_t * val) +{ + min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; + min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; + min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; + min_area = (min_area > val->min_area) ? val->min_area : min_area; + min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc; +} + + + +void min_values_t::update_min_values(const uca_org_t & res) +{ + min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; + min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res.power.readOp.leakage) ? 
res.power.readOp.leakage : min_leakage; + min_area = (min_area > res.area) ? res.area : min_area; + min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; +} + +void min_values_t::update_min_values(const nuca_org_t * res) +{ + min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; + min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage; + min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area; + min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc; +} + +void min_values_t::update_min_values(const mem_array * res) +{ + min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; + min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage; + min_area = (min_area > res->area) ? res->area : min_area; + min_cyc = (min_cyc > res->cycle_time) ? 
res->cycle_time : min_cyc; +} + + + +void * calc_time_mt_wrapper(void * void_obj) +{ + calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj; + uint32_t tid = calc_obj->tid; + list & data_arr = calc_obj->data_arr; + list & tag_arr = calc_obj->tag_arr; + bool is_tag = calc_obj->is_tag; + bool pure_ram = calc_obj->pure_ram; + bool pure_cam = calc_obj->pure_cam; + bool is_main_mem = calc_obj->is_main_mem; + double Nspd_min = calc_obj->Nspd_min; + min_values_t * data_res = calc_obj->data_res; + min_values_t * tag_res = calc_obj->tag_res; + + data_arr.clear(); + data_arr.push_back(new mem_array); + tag_arr.clear(); + tag_arr.push_back(new mem_array); + + uint32_t Ndwl_niter = _log2(MAXDATAN) + 1; + uint32_t Ndbl_niter = _log2(MAXDATAN) + 1; + uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1; + uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; + + + bool is_valid_partition; + int wt_min, wt_max; + + if (g_ip->force_wiretype) { + if (g_ip->wt == Full_swing) { + wt_min = Global; + wt_max = Low_swing-1; + } + else { + switch(g_ip->wt) { + case Global: + wt_min = wt_max = Global; + break; + case Global_5: + wt_min = wt_max = Global_5; + break; + case Global_10: + wt_min = wt_max = Global_10; + break; + case Global_20: + wt_min = wt_max = Global_20; + break; + case Global_30: + wt_min = wt_max = Global_30; + break; + case Low_swing: + wt_min = wt_max = Low_swing; + break; + default: + cerr << "Unknown wire type!\n"; + exit(0); + } + } + } + else { + wt_min = Global; + wt_max = Low_swing; + } + + for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) + { + for (int wr = wt_min; wr <= wt_max; wr++) + { + for (uint32_t iter = tid; iter < niter; iter += nthreads) + { + // reconstruct Ndwl, Ndbl, Ndcm + unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter)); + unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter); + unsigned int Ndcm = 1 << (iter % Ndcm_niter); + for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 
*= 2) + { + for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) + { + //for debuging + if (g_ip->force_cache_config && is_tag == false) + { + wr = g_ip->wt; + Ndwl = g_ip->ndwl; + Ndbl = g_ip->ndbl; + Ndcm = g_ip->ndcm; + if(g_ip->nspd != 0) { + Nspd = g_ip->nspd; + } + if(g_ip->ndsam1 != 0) { + Ndsam_lev_1 = g_ip->ndsam1; + Ndsam_lev_2 = g_ip->ndsam2; + } + } + + if (is_tag == true) + { + is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, + Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, + tag_arr.back(), 0, NULL, NULL,(Wire_type) wr, + is_main_mem); + } + // If it's a fully-associative cache, the data array partition parameters are identical to that of + // the tag array, so compute data array partition properties also here. + if (is_tag == false || g_ip->fully_assoc) + { + is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl, + Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, + data_arr.back(), 0, NULL, NULL,(Wire_type) wr, + is_main_mem); + if (g_ip->is_3d_mem) + { + Ndsam_lev_1 = MAX_COL_MUX+1; + Ndsam_lev_2 = MAX_COL_MUX+1; + } + } + + if (is_valid_partition) + { + if (is_tag == true) + { + tag_arr.back()->wt = (enum Wire_type) wr; + tag_res->update_min_values(tag_arr.back()); + tag_arr.push_back(new mem_array); + } + if (is_tag == false || g_ip->fully_assoc) + { + data_arr.back()->wt = (enum Wire_type) wr; + data_res->update_min_values(data_arr.back()); + data_arr.push_back(new mem_array); + } + } + + if (g_ip->force_cache_config && is_tag == false) + { + wr = wt_max; + iter = niter; + if(g_ip->nspd != 0) { + Nspd = MAXDATASPD; + } + if (g_ip->ndsam1 != 0) { + Ndsam_lev_1 = MAX_COL_MUX+1; + Ndsam_lev_2 = MAX_COL_MUX+1; + } + } + } + } + } + } + } + + delete data_arr.back(); + delete tag_arr.back(); + data_arr.pop_back(); + tag_arr.pop_back(); + + pthread_exit(NULL); +} + + + +bool calculate_time( + bool is_tag, + int pure_ram, + bool pure_cam, + double Nspd, + unsigned int Ndwl, + unsigned int 
Ndbl, + unsigned int Ndcm, + unsigned int Ndsam_lev_1, + unsigned int Ndsam_lev_2, + mem_array *ptr_array, + int flag_results_populate, + results_mem_array *ptr_results, + uca_org_t *ptr_fin_res, + Wire_type wt, // merge from cacti-7 to cacti3d + bool is_main_mem) +{ + DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, wt, is_main_mem); + + if (dyn_p.is_valid != true) + { + return false; + } + + UCA * uca = new UCA(dyn_p); + + + if (flag_results_populate) + { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables + } + else + { + int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; + int num_mats = uca->bank.dp.num_mats; + bool is_fa = uca->bank.dp.fully_assoc; + bool pure_cam = uca->bank.dp.pure_cam; + ptr_array->Ndwl = Ndwl; + ptr_array->Ndbl = Ndbl; + ptr_array->Nspd = Nspd; + ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing; + ptr_array->Ndsam_lev_1 = Ndsam_lev_1; + ptr_array->Ndsam_lev_2 = Ndsam_lev_2; + ptr_array->access_time = uca->access_time; + ptr_array->cycle_time = uca->cycle_time; + ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time; + ptr_array->area_ram_cells = uca->area_all_dataramcells; + ptr_array->area = uca->area.get_area(); + if(g_ip->is_3d_mem) + { //ptr_array->area = (uca->area_all_dataramcells)/0.5; + ptr_array->area = uca->area.get_area(); + if(g_ip->num_die_3d>1) + ptr_array->area += uca->area_TSV_tot; + } + + ptr_array->height = uca->area.h; + ptr_array->width = uca->area.w; + ptr_array->mat_height = uca->bank.mat.area.h; + ptr_array->mat_length = uca->bank.mat.area.w; + ptr_array->subarray_height = uca->bank.mat.subarray.area.h; + ptr_array->subarray_length = uca->bank.mat.subarray.area.w; + ptr_array->power = uca->power; + ptr_array->delay_senseamp_mux_decoder = + MAX(uca->delay_array_to_sa_mux_lev_1_decoder, + uca->delay_array_to_sa_mux_lev_2_decoder); + 
ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver; + ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out; + + ptr_array->delay_route_to_bank = uca->htree_in_add->delay; + ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; + ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay; + ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; + ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; + ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; + ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; + ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree; + ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; + ptr_array->delay_comparator = uca->bank.mat.delay_comparator; + + if(g_ip->is_3d_mem) + { + ptr_array->delay_row_activate_net = uca->membus_RAS->delay_bus; + ptr_array->delay_row_predecode_driver_and_block = uca->membus_RAS->delay_add_predecoder; + ptr_array->delay_row_decoder = uca->membus_RAS->delay_add_decoder; + ptr_array->delay_local_wordline = uca->membus_RAS->delay_lwl_drv; + ptr_array->delay_column_access_net = uca->membus_CAS->delay_bus; + ptr_array->delay_column_predecoder = uca->membus_CAS->delay_add_predecoder; + ptr_array->delay_column_decoder = uca->membus_CAS->delay_add_decoder; + ptr_array->delay_column_selectline = 0; // Integrated into add_decoder + ptr_array->delay_datapath_net = uca->membus_data->delay_bus; + ptr_array->delay_global_data = uca->membus_data->delay_global_data; + ptr_array->delay_local_data_and_drv = uca->membus_data->delay_local_data; + ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv; + ptr_array->delay_data_buffer = uca->membus_data->delay_data_buffer; + + /*ptr_array->energy_row_activate_net = uca->membus_RAS->add_bits * (uca->membus_RAS->center_stripe->power.readOp.dynamic + 
uca->membus_RAS->bank_bus->power.readOp.dynamic); + ptr_array->energy_row_predecode_driver_and_block = uca->membus_RAS->add_predec->power.readOp.dynamic; + ptr_array->energy_row_decoder = uca->membus_RAS->add_dec->power.readOp.dynamic; + ptr_array->energy_local_wordline = uca->membus_RAS->num_lwl_drv * uca->membus_RAS->lwl_drv->power.readOp.dynamic; + ptr_array->energy_column_access_net = uca->membus_CAS->add_bits * (uca->membus_CAS->center_stripe->power.readOp.dynamic + uca->membus_CAS->bank_bus->power.readOp.dynamic); + ptr_array->energy_column_predecoder = uca->membus_CAS->add_predec->power.readOp.dynamic; + ptr_array->energy_column_decoder = uca->membus_CAS->add_dec->power.readOp.dynamic; + ptr_array->energy_column_selectline = uca->membus_CAS->column_sel->power.readOp.dynamic; + ptr_array->energy_datapath_net = uca->membus_data->data_bits * (uca->membus_data->center_stripe->power.readOp.dynamic + uca->membus_data->bank_bus->power.readOp.dynamic); + ptr_array->energy_global_data = uca->membus_data->data_bits * (uca->membus_data->global_data->power.readOp.dynamic); + ptr_array->energy_local_data_and_drv = uca->membus_data->data_bits * (uca->membus_data->data_drv->power.readOp.dynamic); + ptr_array->energy_data_buffer = 0;*/ + + ptr_array->energy_row_activate_net = uca->membus_RAS->power_bus.readOp.dynamic; + ptr_array->energy_row_predecode_driver_and_block = uca->membus_RAS->power_add_predecoder.readOp.dynamic; + ptr_array->energy_row_decoder = uca->membus_RAS->power_add_decoders.readOp.dynamic; + ptr_array->energy_local_wordline = uca->membus_RAS->power_lwl_drv.readOp.dynamic; + ptr_array->energy_bitlines = dyn_p.Ndwl * uca->bank.mat.power_bitline.readOp.dynamic; + ptr_array->energy_sense_amp = dyn_p.Ndwl * uca->bank.mat.power_sa.readOp.dynamic; + + ptr_array->energy_column_access_net = uca->membus_CAS->power_bus.readOp.dynamic; + ptr_array->energy_column_predecoder = uca->membus_CAS->power_add_predecoder.readOp.dynamic; + ptr_array->energy_column_decoder = 
uca->membus_CAS->power_add_decoders.readOp.dynamic; + ptr_array->energy_column_selectline = uca->membus_CAS->power_col_sel.readOp.dynamic; + + ptr_array->energy_datapath_net = uca->membus_data->power_bus.readOp.dynamic; + ptr_array->energy_global_data = uca->membus_data->power_global_data.readOp.dynamic; + ptr_array->energy_local_data_and_drv = uca->membus_data->power_local_data.readOp.dynamic; + ptr_array->energy_subarray_output_driver = uca->bank.mat.power_subarray_out_drv.readOp.dynamic; // + ptr_array->energy_data_buffer = 0; + + ptr_array->area_lwl_drv = uca->area_lwl_drv; + ptr_array->area_row_predec_dec = uca->area_row_predec_dec; + ptr_array->area_col_predec_dec = uca->area_col_predec_dec; + ptr_array->area_subarray = uca->area_subarray; + ptr_array->area_bus = uca->area_bus; + ptr_array->area_address_bus = uca->area_address_bus; + ptr_array->area_data_bus = uca->area_data_bus; + ptr_array->area_data_drv = uca->area_data_drv; + ptr_array->area_IOSA = uca->area_IOSA; + ptr_array->area_sense_amp = uca->area_sense_amp; + + } + + ptr_array->all_banks_height = uca->area.h; + ptr_array->all_banks_width = uca->area.w; + //ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area()); + ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / ptr_array->area; + + ptr_array->power_routing_to_bank = uca->power_routing_to_bank; + ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; + ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; +// cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power; +// cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power; + ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; + 
ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power; + ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; + ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power; + ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power; + ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; + ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= 
num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders; + ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders; + ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders 
.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_bitlines = uca->bank.mat.power_bitline; + ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_sense_amps = uca->bank.mat.power_sa; + ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv; + ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv; + ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_comparators = uca->bank.mat.power_comparator; + ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; + +// cout << " num of mats: " << dyn_p.num_mats << endl; + if (is_fa || pure_cam) + { + ptr_array->power_htree_in_search = uca->bank.htree_in_search->power; +// cout<<"power_htree_in_search"<bank.htree_in_search->power.readOp.leakage<power_htree_out_search = uca->bank.htree_out_search->power; +// cout<<"power_htree_out_search"<bank.htree_out_search->power.readOp.leakage<power_searchline = uca->bank.mat.power_searchline; +// 
cout<<"power_searchlineh"<bank.mat.power_searchline.readOp.leakage<power_searchline.searchOp.dynamic *= num_mats; + ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge; + ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats; + ptr_array->power_matchlines = uca->bank.mat.power_matchline; + ptr_array->power_matchlines.searchOp.dynamic *= num_mats; + ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge; + ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats; + ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv; +// cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<activate_energy = uca->activate_energy; + ptr_array->read_energy = uca->read_energy; + ptr_array->write_energy = uca->write_energy; + ptr_array->precharge_energy = uca->precharge_energy; + ptr_array->refresh_power = uca->refresh_power; + ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page; + ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page; + ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks; + + ptr_array->precharge_delay = uca->precharge_delay; + + if(g_ip->is_3d_mem) + { + //CACTI3DD + ptr_array->t_RCD = uca->t_RCD; + ptr_array->t_RAS = uca->t_RAS; + ptr_array->t_RC = uca->t_RC; + ptr_array->t_CAS = uca->t_CAS; + ptr_array->t_RP = uca->t_RP; + ptr_array->t_RRD = uca->t_RRD; + + ptr_array->activate_energy = uca->activate_energy; + ptr_array->read_energy = uca->read_energy; + ptr_array->write_energy = uca->write_energy; + ptr_array->precharge_energy = uca->precharge_energy; + + + ptr_array->activate_power = uca->activate_power; + ptr_array->read_power = uca->read_power; + ptr_array->write_power = uca->write_power; + ptr_array->peak_read_power = uca->read_energy/((g_ip->burst_depth)/(g_ip->sys_freq_MHz*1e6)/2); + + ptr_array->num_row_subarray = dyn_p.num_r_subarray; + 
ptr_array->num_col_subarray = dyn_p.num_c_subarray; + + + ptr_array->delay_TSV_tot = uca->delay_TSV_tot; + ptr_array->area_TSV_tot = uca->area_TSV_tot; + ptr_array->dyn_pow_TSV_tot = uca->dyn_pow_TSV_tot; + ptr_array->dyn_pow_TSV_per_access = uca->dyn_pow_TSV_per_access; + ptr_array->num_TSV_tot = uca->num_TSV_tot; + + //Covers the previous values + //ptr_array->area = g_ip->num_die_3d * (uca->area_per_bank * g_ip->nbanks); + //ptr_array->area_efficiency = g_ip->num_die_3d * uca->area_all_dataramcells * 100 / ptr_array->area; + } +// cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<power_gating) + { + ptr_array->sram_sleep_tx_width= uca->bank.mat.sram_sleep_tx->width; + ptr_array->sram_sleep_tx_area= uca->bank.mat.array_sleep_tx_area; + ptr_array->sram_sleep_wakeup_latency= uca->bank.mat.array_wakeup_t; + ptr_array->sram_sleep_wakeup_energy= uca->bank.mat.array_wakeup_e.readOp.dynamic; + + ptr_array->wl_sleep_tx_width= uca->bank.mat.row_dec->sleeptx->width; + ptr_array->wl_sleep_tx_area= uca->bank.mat.wl_sleep_tx_area; + ptr_array->wl_sleep_wakeup_latency= uca->bank.mat.wl_wakeup_t; + ptr_array->wl_sleep_wakeup_energy= uca->bank.mat.wl_wakeup_e.readOp.dynamic; + + ptr_array->bl_floating_wakeup_latency= uca->bank.mat.blfloating_wakeup_t; + ptr_array->bl_floating_wakeup_energy= uca->bank.mat.blfloating_wakeup_e.readOp.dynamic; + + ptr_array->array_leakage= uca->bank.array_leakage; + ptr_array->wl_leakage= uca->bank.wl_leakage; + ptr_array->cl_leakage= uca->bank.cl_leakage; + } + + ptr_array->num_active_mats = uca->bank.dp.num_act_mats_hor_dir; + ptr_array->num_submarray_mats = uca->bank.mat.num_subarrays_per_mat; + // cout<<"array_leakage"<array_leakage<wl_leakage<cl_leakage<min_delay)*100/minval->min_delay) > g_ip->delay_dev) { + return false; + } + if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > + g_ip->dynamic_power_dev) { + return false; + } + if (((u.power.readOp.leakage - 
minval->min_leakage)/minval->min_leakage)*100 > + g_ip->leakage_power_dev) { + return false; + } + if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > + g_ip->cycle_time_dev) { + return false; + } + if (((u.area - minval->min_area)/minval->min_area)*100 > + g_ip->area_dev) { + return false; + } + return true; +} + +bool check_mem_org(mem_array & u, const min_values_t *minval) +{ + if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { + return false; + } + if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > + g_ip->dynamic_power_dev) { + return false; + } + if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > + g_ip->leakage_power_dev) { + return false; + } + if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > + g_ip->cycle_time_dev) { + return false; + } + if (((u.area - minval->min_area)/minval->min_area)*100 > + g_ip->area_dev) { + return false; + } + return true; +} + + + + +void find_optimal_uca(uca_org_t *res, min_values_t * minval, list & ulist) +{ + double cost = 0; + double min_cost = BIGNUM; + float d, a, dp, lp, c; + + dp = g_ip->dynamic_power_wt; + lp = g_ip->leakage_power_wt; + a = g_ip->area_wt; + d = g_ip->delay_wt; + c = g_ip->cycle_time_wt; + + if (ulist.empty() == true) + { + cout << "ERROR: no valid cache organizations found" << endl; + exit(0); + } + + for (list::iterator niter = ulist.begin(); niter != ulist.end(); niter++) + { + if (g_ip->ed == 1) + { + cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn); + if (min_cost > cost) + { + min_cost = cost; + *res = (*(niter)); + } + } + else if (g_ip->ed == 2) + { + cost = ((niter)->access_time/minval->min_delay)* + ((niter)->access_time/minval->min_delay)* + ((niter)->power.readOp.dynamic/minval->min_dyn); + if (min_cost > cost) + { + min_cost = cost; + *res = (*(niter)); + } + } + else + { + /* + * check whether the current organization + * meets the input 
deviation constraints + */ + bool v = check_uca_org(*niter, minval); + //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling + + if (v) + { + cost = (d * ((niter)->access_time/minval->min_delay) + + c * ((niter)->cycle_time/minval->min_cyc) + + dp * ((niter)->power.readOp.dynamic/minval->min_dyn) + + lp * ((niter)->power.readOp.leakage/minval->min_leakage) + + a * ((niter)->area/minval->min_area)); + //fprintf(stderr, "cost = %g\n", cost); + + if (min_cost > cost) { + min_cost = cost; + *res = (*(niter)); + niter = ulist.erase(niter); + if (niter!=ulist.begin()) + niter--; + } + } + else { + niter = ulist.erase(niter); + if (niter!=ulist.begin()) + niter--; + } + } + } + + if (min_cost == BIGNUM) + { + cout << "ERROR: no cache organizations met optimization criteria" << endl; + exit(0); + } +} + + + +void filter_tag_arr(const min_values_t * min, list & list) +{ + double cost = BIGNUM; + double cur_cost; + double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt; + mem_array * res = NULL; + + if (list.empty() == true) + { + cout << "ERROR: no valid tag organizations found" << endl; + exit(1); + } + + + while (list.empty() != true) + { + bool v = check_mem_org(*list.back(), min); + if (v) + { + cur_cost = wt_delay * (list.back()->access_time/min->min_delay) + + wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) + + wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) + + wt_area * (list.back()->area/min->min_area) + + wt_cyc * (list.back()->cycle_time/min->min_cyc); + } + else + { + cur_cost = BIGNUM; + } + if (cur_cost < cost) + { + if (res != NULL) + { + delete res; + } + cost = cur_cost; + res = list.back(); + } + else + { + delete list.back(); + } + list.pop_back(); + } + if(!res) + { + cout << "ERROR: no valid tag organizations found" << endl; + exit(0); + } + + list.push_back(res); +} + + + 
+void filter_data_arr(list & curr_list) +{ + if (curr_list.empty() == true) + { + cout << "ERROR: no valid data array organizations found" << endl; + exit(1); + } + + list::iterator iter; + + for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) + { + mem_array * m = *iter; + + if (m == NULL) exit(1); + + if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) && + ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5)) + { + delete m; + iter = curr_list.erase(iter); + iter --; + } + } +} + + + +/* + * Performs exhaustive search across different sub-array sizes, + * wire types and aspect ratios to find an optimal UCA organization + * 1. First different valid tag array organizations are calculated + * and stored in tag_arr array + * 2. The exhaustive search is repeated to find valid data array + * organizations and stored in data_arr array + * 3. Cache area, delay, power, and cycle time for different + * cache organizations are calculated based on the + * above results + * 4. 
Cache model with least cost is picked from sol_list + */ +void solve(uca_org_t *fin_res) +{ + ///bool is_dram = false; + int pure_ram = g_ip->pure_ram; + bool pure_cam = g_ip->pure_cam; + + init_tech_params(g_ip->F_sz_um, false); + g_ip->print_detail_debug = 0; // ---detail outputs for debug, initiated for 3D memory + + list tag_arr (0); + list data_arr(0); + list::iterator miter; + list sol_list(1, uca_org_t()); + + fin_res->tag_array.access_time = 0; + fin_res->tag_array.Ndwl = 0; + fin_res->tag_array.Ndbl = 0; + fin_res->tag_array.Nspd = 0; + fin_res->tag_array.deg_bl_muxing = 0; + fin_res->tag_array.Ndsam_lev_1 = 0; + fin_res->tag_array.Ndsam_lev_2 = 0; + + + // distribute calculate_time() execution to multiple threads + calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads]; + pthread_t threads[nthreads]; + + for (uint32_t t = 0; t < nthreads; t++) + { + calc_array[t].tid = t; + calc_array[t].pure_ram = pure_ram; + calc_array[t].pure_cam = pure_cam; + calc_array[t].data_res = new min_values_t(); + calc_array[t].tag_res = new min_values_t(); + } + + bool is_tag; + ///uint32_t ram_cell_tech_type; + + // If it's a cache, first calculate the area, delay and power for all tag array partitions. 
+ if (!(pure_ram||pure_cam||g_ip->fully_assoc)) + { //cache + is_tag = true; + /// ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; + /// is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + init_tech_params(g_ip->F_sz_um, is_tag); + + for (uint32_t t = 0; t < nthreads; t++) + { + calc_array[t].is_tag = is_tag; + calc_array[t].is_main_mem = false; + calc_array[t].Nspd_min = 0.125; + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); + } + + for (uint32_t t = 0; t < nthreads; t++) + { + pthread_join(threads[t], NULL); + } + + for (uint32_t t = 0; t < nthreads; t++) + { + calc_array[t].data_arr.sort(mem_array::lt); + data_arr.merge(calc_array[t].data_arr, mem_array::lt); + calc_array[t].tag_arr.sort(mem_array::lt); + tag_arr.merge(calc_array[t].tag_arr, mem_array::lt); + } + } + + + // calculate the area, delay and power for all data array partitions (for cache or plain RAM). +// if (!g_ip->fully_assoc) +// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion + is_tag = false; + /// ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type; + /// is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + init_tech_params(g_ip->F_sz_um, is_tag); + + for (uint32_t t = 0; t < nthreads; t++) + { + calc_array[t].is_tag = is_tag; + calc_array[t].is_main_mem = g_ip->is_main_mem; + if (!(pure_cam||g_ip->fully_assoc)) + { + calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8); + } + else + { + calc_array[t].Nspd_min = 1; + } + + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); + } + + for (uint32_t t = 0; t < nthreads; t++) + { + pthread_join(threads[t], NULL); + } + + data_arr.clear(); + for (uint32_t t = 0; t < nthreads; t++) + { + calc_array[t].data_arr.sort(mem_array::lt); + data_arr.merge(calc_array[t].data_arr, mem_array::lt); + } +// } + + + min_values_t * d_min = new 
min_values_t(); + min_values_t * t_min = new min_values_t(); + min_values_t * cache_min = new min_values_t(); + + for (uint32_t t = 0; t < nthreads; t++) + { + d_min->update_min_values(calc_array[t].data_res); + t_min->update_min_values(calc_array[t].tag_res); + } + + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) + { + (*miter)->arr_min = d_min; + } + + + //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n"; + filter_data_arr(data_arr); + if(!(pure_ram||pure_cam||g_ip->fully_assoc)) + { + filter_tag_arr(t_min, tag_arr); + } + //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n"; + + + if (pure_ram||pure_cam||g_ip->fully_assoc) + { + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) + { + uca_org_t & curr_org = sol_list.back(); + curr_org.tag_array2 = NULL; + curr_org.data_array2 = (*miter); + + curr_org.find_delay(); + curr_org.find_energy(); + curr_org.find_area(); + curr_org.find_cyc(); + + //update min values for the entire cache + cache_min->update_min_values(curr_org); + + sol_list.push_back(uca_org_t()); + } + } + else + { + while (tag_arr.empty() != true) + { + mem_array * arr_temp = (tag_arr.back()); + //delete tag_arr.back(); + tag_arr.pop_back(); + + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) + { + uca_org_t & curr_org = sol_list.back(); + curr_org.tag_array2 = arr_temp; + curr_org.data_array2 = (*miter); + + curr_org.find_delay(); + curr_org.find_energy(); + curr_org.find_area(); + curr_org.find_cyc(); + + //update min values for the entire cache + cache_min->update_min_values(curr_org); + + sol_list.push_back(uca_org_t()); + } + } + } + + sol_list.pop_back(); + + find_optimal_uca(fin_res, cache_min, sol_list); + + sol_list.clear(); + + for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) + { + if (*miter != fin_res->data_array2) + { + delete *miter; + } + } + data_arr.clear(); + + for (uint32_t t = 0; t < nthreads; t++) + { + delete calc_array[t].data_res; + 
delete calc_array[t].tag_res; + } + + delete [] calc_array; + delete cache_min; + delete d_min; + delete t_min; +} + +void update(uca_org_t *fin_res) +{ + if(fin_res->tag_array2) + { + init_tech_params(g_ip->F_sz_um,true); + DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, fin_res->data_array2->wt, g_ip->is_main_mem); + if(tag_arr_dyn_p.is_valid) + { + UCA * tag_arr = new UCA(tag_arr_dyn_p); + fin_res->tag_array2->power = tag_arr->power; + } + else + { + cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; + exit(1); + } + } + init_tech_params(g_ip->F_sz_um,false); + DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, fin_res->data_array2->wt, g_ip->is_main_mem); + if(data_arr_dyn_p.is_valid) + { + UCA * data_arr = new UCA(data_arr_dyn_p); + fin_res->data_array2->power = data_arr->power; + } + else + { + cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; + exit(1); + } + + fin_res->find_energy(); +} + diff --git a/T1/TP1/cacti-master/Ucache.h b/T1/TP1/cacti-master/Ucache.h new file mode 100644 index 0000000..bfa1a30 --- /dev/null +++ b/T1/TP1/cacti-master/Ucache.h @@ -0,0 +1,118 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + +#ifndef __UCACHE_H__ +#define __UCACHE_H__ + +#include +#include "area.h" +#include "router.h" +#include "nuca.h" + + +class min_values_t +{ + public: + double min_delay; + double min_dyn; + double min_leakage; + double min_area; + double min_cyc; + + min_values_t() : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), min_area(BIGNUM), min_cyc(BIGNUM) { } + + void 
update_min_values(const min_values_t * val); + void update_min_values(const uca_org_t & res); + void update_min_values(const nuca_org_t * res); + void update_min_values(const mem_array * res); +}; + + + +struct solution +{ + int tag_array_index; + int data_array_index; + list::iterator tag_array_iter; + list::iterator data_array_iter; + double access_time; + double cycle_time; + double area; + double efficiency; + powerDef total_power; +}; + + + +bool calculate_time( + bool is_tag, + int pure_ram, + bool pure_cam, + double Nspd, + unsigned int Ndwl, + unsigned int Ndbl, + unsigned int Ndcm, + unsigned int Ndsam_lev_1, + unsigned int Ndsam_lev_2, + mem_array *ptr_array, + int flag_results_populate, + results_mem_array *ptr_results, + uca_org_t *ptr_fin_res, + Wire_type wtype, // merge from cacti-7 to cacti3d + bool is_main_mem); +void update(uca_org_t *fin_res); + +void solve(uca_org_t *fin_res); +void init_tech_params(double tech, bool is_tag); + + +struct calc_time_mt_wrapper_struct +{ + uint32_t tid; + bool is_tag; + bool pure_ram; + bool pure_cam; + bool is_main_mem; + double Nspd_min; + + min_values_t * data_res; + min_values_t * tag_res; + + list data_arr; + list tag_arr; +}; + +void *calc_time_mt_wrapper(void * void_obj); + +void print_g_tp(); + +#endif diff --git a/T1/TP1/cacti-master/arbiter.cc b/T1/TP1/cacti-master/arbiter.cc new file mode 100644 index 0000000..f09dcb7 --- /dev/null +++ b/T1/TP1/cacti-master/arbiter.cc @@ -0,0 +1,130 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "arbiter.h" + +Arbiter::Arbiter( + double n_req, + double flit_size_, + double output_len, + /*TechnologyParameter::*/DeviceType *dt + ):R(n_req), flit_size(flit_size_), + o_len (output_len), deviceType(dt) +{ + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; + Vdd = dt->Vdd; + double technology = g_ip->F_sz_um; + NTn1 = 13.5*technology/2; + PTn1 = 
76*technology/2; + NTn2 = 13.5*technology/2; + PTn2 = 76*technology/2; + NTi = 12.5*technology/2; + PTi = 25*technology/2; + NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/ + PTtr = 20*technology/2; /* pmos tr. length*/ +} + +Arbiter::~Arbiter(){} + +double +Arbiter::arb_req() { + double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) + + gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) + + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); + return temp; +} + +double +Arbiter::arb_pri() { + double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance + of flip-flop is ignored */ + return temp; +} + + +double +Arbiter::arb_grant() { + double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); + return temp; +} + +double +Arbiter::arb_int() { + double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + + 2*gate_C(NTn2, 0) + gate_C(PTn2, 0)); + return temp; +} + +void +Arbiter::compute_power() { + power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 + + arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd); + double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); + double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); + double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); + double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); + double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); + double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); + power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage + power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd; +} + +double //wire 
cap with triple spacing +Arbiter::Cw3(double length) { + Wire wc(g_ip->wt, length, 1, 3, 3); + double temp = (wc.wire_cap(length,true)); + return temp; +} + +double +Arbiter::crossbar_ctrline() { + double temp = (Cw3(o_len * 1e-6 /* m */) + + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) + + gate_C(NTi, 0) + gate_C(PTi, 0)); + return temp; +} + +double +Arbiter::transmission_buf_ctrcap() { + double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0); + return temp; +} + + +void Arbiter::print_arbiter() +{ + cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n"; + cout << "Flit size : " << flit_size << " bits" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; +} + + diff --git a/T1/TP1/cacti-master/arbiter.h b/T1/TP1/cacti-master/arbiter.h new file mode 100644 index 0000000..8358e95 --- /dev/null +++ b/T1/TP1/cacti-master/arbiter.h @@ -0,0 +1,77 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __ARBITER__ +#define __ARBITER__ + +#include +#include +#include "basic_circuit.h" +#include "cacti_interface.h" +#include "component.h" +#include "parameter.h" +#include "mat.h" +#include "wire.h" + +class Arbiter : public Component +{ + public: + Arbiter( + double Req, + double flit_sz, + double output_len, + /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)); + ~Arbiter(); + + void print_arbiter(); + double arb_req(); + double arb_pri(); + double arb_grant(); + double arb_int(); + void compute_power(); + double Cw3(double len); + double crossbar_ctrline(); + double transmission_buf_ctrcap(); + + + + private: + double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi; + double flit_size; + double NTtr, PTtr; + double o_len; + /*TechnologyParameter::*/DeviceType *deviceType; + double TriS1, TriS2; + double min_w_pmos, Vdd; + +}; + +#endif diff --git a/T1/TP1/cacti-master/area.cc b/T1/TP1/cacti-master/area.cc new file mode 100644 index 0000000..d6a3746 --- /dev/null +++ b/T1/TP1/cacti-master/area.cc @@ -0,0 +1,46 @@ 
+/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include "area.h" +#include "component.h" +#include "decoder.h" +#include "parameter.h" +#include "basic_circuit.h" +#include +#include +#include + +using namespace std; + + + diff --git a/T1/TP1/cacti-master/area.h b/T1/TP1/cacti-master/area.h new file mode 100644 index 0000000..a592dbc --- /dev/null +++ b/T1/TP1/cacti-master/area.h @@ -0,0 +1,71 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#ifndef __AREA_H__ +#define __AREA_H__ + +#include "cacti_interface.h" +#include "basic_circuit.h" + +using namespace std; + +class Area +{ + public: + double w; + double h; + + Area():w(0), h(0), area(0) { } + double get_w() const { return w; } + double get_h() const { return h; } + double get_area() const + { + if (w == 0 && h == 0) + { + return area; + } + else + { + return w*h; + } + } + void set_w(double w_) { w = w_; } + void set_h(double h_) { h = h_; } + void set_area(double a_) { area = a_; } + + private: + double area; +}; + +#endif + diff --git a/T1/TP1/cacti-master/bank.cc b/T1/TP1/cacti-master/bank.cc new file mode 100644 index 0000000..e7e5d81 --- /dev/null +++ b/T1/TP1/cacti-master/bank.cc @@ -0,0 +1,206 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include "bank.h" +#include + + +Bank::Bank(const DynamicParameter & dyn_p): + dp(dyn_p), mat(dp), + num_addr_b_mat(dyn_p.number_addr_bits_mat), + num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir), + array_leakage(0), + wl_leakage(0), + cl_leakage(0) +{ +// Mat temp(dyn_p); + int RWP; + int ERP; + int EWP; + int SCHP; + + if (dp.use_inp_params) + { 
+ RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } + else + { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + } + + int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); + int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); + int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + int searchinbits; + int searchoutbits; + + if (dp.fully_assoc || dp.pure_cam) + { + datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); + dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + searchinbits = dp.num_si_b_bank_per_port * SCHP; + searchoutbits = dp.num_so_b_bank_per_port * SCHP; + } + + if (!(dp.fully_assoc || dp.pure_cam)) + { + if (g_ip->fast_access && dp.is_tag == false) + { + dataoutbits *= g_ip->data_assoc; + } + + htree_in_add = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree); + htree_in_data = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); + htree_out_data = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); + +// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100, +// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + } + else + { + htree_in_add = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree); + htree_in_data = new Htree2 
(dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); + htree_out_data = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); + htree_in_search = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true); + htree_out_search = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true); + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + } + + num_addr_b_row_dec = _log2(mat.subarray.num_rows); + num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec; + num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec; +} + + + +Bank::~Bank() +{ + delete htree_in_add; + delete htree_out_data; + delete htree_in_data; + if (dp.fully_assoc || dp.pure_cam) + { + delete htree_in_search; + delete htree_out_search; + } +} + + + +double Bank::compute_delays(double inrisetime) +{ + return mat.compute_delays(inrisetime); +} + + + +void Bank::compute_power_energy() +{ + mat.compute_power_energy(); + + if (!(dp.fully_assoc || dp.pure_cam)) + { + power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir; + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + + array_leakage += mat.array_leakage*dp.num_mats; + wl_leakage += mat.wl_leakage*dp.num_mats; 
+ cl_leakage += mat.cl_leakage*dp.num_mats; +// +// power.readOp.leakage += htree_in_add->power.readOp.leakage; +// power.readOp.leakage += htree_in_data->power.readOp.leakage; +// power.readOp.leakage += htree_out_data->power.readOp.leakage; +// power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; +// power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; +// power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + } + else + { + + power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + + power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats; + power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic + + mat.power_sa.searchOp.dynamic + + mat.power_bitline.searchOp.dynamic + + mat.power_subarray_out_drv.searchOp.dynamic+ + mat.ml_to_ram_wl_drv->power.readOp.dynamic; + + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + + power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic; + power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic; + + power.readOp.leakage += htree_in_add->power.readOp.leakage; + power.readOp.leakage += htree_in_data->power.readOp.leakage; + power.readOp.leakage += htree_out_data->power.readOp.leakage; + power.readOp.leakage += htree_in_search->power.readOp.leakage; + power.readOp.leakage += htree_out_search->power.readOp.leakage; + + + power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage; + power.readOp.gate_leakage += 
htree_out_search->power.readOp.gate_leakage; + + } + +} + diff --git a/T1/TP1/cacti-master/bank.h b/T1/TP1/cacti-master/bank.h new file mode 100644 index 0000000..e12665f --- /dev/null +++ b/T1/TP1/cacti-master/bank.h @@ -0,0 +1,74 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#ifndef __BANK_H__ +#define __BANK_H__ + +#include "component.h" +#include "decoder.h" +#include "mat.h" +#include "htree2.h" + + +class Bank : public Component +{ + public: + Bank(const DynamicParameter & dyn_p); + ~Bank(); + double compute_delays(double inrisetime); // return outrisetime + void compute_power_energy(); + + const DynamicParameter & dp; + Mat mat; + Htree2 *htree_in_add; + Htree2 *htree_in_data; + Htree2 *htree_out_data; + Htree2 *htree_in_search; + Htree2 *htree_out_search; + + int num_addr_b_mat; + int num_mats_hor_dir; + int num_mats_ver_dir; + + int num_addr_b_row_dec; + int num_addr_b_routed_to_mat_for_act; + int num_addr_b_routed_to_mat_for_rd_or_wr; + + double array_leakage; + double wl_leakage; + double cl_leakage; +}; + + + +#endif diff --git a/T1/TP1/cacti-master/basic_circuit.cc b/T1/TP1/cacti-master/basic_circuit.cc new file mode 100644 index 0000000..696f45c --- /dev/null +++ b/T1/TP1/cacti-master/basic_circuit.cc @@ -0,0 +1,999 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + + +#include "basic_circuit.h" +#include "parameter.h" +#include +#include +#include + +uint32_t _log2(uint64_t num) +{ + uint32_t log2 = 0; + + if (num == 0) + { + std::cerr << "log0?" 
<< std::endl; + exit(1); + } + + while (num > 1) + { + num = (num >> 1); + log2++; + } + + return log2; +} + + +bool is_pow2(int64_t val) +{ + if (val <= 0) + { + return false; + } + else if (val == 1) + { + return true; + } + else + { + return (_log2(val) != _log2(val-1)); + } +} + + +int powers (int base, int n) +{ + int i, p; + + p = 1; + for (i = 1; i <= n; ++i) + p *= base; + return p; +} + +/*----------------------------------------------------------------------*/ + +double logtwo (double x) +{ + assert(x > 0); + return ((double) (log (x) / log (2.0))); +} + +/*----------------------------------------------------------------------*/ + + +double gate_C( + double width, + double wirelength, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + const /*TechnologyParameter::*/DeviceType * dt; + + if (_is_dram && _is_cell) + { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } + else if (_is_dram && _is_wl_tr) + { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } + else if (!_is_dram && _is_cell) + { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { + dt = &g_tp.peri_global; + } + + return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; +} + + +// returns gate capacitance in Farads +// actually this function is the same as gate_C() now +double gate_C_pass( + double width, // gate width in um (length is Lphy_periph_global) + double wirelength, // poly wire length going to gate in lambda + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + // v5.0 + const /*TechnologyParameter::*/DeviceType * dt; + + if ((_is_dram) && (_is_cell)) + { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } + else if ((_is_dram) && (_is_wl_tr)) + { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } + else if ((!_is_dram) && _is_cell) + { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } + 
else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { + dt = &g_tp.peri_global; + } + + return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; +} + + + +double drain_C_( + double width, + int nchannel, + int stack, + int next_arg_thresh_folding_width_or_height_cell, + double fold_dimension, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + double w_folded_tr; + const /*TechnologyParameter::*/DeviceType * dt; + + if ((_is_dram) && (_is_cell)) + { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } + else if ((_is_dram) && (_is_wl_tr)) + { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } + else if ((!_is_dram) && _is_cell) + { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { + dt = &g_tp.peri_global; + } + + double c_junc_area = dt->C_junc; + double c_junc_sidewall = dt->C_junc_sidewall; + double c_fringe = 2*dt->C_fringe; + double c_overlap = 2*dt->C_overlap; + double drain_C_metal_connecting_folded_tr = 0; + + // determine the width of the transistor after folding (if it is getting folded) + if (next_arg_thresh_folding_width_or_height_cell == 0) + { // interpret fold_dimension as the the folding width threshold + // i.e. the value of transistor width above which the transistor gets folded + w_folded_tr = fold_dimension; + } + else + { // interpret fold_dimension as the height of the cell that this transistor is part of. 
+ double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL; + // TODO : w_folded_tr must come from Component::compute_gate_area() + double ratio_p_to_n = 2.0 / (2.0 + 1.0); + if (nchannel) + { + w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + } + else + { + w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + } + } + int num_folded_tr = (int) (ceil(width / w_folded_tr)); + + if (num_folded_tr < 2) + { + w_folded_tr = width; + } + + double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain + (stack - 1) * g_tp.spacing_poly_to_poly; + double drain_h_for_sidewall = w_folded_tr; + double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1); + if (num_folded_tr > 1) + { + total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly); + + if (num_folded_tr%2 == 0) + { + drain_h_for_sidewall = 0; + } + total_drain_height_for_cap_wrt_gate *= num_folded_tr; + drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w; + } + + double drain_C_area = c_junc_area * total_drain_w * w_folded_tr; + double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w); + double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate; + + return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr); +} + + +double tr_R_on( + double width, + int nchannel, + int stack, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + const /*TechnologyParameter::*/DeviceType * dt; + + if ((_is_dram) && (_is_cell)) + { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } + else if ((_is_dram) && (_is_wl_tr)) + { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } + else if ((!_is_dram) && _is_cell) + { + dt = &g_tp.sram_cell; // SRAM cell 
access transistor + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { + dt = &g_tp.peri_global; + } + + double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on; + return (stack * restrans / width); +} + + +/* This routine operates in reverse: given a resistance, it finds + * the transistor width that would have this R. It is used in the + * data wordline to estimate the wordline driver size. */ + +// returns width in um +double R_to_w( + double res, + int nchannel, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + const /*TechnologyParameter::*/DeviceType * dt; + + if ((_is_dram) && (_is_cell)) + { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } + else if ((_is_dram) && (_is_wl_tr)) + { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } + else if ((!_is_dram) && (_is_cell)) + { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { + dt = &g_tp.peri_global; + } + + double restrans = (nchannel) ? 
dt->R_nch_on : dt->R_pch_on; + return (restrans / res); +} + + +double pmos_to_nmos_sz_ratio( + bool _is_dram, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + double p_to_n_sizing_ratio; + if ((_is_dram) && (_is_wl_tr)) + { //DRAM wordline transistor + p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio; + } + else if (_is_sleep_tx) + { + p_to_n_sizing_ratio = g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio; + } + return p_to_n_sizing_ratio; +} + + +// "Timing Models for MOS Circuits" by Mark Horowitz, 1984 +double horowitz( + double inputramptime, // input rise time + double tf, // time constant of gate + double vs1, // threshold voltage + double vs2, // threshold voltage + int rise) // whether input rises or fall +{ + if (inputramptime == 0 && vs1 == vs2) + { + return tf * (vs1 < 1 ? -log(vs1) : log(vs1)); + } + double a, b, td; + + a = inputramptime / tf; + if (rise == RISE) + { + b = 0.5; + td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2)); + } + else + { + b = 0.4; + td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2)); + } + return (td); +} + +double cmos_Ileak( + double nWidth, + double pWidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + /*TechnologyParameter::*/DeviceType * dt; + + if ((!_is_dram)&&(_is_cell)) + { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } + else if ((_is_dram)&&(_is_wl_tr)) + { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nWidth*dt->I_off_n + pWidth*dt->I_off_p; +} + +int factorial(int n, int m) +{ + int fa = m, i; + for (i=m+1; i<=n; i++) + fa *=i; + return fa; +} + +int combination(int n, int m) +{ + int 
ret; + ret = factorial(n, m+1) / factorial(n - m); + return ret; +} + +double simplified_nmos_Isat( + double nwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + /*TechnologyParameter::*/DeviceType * dt; + + if ((!_is_dram)&&(_is_cell)) + { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } + else if ((_is_dram)&&(_is_wl_tr)) + { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nwidth * dt->I_on_n; +} + +double simplified_pmos_Isat( + double pwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + /*TechnologyParameter::*/DeviceType * dt; + + if ((!_is_dram)&&(_is_cell)) + { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } + else if ((_is_dram)&&(_is_wl_tr)) + { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return pwidth * dt->I_on_n/dt->n_to_p_eff_curr_drv_ratio; +} + + +double simplified_nmos_leakage( + double nwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + /*TechnologyParameter::*/DeviceType * dt; + + if ((!_is_dram)&&(_is_cell)) + { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } + else if ((_is_dram)&&(_is_wl_tr)) + { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nwidth * dt->I_off_n; +} + +double simplified_pmos_leakage( + double pwidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + /*TechnologyParameter::*/DeviceType * dt; + + if ((!_is_dram)&&(_is_cell)) + { //SRAM cell access transistor + dt = 
&(g_tp.sram_cell); + } + else if ((_is_dram)&&(_is_wl_tr)) + { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return pwidth * dt->I_off_p; +} + +double cmos_Ig_n( + double nWidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + /*TechnologyParameter::*/DeviceType * dt; + + if ((!_is_dram)&&(_is_cell)) + { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } + else if ((_is_dram)&&(_is_wl_tr)) + { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nWidth*dt->I_g_on_n; +} + +double cmos_Ig_p( + double pWidth, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx) +{ + /*TechnologyParameter::*/DeviceType * dt; + + if ((!_is_dram)&&(_is_cell)) + { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } + else if ((_is_dram)&&(_is_wl_tr)) + { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } + else if (_is_sleep_tx) + { + dt = &g_tp.sleep_tx; // Sleep transistor + } + else + { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return pWidth*dt->I_g_on_p; +} + +double cmos_Isub_leakage( + double nWidth, + double pWidth, + int fanin, + enum Gate_type g_type, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx, + enum Half_net_topology topo) +{ + assert (fanin>=1); + double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); + double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); + double Isub=0; + int num_states; + int num_off_tx; + + num_states = int(pow(2.0, fanin)); + + switch (g_type) + { + case nmos: + if (fanin==1) + { + Isub = nmos_leak/num_states; + } + 
else + { + if (topo==parallel) + { + Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states + } + else + { + for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power + { + //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); + Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + } + Isub /=num_states; + } + + } + break; + case pmos: + if (fanin==1) + { + Isub = pmos_leak/num_states; + } + else + { + if (topo==parallel) + { + Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states + } + else + { + for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power + { + //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); + Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + } + Isub /=num_states; + } + + } + break; + case inv: + Isub = (nmos_leak + pmos_leak)/2; + break; + case nand: + Isub += fanin*pmos_leak;//the pullup network + for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network + { + //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); + Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + } + Isub /=num_states; + break; + case nor: + for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network + { + //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); + Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + } + 
Isub += fanin*nmos_leak;//the pulldown network + Isub /=num_states; + break; + case tri: + Isub += (nmos_leak + pmos_leak)/2;//enabled + Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power + Isub /=2; + break; + case tg: + Isub = (nmos_leak + pmos_leak)/2; + break; + default: + assert(0); + break; + } + + return Isub; +} + + +double cmos_Ig_leakage( + double nWidth, + double pWidth, + int fanin, + enum Gate_type g_type, + bool _is_dram, + bool _is_cell, + bool _is_wl_tr, + bool _is_sleep_tx, + enum Half_net_topology topo) +{ + assert (fanin>=1); + double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); + double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx); + double Ig_on=0; + int num_states; + int num_on_tx; + + num_states = int(pow(2.0, fanin)); + + switch (g_type) + { + case nmos: + if (fanin==1) + { + Ig_on = nmos_leak/num_states; + } + else + { + if (topo==parallel) + { + for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++) + { + Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx; + } + } + else + { + Ig_on += nmos_leak * fanin;//pull down network when all TXs are on. + //num_on_tx is the number of on tx + for (num_on_tx=1; num_on_txprint_detail_debug) + { + cout<<"TSV ox cap: "<1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T + +# Number of DQ pins + +-num_dq 72 //Number of DQ pins. Includes ECC pins. + +# Number of DQS pins. 
DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typucally differential, just like the CLK pin. + +-num_dqs 18 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins. + +# Number of CA pins + +-num_ca 25 //Valid range 0 to 35 pins. + +# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin. + +-num_clk 2 //2 x differential pair. Valid values: 0/2/4. + +# Number of Physical Ranks + +-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register. + +# Width of the Memory Data Bus + +-mem_data_width 8 //x4 or x8 or x16 or x32 memories. For WideIO upto x128. + +# RTT Termination Resistance + +-rtt_value 10000 + +# RON Termination Resistance + +-ron_value 34 + +# Time of flight for DQ + +-tflight_value + +# Parameter related to MemCAD + +# Number of BoBs: 1,2,3,4,5,6, +-num_bobs 1 + +# Memory System Capacity in GB +-capacity 80 + +# Number of Channel per BoB: 1,2. 
+-num_channels_per_bob 1 + +# First Metric for ordering different design points +-first metric "Cost" +#-first metric "Bandwidth" +#-first metric "Energy" + +# Second Metric for ordering different design points +#-second metric "Cost" +-second metric "Bandwidth" +#-second metric "Energy" + +# Third Metric for ordering different design points +#-third metric "Cost" +#-third metric "Bandwidth" +-third metric "Energy" + + +# Possible DIMM option to consider +#-DIMM model "JUST_UDIMM" +#-DIMM model "JUST_RDIMM" +#-DIMM model "JUST_LRDIMM" +-DIMM model "ALL" + +#if channels of each bob have the same configurations +#-mirror_in_bob "T" +-mirror_in_bob "F" + +#if we want to see all channels/bobs/memory configurations explored +#-verbose "T" +#-verbose "F" + diff --git a/T1/TP1/cacti-master/cacti.i b/T1/TP1/cacti-master/cacti.i new file mode 100644 index 0000000..7964138 --- /dev/null +++ b/T1/TP1/cacti-master/cacti.i @@ -0,0 +1,8 @@ +%module cacti +%{ +/* Includes the header in the wrapper code */ +#include "cacti_interface.h" +%} + +/* Parse the header file to generate wrappers */ +%include "cacti_interface.h" \ No newline at end of file diff --git a/T1/TP1/cacti-master/cacti.mk b/T1/TP1/cacti-master/cacti.mk new file mode 100644 index 0000000..b675d75 --- /dev/null +++ b/T1/TP1/cacti-master/cacti.mk @@ -0,0 +1,53 @@ +TARGET = cacti +SHELL = /bin/sh +.PHONY: all depend clean +.SUFFIXES: .cc .o + +ifndef NTHREADS + NTHREADS = 8 +endif + + +LIBS = +INCS = -lm + +ifeq ($(TAG),dbg) + DBG = -Wall + OPT = -ggdb -g -O0 -DNTHREADS=1 -gstabs+ +else + DBG = + OPT = -g -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) +endif + +#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT) +CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT) +CXX = g++ -m64 +CC = gcc -m64 + +SRCS = area.cc bank.cc mat.cc main.cc Ucache.cc io.cc technology.cc basic_circuit.cc parameter.cc \ + decoder.cc component.cc uca.cc subarray.cc wire.cc htree2.cc extio.cc extio_technology.cc \ + cacti_interface.cc router.cc 
nuca.cc crossbar.cc arbiter.cc powergating.cc TSV.cc memorybus.cc \ + memcad.cc memcad_parameters.cc + + +OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS)) +PYTHONLIB_SRCS = $(patsubst main.cc, ,$(SRCS)) obj_$(TAG)/cacti_wrap.cc +PYTHONLIB_OBJS = $(patsubst %.cc,%.o,$(PYTHONLIB_SRCS)) +INCLUDES = -I /usr/include/python2.4 -I /usr/lib/python2.4/config + +all: obj_$(TAG)/$(TARGET) + cp -f obj_$(TAG)/$(TARGET) $(TARGET) + +obj_$(TAG)/$(TARGET) : $(OBJS) + $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread + +#obj_$(TAG)/%.o : %.cc +# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $< + +obj_$(TAG)/%.o : %.cc + $(CXX) $(CXXFLAGS) -c $< -o $@ + +clean: + -rm -f *.o _cacti.so cacti.py $(TARGET) + + diff --git a/T1/TP1/cacti-master/cacti_interface.cc b/T1/TP1/cacti-master/cacti_interface.cc new file mode 100644 index 0000000..763b1d6 --- /dev/null +++ b/T1/TP1/cacti-master/cacti_interface.cc @@ -0,0 +1,174 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include +#include + + +#include "area.h" +#include "basic_circuit.h" +#include "component.h" +#include "const.h" +#include "parameter.h" +#include "cacti_interface.h" +#include "Ucache.h" + +#include +#include +#include + +using namespace std; + + +bool mem_array::lt(const mem_array * m1, const mem_array * m2) +{ + if (m1->Nspd < m2->Nspd) return true; + else if (m1->Nspd > m2->Nspd) return false; + else if (m1->Ndwl < m2->Ndwl) return true; + else if (m1->Ndwl > m2->Ndwl) return false; + else if (m1->Ndbl < m2->Ndbl) return true; + else if (m1->Ndbl > m2->Ndbl) return false; + else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true; + else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false; + else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true; + else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false; + else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true; + else return false; +} + + + +void uca_org_t::find_delay() +{ + mem_array * data_arr = data_array2; + mem_array * tag_arr = tag_array2; + + // check whether it is a regular cache or scratch ram + if (g_ip->pure_ram|| 
g_ip->pure_cam || g_ip->fully_assoc) + { + access_time = data_arr->access_time; + } + // Both tag and data lookup happen in parallel + // and the entire set is sent over the data array h-tree without + // waiting for the way-select signal --TODO add the corresponding + // power overhead Nav + else if (g_ip->fast_access == true) + { + access_time = MAX(tag_arr->access_time, data_arr->access_time); + } + // Tag is accessed first. On a hit, way-select signal along with the + // address is sent to read/write the appropriate block in the data + // array + else if (g_ip->is_seq_acc == true) + { + access_time = tag_arr->access_time + data_arr->access_time; + } + // Normal access: tag array access and data array access happen in parallel. + // But, the data array will wait for the way-select and transfer only the + // appropriate block over the h-tree. + else + { + access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder, + data_arr->delay_before_subarray_output_driver) + + data_arr->delay_from_subarray_output_driver_to_output; + } +} + + + +void uca_org_t::find_energy() +{ + if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache) + power = data_array2->power + tag_array2->power; + else + power = data_array2->power; +} + + + +void uca_org_t::find_area() +{ + if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false) + { + cache_ht = data_array2->height; + cache_len = data_array2->width; + } + else + { + cache_ht = MAX(tag_array2->height, data_array2->height); + cache_len = tag_array2->width + data_array2->width; + } + area = cache_ht * cache_len; +} + +void uca_org_t::adjust_area() +{ + double area_adjust; + if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc) + { + if (data_array2->area_efficiency/100.0<0.2) + { + //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2)); + area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0)); + cache_ht = cache_ht/area_adjust; + cache_len 
= cache_len/area_adjust; + } + } + area = cache_ht * cache_len; +} + +void uca_org_t::find_cyc() +{ + if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false) + { + cycle_time = data_array2->cycle_time; + } + else + { + cycle_time = MAX(tag_array2->cycle_time, + data_array2->cycle_time); + } +} + +uca_org_t :: uca_org_t() +:tag_array2(0), + data_array2(0) +{ + +} + +void uca_org_t :: cleanup() +{ + if (data_array2!=0) + delete data_array2; + if (tag_array2!=0) + delete tag_array2; +} diff --git a/T1/TP1/cacti-master/cacti_interface.h b/T1/TP1/cacti-master/cacti_interface.h new file mode 100644 index 0000000..a2b8e2d --- /dev/null +++ b/T1/TP1/cacti-master/cacti_interface.h @@ -0,0 +1,904 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#ifndef __CACTI_INTERFACE_H__ +#define __CACTI_INTERFACE_H__ + +#include +#include +#include +#include +#include +#include "const.h" + +using namespace std; + + +class min_values_t; +class mem_array; +class uca_org_t; + + +class powerComponents +{ + public: + double dynamic; + double leakage; + double gate_leakage; + double short_circuit; + double longer_channel_leakage; + + powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { } + powerComponents(const powerComponents & obj) { *this = obj; } + powerComponents & operator=(const powerComponents & rhs) + { + dynamic = rhs.dynamic; + leakage = rhs.leakage; + gate_leakage = rhs.gate_leakage; + short_circuit = rhs.short_circuit; + longer_channel_leakage = rhs.longer_channel_leakage; + return *this; + } + void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;} + + friend powerComponents operator+(const powerComponents & x, const powerComponents & y); + friend powerComponents operator*(const powerComponents & x, double const * const y); +}; + + + +class powerDef +{ + public: + powerComponents readOp; + powerComponents writeOp; + powerComponents searchOp;//: for CAM and FA + + powerDef() : readOp(), writeOp(), searchOp() { } + void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();} + + 
friend powerDef operator+(const powerDef & x, const powerDef & y); + friend powerDef operator*(const powerDef & x, double const * const y); +}; + +enum Wire_type +{ + Global /* gloabl wires with repeaters */, + Global_5 /* 5% delay penalty */, + Global_10 /* 10% delay penalty */, + Global_20 /* 20% delay penalty */, + Global_30 /* 30% delay penalty */, + Low_swing /* differential low power wires with high area overhead */, + Semi_global /* mid-level wires with repeaters*/, + Full_swing /* models all global wires with different latencies (Global_x )*/, + Transmission /* tranmission lines with high area overhead */, + Optical /* optical wires */, + Invalid_wtype +}; + +enum TSV_type +{ + Fine, /*ITRS high density*/ + Coarse /*Industry reported in 2010*/ +}; + +// ali + +enum Mem_IO_type +{ + DDR3, + DDR4, + LPDDR2, + WideIO, + Low_Swing_Diff, + Serial +}; + +enum Mem_DIMM +{ + UDIMM, + RDIMM, + LRDIMM +}; + +enum Mem_state +{ + READ, + WRITE, + IDLE, + SLEEP +}; + +enum Mem_ECC +{ + NO_ECC, + SECDED, // single error correction, double error detection + CHIP_KILL +}; + +enum DIMM_Model +{ + JUST_UDIMM,JUST_RDIMM,JUST_LRDIMM,ALL +}; + +enum MemCad_metrics +{ + Bandwidth, Energy, Cost +}; + +/** +enum BoB_LINK +{ + PARALLEL, // e.g. Intel SMB c104 + SERIAL // e.g. 
Intel SMB 7510, IBM AMB +}; +**/ +// end ali + + +class InputParameter +{ + public: + + InputParameter(); + void parse_cfg(const string & infile); + + bool error_checking(); // return false if the input parameters are problematic + void display_ip(); + + unsigned int cache_sz; // in bytes + unsigned int line_sz; + unsigned int assoc; + unsigned int nbanks; + unsigned int out_w;// == nr_bits_out + bool specific_tag; + unsigned int tag_w; + unsigned int access_mode; + unsigned int obj_func_dyn_energy; + unsigned int obj_func_dyn_power; + unsigned int obj_func_leak_power; + unsigned int obj_func_cycle_t; + + double F_sz_nm; // feature size in nm + double F_sz_um; // feature size in um + unsigned int num_rw_ports; + unsigned int num_rd_ports; + unsigned int num_wr_ports; + unsigned int num_se_rd_ports; // number of single ended read ports + unsigned int num_search_ports; // : number of search ports for CAM + bool is_main_mem; + bool is_3d_mem; + bool print_detail_debug; + bool is_cache; + bool pure_ram; + bool pure_cam; + bool rpters_in_htree; // if there are repeaters in htree segment + unsigned int ver_htree_wires_over_array; + unsigned int broadcast_addr_din_over_ver_htrees; + unsigned int temp; + + unsigned int ram_cell_tech_type; + unsigned int peri_global_tech_type; + unsigned int data_arr_ram_cell_tech_type; + unsigned int data_arr_peri_global_tech_type; + unsigned int tag_arr_ram_cell_tech_type; + unsigned int tag_arr_peri_global_tech_type; + + unsigned int burst_len; + unsigned int int_prefetch_w; + unsigned int page_sz_bits; + + unsigned int num_die_3d; + unsigned int burst_depth; + unsigned int io_width; + unsigned int sys_freq_MHz; + + unsigned int tsv_is_subarray_type; + unsigned int tsv_os_bank_type; + unsigned int TSV_proj_type; + + int partition_gran; + unsigned int num_tier_row_sprd; + unsigned int num_tier_col_sprd; + bool fine_gran_bank_lvl; + + unsigned int ic_proj_type; // interconnect_projection_type + unsigned int wire_is_mat_type; // 
wire_inside_mat_type + unsigned int wire_os_mat_type; // wire_outside_mat_type + enum Wire_type wt; + int force_wiretype; + bool print_input_args; + unsigned int nuca_cache_sz; // TODO + int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm; + bool force_cache_config; + + int cache_level; + int cores; + int nuca_bank_count; + int force_nuca_bank; + + int delay_wt, dynamic_power_wt, leakage_power_wt, + cycle_time_wt, area_wt; + int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca, + cycle_time_wt_nuca, area_wt_nuca; + + int delay_dev, dynamic_power_dev, leakage_power_dev, + cycle_time_dev, area_dev; + int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca, + cycle_time_dev_nuca, area_dev_nuca; + int ed; //ED or ED2 optimization + int nuca; + + bool fast_access; + unsigned int block_sz; // bytes + unsigned int tag_assoc; + unsigned int data_assoc; + bool is_seq_acc; + bool fully_assoc; + unsigned int nsets; // == number_of_sets + int print_detail; + + + bool add_ecc_b_; + //parameters for design constraint + double throughput; + double latency; + bool pipelinable; + int pipeline_stages; + int per_stage_vector; + bool with_clock_grid; + + bool array_power_gated; + bool bitline_floating; + bool wl_power_gated; + bool cl_power_gated; + bool interconect_power_gated; + bool power_gating; + + double perfloss; + + bool cl_vertical; + + // Parameters related to off-chip I/O + + double addr_timing, duty_cycle, mem_density, bus_bw, activity_dq, activity_ca, bus_freq; + int mem_data_width, num_mem_dq, num_clk, num_ca, num_dqs, num_dq; + + double rtt_value, ron_value, tflight_value; //FIXME + + Mem_state iostate; + + ///char iostate, dram_ecc, io_type; + + Mem_ECC dram_ecc; + Mem_IO_type io_type; + Mem_DIMM dram_dimm; + + int num_bobs; // BoB is buffer-on-board such as Intel SMB c102 + + int capacity; // in GB + + int num_channels_per_bob; // 1 means no bob + + MemCad_metrics first_metric; + + MemCad_metrics second_metric; + + MemCad_metrics third_metric; + + 
DIMM_Model dimm_model; + + bool low_power_permitted; // Not yet implemented. It determines acceptable VDDs. + + double load; // between 0 to 1 + + double row_buffer_hit_rate; + + double rd_2_wr_ratio; + + bool same_bw_in_bob; // true if all the channels in the bob have the same bandwidth. + + bool mirror_in_bob;// true if all the channels in the bob have the same configs + + bool total_power; // false means just considering I/O Power + + bool verbose; + + + +}; + + +typedef struct{ + int Ndwl; + int Ndbl; + double Nspd; + int deg_bl_muxing; + int Ndsam_lev_1; + int Ndsam_lev_2; + int number_activated_mats_horizontal_direction; + int number_subbanks; + int page_size_in_bits; + double delay_route_to_bank; + double delay_crossbar; + double delay_addr_din_horizontal_htree; + double delay_addr_din_vertical_htree; + double delay_row_predecode_driver_and_block; + double delay_row_decoder; + double delay_bitlines; + double delay_sense_amp; + double delay_subarray_output_driver; + double delay_bit_mux_predecode_driver_and_block; + double delay_bit_mux_decoder; + double delay_senseamp_mux_lev_1_predecode_driver_and_block; + double delay_senseamp_mux_lev_1_decoder; + double delay_senseamp_mux_lev_2_predecode_driver_and_block; + double delay_senseamp_mux_lev_2_decoder; + double delay_input_htree; + double delay_output_htree; + double delay_dout_vertical_htree; + double delay_dout_horizontal_htree; + double delay_comparator; + double access_time; + double cycle_time; + double multisubbank_interleave_cycle_time; + double delay_request_network; + double delay_inside_mat; + double delay_reply_network; + double trcd; + double cas_latency; + double precharge_delay; + powerDef power_routing_to_bank; + powerDef power_addr_input_htree; + powerDef power_data_input_htree; + powerDef power_data_output_htree; + powerDef power_addr_horizontal_htree; + powerDef power_datain_horizontal_htree; + powerDef power_dataout_horizontal_htree; + powerDef power_addr_vertical_htree; + powerDef 
power_datain_vertical_htree; + powerDef power_row_predecoder_drivers; + powerDef power_row_predecoder_blocks; + powerDef power_row_decoders; + powerDef power_bit_mux_predecoder_drivers; + powerDef power_bit_mux_predecoder_blocks; + powerDef power_bit_mux_decoders; + powerDef power_senseamp_mux_lev_1_predecoder_drivers; + powerDef power_senseamp_mux_lev_1_predecoder_blocks; + powerDef power_senseamp_mux_lev_1_decoders; + powerDef power_senseamp_mux_lev_2_predecoder_drivers; + powerDef power_senseamp_mux_lev_2_predecoder_blocks; + powerDef power_senseamp_mux_lev_2_decoders; + powerDef power_bitlines; + powerDef power_sense_amps; + powerDef power_prechg_eq_drivers; + powerDef power_output_drivers_at_subarray; + powerDef power_dataout_vertical_htree; + powerDef power_comparators; + powerDef power_crossbar; + powerDef total_power; + double area; + double all_banks_height; + double all_banks_width; + double bank_height; + double bank_width; + double subarray_memory_cell_area_height; + double subarray_memory_cell_area_width; + double mat_height; + double mat_width; + double routing_area_height_within_bank; + double routing_area_width_within_bank; + double area_efficiency; +// double perc_power_dyn_routing_to_bank; +// double perc_power_dyn_addr_horizontal_htree; +// double perc_power_dyn_datain_horizontal_htree; +// double perc_power_dyn_dataout_horizontal_htree; +// double perc_power_dyn_addr_vertical_htree; +// double perc_power_dyn_datain_vertical_htree; +// double perc_power_dyn_row_predecoder_drivers; +// double perc_power_dyn_row_predecoder_blocks; +// double perc_power_dyn_row_decoders; +// double perc_power_dyn_bit_mux_predecoder_drivers; +// double perc_power_dyn_bit_mux_predecoder_blocks; +// double perc_power_dyn_bit_mux_decoders; +// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers; +// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks; +// double perc_power_dyn_senseamp_mux_lev_1_decoders; +// double 
perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers; +// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks; +// double perc_power_dyn_senseamp_mux_lev_2_decoders; +// double perc_power_dyn_bitlines; +// double perc_power_dyn_sense_amps; +// double perc_power_dyn_prechg_eq_drivers; +// double perc_power_dyn_subarray_output_drivers; +// double perc_power_dyn_dataout_vertical_htree; +// double perc_power_dyn_comparators; +// double perc_power_dyn_crossbar; +// double perc_power_dyn_spent_outside_mats; +// double perc_power_leak_routing_to_bank; +// double perc_power_leak_addr_horizontal_htree; +// double perc_power_leak_datain_horizontal_htree; +// double perc_power_leak_dataout_horizontal_htree; +// double perc_power_leak_addr_vertical_htree; +// double perc_power_leak_datain_vertical_htree; +// double perc_power_leak_row_predecoder_drivers; +// double perc_power_leak_row_predecoder_blocks; +// double perc_power_leak_row_decoders; +// double perc_power_leak_bit_mux_predecoder_drivers; +// double perc_power_leak_bit_mux_predecoder_blocks; +// double perc_power_leak_bit_mux_decoders; +// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers; +// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks; +// double perc_power_leak_senseamp_mux_lev_1_decoders; +// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers; +// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks; +// double perc_power_leak_senseamp_mux_lev_2_decoders; +// double perc_power_leak_bitlines; +// double perc_power_leak_sense_amps; +// double perc_power_leak_prechg_eq_drivers; +// double perc_power_leak_subarray_output_drivers; +// double perc_power_leak_dataout_vertical_htree; +// double perc_power_leak_comparators; +// double perc_power_leak_crossbar; +// double perc_leak_mats; +// double perc_active_mats; + double refresh_power; + double dram_refresh_period; + double dram_array_availability; + double dyn_read_energy_from_closed_page; + double 
dyn_read_energy_from_open_page; + double leak_power_subbank_closed_page; + double leak_power_subbank_open_page; + double leak_power_request_and_reply_networks; + double activate_energy; + double read_energy; + double write_energy; + double precharge_energy; +} results_mem_array; + + +class uca_org_t +{ + public: + mem_array * tag_array2; + mem_array * data_array2; + double access_time; + double cycle_time; + double area; + double area_efficiency; + powerDef power; + double leak_power_with_sleep_transistors_in_mats; + double cache_ht; + double cache_len; + char file_n[100]; + double vdd_periph_global; + bool valid; + results_mem_array tag_array; + results_mem_array data_array; + + uca_org_t(); + void find_delay(); + void find_energy(); + void find_area(); + void find_cyc(); + void adjust_area();//for McPAT only to adjust routing overhead + void cleanup(); + ~uca_org_t(){}; +}; + + +class IO_org_t +{ + public: + double io_area; + double io_timing_margin; + double io_voltage_margin; + double io_dynamic_power; + double io_phy_power; + double io_wakeup_time; + double io_termination_power; + IO_org_t():io_area(0),io_timing_margin(0),io_voltage_margin(0) + ,io_dynamic_power(0),io_phy_power(0),io_wakeup_time(0),io_termination_power(0) + {} +}; + + +void reconfigure(InputParameter *local_interface, uca_org_t *fin_res); + +uca_org_t cacti_interface(const string & infile_name); +//McPAT's plain interface, please keep !!! +uca_org_t cacti_interface(InputParameter * const local_interface); +//McPAT's plain interface, please keep !!! +uca_org_t init_interface(InputParameter * const local_interface); +//McPAT's plain interface, please keep !!! 
+uca_org_t cacti_interface( + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, + int search_ports, + int banks, + double tech_node, + int output_width, + int specific_tag, + int tag_width, + int access_mode, + int cache, + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_cycle_time, + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in, + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + int PAGE_SIZE_BITS_in, + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int force_wiretype, + int wiretype, + int force_config, + int ndwl, + int ndbl, + int nspd, + int ndcm, + int ndsam1, + int ndsam2, + int ecc); +// int cache_size, +// int line_size, +// int associativity, +// int rw_ports, +// int excl_read_ports, +// int excl_write_ports, +// int single_ended_read_ports, +// int banks, +// double tech_node, +// int output_width, +// int specific_tag, +// int tag_width, +// int access_mode, +// int cache, +// int main_mem, +// int obj_func_delay, +// int obj_func_dynamic_power, +// int obj_func_leakage_power, +// int obj_func_area, +// int obj_func_cycle_time, +// int dev_func_delay, +// int dev_func_dynamic_power, +// int dev_func_leakage_power, +// int 
dev_func_area, +// int dev_func_cycle_time, +// int temp, +// int data_arr_ram_cell_tech_flavor_in, +// int data_arr_peri_global_tech_flavor_in, +// int tag_arr_ram_cell_tech_flavor_in, +// int tag_arr_peri_global_tech_flavor_in, +// int interconnect_projection_type_in, +// int wire_inside_mat_type_in, +// int wire_outside_mat_type_in, +// int REPEATERS_IN_HTREE_SEGMENTS_in, +// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, +// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, +//// double MAXAREACONSTRAINT_PERC_in, +//// double MAXACCTIMECONSTRAINT_PERC_in, +//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in, +// int PAGE_SIZE_BITS_in, +// int BURST_LENGTH_in, +// int INTERNAL_PREFETCH_WIDTH_in); + +//Naveen's interface +uca_org_t cacti_interface( + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, + int banks, + double tech_node, + int page_sz, + int burst_length, + int pre_width, + int output_width, + int specific_tag, + int tag_width, + int access_mode, //0 normal, 1 seq, 2 fast + int cache, //scratch ram or cache + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_area, + int obj_func_cycle_time, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in, + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, // 0 - aggressive, 1 - normal + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, + int is_nuca, // 0 - UCA, 1 - NUCA + int core_count, + int 
cache_level, // 0 - L2, 1 - L3 + int nuca_bank_count, + int nuca_obj_func_delay, + int nuca_obj_func_dynamic_power, + int nuca_obj_func_leakage_power, + int nuca_obj_func_area, + int nuca_obj_func_cycle_time, + int nuca_dev_func_delay, + int nuca_dev_func_dynamic_power, + int nuca_dev_func_leakage_power, + int nuca_dev_func_area, + int nuca_dev_func_cycle_time, + int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported + int p_input); + + +//CACTI3DD interface +uca_org_t cacti_interface( + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports,// para5 + int excl_write_ports, + int single_ended_read_ports, + int search_ports, + int banks, + double tech_node,//para10 + int output_width, + int specific_tag, + int tag_width, + int access_mode, + int cache, //para15 + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_cycle_time, //para20 + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, //para25 + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in,//para30 + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, + int wire_inside_mat_type_in,//para35 + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + int PAGE_SIZE_BITS_in,//para40 + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int force_wiretype, + int wiretype, + int force_config,//para45 + int ndwl, + int ndbl, + int nspd, + int ndcm, + int ndsam1,//para50 + int 
ndsam2, + int ecc, + int is_3d_dram, + int burst_depth, + int IO_width, + int sys_freq, + int debug_detail, + int num_dies, + int tsv_gran_is_subarray, + int tsv_gran_os_bank, + int num_tier_row_sprd, + int num_tier_col_sprd, + int partition_level); + +class mem_array +{ + public: + int Ndcm; + int Ndwl; + int Ndbl; + double Nspd; + int deg_bl_muxing; + int Ndsam_lev_1; + int Ndsam_lev_2; + double access_time; + double cycle_time; + double multisubbank_interleave_cycle_time; + double area_ram_cells; + double area; + powerDef power; + double delay_senseamp_mux_decoder; + double delay_before_subarray_output_driver; + double delay_from_subarray_output_driver_to_output; + double height; + double width; + + double mat_height; + double mat_length; + double subarray_length; + double subarray_height; + + double delay_route_to_bank, + delay_input_htree, + delay_row_predecode_driver_and_block, + delay_row_decoder, + delay_bitlines, + delay_sense_amp, + delay_subarray_output_driver, + delay_dout_htree, + delay_comparator, + delay_matchlines; + //CACTI3DD 3d stats + double delay_row_activate_net, + delay_local_wordline, + + delay_column_access_net, + delay_column_predecoder, + delay_column_decoder, + delay_column_selectline, + delay_datapath_net, + delay_global_data, + delay_local_data_and_drv, + delay_data_buffer; + + double energy_row_activate_net, + energy_row_predecode_driver_and_block, + energy_row_decoder, + energy_local_wordline, + energy_bitlines, + energy_sense_amp, + energy_column_access_net, + energy_column_predecoder, + energy_column_decoder, + energy_column_selectline, + energy_datapath_net, + energy_global_data, + energy_local_data_and_drv, + energy_data_buffer, + energy_subarray_output_driver; + + double all_banks_height, + all_banks_width, + area_efficiency; + + powerDef power_routing_to_bank; + powerDef power_addr_input_htree; + powerDef power_data_input_htree; + powerDef power_data_output_htree; + powerDef power_htree_in_search; + powerDef 
power_htree_out_search; + powerDef power_row_predecoder_drivers; + powerDef power_row_predecoder_blocks; + powerDef power_row_decoders; + powerDef power_bit_mux_predecoder_drivers; + powerDef power_bit_mux_predecoder_blocks; + powerDef power_bit_mux_decoders; + powerDef power_senseamp_mux_lev_1_predecoder_drivers; + powerDef power_senseamp_mux_lev_1_predecoder_blocks; + powerDef power_senseamp_mux_lev_1_decoders; + powerDef power_senseamp_mux_lev_2_predecoder_drivers; + powerDef power_senseamp_mux_lev_2_predecoder_blocks; + powerDef power_senseamp_mux_lev_2_decoders; + powerDef power_bitlines; + powerDef power_sense_amps; + powerDef power_prechg_eq_drivers; + powerDef power_output_drivers_at_subarray; + powerDef power_dataout_vertical_htree; + powerDef power_comparators; + + powerDef power_cam_bitline_precharge_eq_drv; + powerDef power_searchline; + powerDef power_searchline_precharge; + powerDef power_matchlines; + powerDef power_matchline_precharge; + powerDef power_matchline_to_wordline_drv; + + min_values_t *arr_min; + enum Wire_type wt; + + // dram stats + double activate_energy, read_energy, write_energy, precharge_energy, + refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, + leak_power_request_and_reply_networks; + + double precharge_delay; + + //Power-gating stats + double array_leakage; + double wl_leakage; + double cl_leakage; + + double sram_sleep_tx_width, wl_sleep_tx_width, cl_sleep_tx_width; + double sram_sleep_tx_area, wl_sleep_tx_area, cl_sleep_tx_area; + double sram_sleep_wakeup_latency, wl_sleep_wakeup_latency, cl_sleep_wakeup_latency, bl_floating_wakeup_latency; + double sram_sleep_wakeup_energy, wl_sleep_wakeup_energy, cl_sleep_wakeup_energy, bl_floating_wakeup_energy; + + int num_active_mats; + int num_submarray_mats; + + static bool lt(const mem_array * m1, const mem_array * m2); + + //CACTI3DD 3d dram stats + double t_RCD, t_RAS, t_RC, t_CAS, t_RP, t_RRD; + double activate_power, read_power, write_power, 
peak_read_power; + int num_row_subarray, num_col_subarray; + double delay_TSV_tot, area_TSV_tot, dyn_pow_TSV_tot, dyn_pow_TSV_per_access; + unsigned int num_TSV_tot; + double area_lwl_drv, area_row_predec_dec, area_col_predec_dec, + area_subarray, area_bus, area_address_bus, area_data_bus, area_data_drv, area_IOSA, area_sense_amp; + +}; + + +#endif + diff --git a/T1/TP1/cacti-master/component.cc b/T1/TP1/cacti-master/component.cc new file mode 100644 index 0000000..ea48659 --- /dev/null +++ b/T1/TP1/cacti-master/component.cc @@ -0,0 +1,237 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + + +#include +#include +#include + +#include "bank.h" +#include "component.h" +#include "decoder.h" + +using namespace std; + + + +Component::Component() + :area(), power(), rt_power(),delay(0) +{ +} + + + +Component::~Component() +{ +} + + + +double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) +{ + double w_poly = g_ip->F_sz_um; + double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain + num_stacked_in * w_poly + + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; + + if (num_folded_tr > 1) + { + total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly + + (num_folded_tr - 1) * num_stacked_in * w_poly + + (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; + } + + return total_diff_w; +} + + + +double Component::compute_gate_area( + int gate_type, + int num_inputs, + double w_pmos, + double w_nmos, + double h_gate) +{ + if (w_pmos <= 0.0 || w_nmos <= 0.0) + { + return 0.0; + } + + double w_folded_pmos, w_folded_nmos; + int num_folded_pmos, num_folded_nmos; + double total_ndiff_w, total_pdiff_w; + Area gate; + + double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL; + double ratio_p_to_n = w_pmos / (w_pmos + w_nmos); + + if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) + { + return 0.0; + } + 
+ w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n; + w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n); + + assert(w_folded_pmos > 0); + + num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos)); + num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos)); + + switch (gate_type) + { + case INV: + total_ndiff_w = compute_diffusion_width(1, num_folded_nmos); + total_pdiff_w = compute_diffusion_width(1, num_folded_pmos); + break; + + case NOR: + total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos); + total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos); + break; + + case NAND: + total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos); + total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos); + break; + default: + cout << "Unknown gate type: " << gate_type << endl; + exit(1); + } + + gate.w = MAX(total_ndiff_w, total_pdiff_w); + + if (w_folded_nmos > w_nmos) + { + //means that the height of the gate can + //be made smaller than the input height specified, so calculate the height of the gate. + gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL; + } + else + { + gate.h = h_gate; + } + return gate.get_area(); +} + + + +double Component::compute_tr_width_after_folding( + double input_width, + double threshold_folding_width) +{//This is actually the width of the cell not the width of a device. +//The width of a cell and the width of a device is orthogonal. 
+ if (input_width <= 0) + { + return 0; + } + + int num_folded_tr = (int) (ceil(input_width / threshold_folding_width)); + double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double width_poly = g_ip->F_sz_um; + double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly; + + return total_diff_width; +} + + + +double Component::height_sense_amplifier(double pitch_sense_amp) +{ + // compute the height occupied by all PMOS transistors + double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; + + // compute the height occupied by all NMOS transistors + double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; + + // compute total height by considering gap between the p and n diffusion areas + return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS; +} + + + +int Component::logical_effort( + int num_gates_min, + double g, + double F, + double * w_n, + double * w_p, + double C_load, + double p_to_n_sz_ratio, + bool is_dram_, + bool is_wl_tr_, + double max_w_nmos) +{ + int num_gates = (int) (log(F) / log(fopt)); + + // check if num_gates is odd. if so, add 1 to make it even + num_gates+= (num_gates % 2) ? 
1 : 0; + num_gates = MAX(num_gates, num_gates_min); + + // recalculate the effective fanout of each stage + double f = pow(F, 1.0 / num_gates); + int i = num_gates - 1; + double C_in = C_load / f; + w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_); + w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); + w_p[i] = p_to_n_sz_ratio * w_n[i]; + + if (w_n[i] > max_w_nmos) // && !g_ip->is_3d_mem) + { + double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); + F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_); + num_gates = (int) (log(F) / log(fopt)) + 1; + num_gates+= (num_gates % 2) ? 1 : 0; + num_gates = MAX(num_gates, num_gates_min); + f = pow(F, 1.0 / (num_gates - 1)); + i = num_gates - 1; + w_n[i] = max_w_nmos; + w_p[i] = p_to_n_sz_ratio * w_n[i]; + } + + for (i = num_gates - 2; i >= 1; i--) + { + w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_); + w_p[i] = p_to_n_sz_ratio * w_n[i]; + } + + assert(num_gates <= MAX_NUMBER_GATES_STAGE); + return num_gates; +} + diff --git a/T1/TP1/cacti-master/component.h b/T1/TP1/cacti-master/component.h new file mode 100644 index 0000000..7d6dbf8 --- /dev/null +++ b/T1/TP1/cacti-master/component.h @@ -0,0 +1,84 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#ifndef __COMPONENT_H__ +#define __COMPONENT_H__ + +#include "parameter.h" +#include "area.h" + +using namespace std; + +class Crossbar; +class Bank; + +class Component +{ + public: + Component(); + ~Component(); + + Area area; + powerDef power,rt_power; + double delay; + double cycle_time; + + double compute_gate_area( + int gate_type, + int num_inputs, + double w_pmos, + 
double w_nmos, + double h_gate); + + double compute_tr_width_after_folding(double input_width, double threshold_folding_width); + double height_sense_amplifier(double pitch_sense_amp); + + protected: + int logical_effort( + int num_gates_min, + double g, + double F, + double * w_n, + double * w_p, + double C_load, + double p_to_n_sz_ratio, + bool is_dram_, + bool is_wl_tr_, + double max_w_nmos); + + private: + double compute_diffusion_width(int num_stacked_in, int num_folded_tr); +}; + +#endif + diff --git a/T1/TP1/cacti-master/const.h b/T1/TP1/cacti-master/const.h new file mode 100644 index 0000000..a2851d7 --- /dev/null +++ b/T1/TP1/cacti-master/const.h @@ -0,0 +1,273 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef __CONST_H__ +#define __CONST_H__ + +#include +#include +#include +#include +#include + +/* The following are things you might want to change + * when compiling + */ + +/* + * Address bits in a word, and number of output bits from the cache + */ + +/* +was: #define ADDRESS_BITS 32 +now: 42 bits as in the Power4 +This is 36 bits in Pentium 4 +and 40 bits in Opteron. +*/ +const int ADDRESS_BITS = 42; + +/*dt: In addition to the tag bits, the tags also include 1 valid bit, 1 dirty bit, 2 bits for a 4-state + cache coherency protocoll (MESI), 1 bit for MRU (change this to log(ways) for full LRU). 
+ So in total we have 1 + 1 + 2 + 1 = 5 */ +const int EXTRA_TAG_BITS = 5; + +/* limits on the various N parameters */ + +const unsigned int MAXDATAN = 512; // maximum for Ndwl and Ndbl +const unsigned int MAXSUBARRAYS = 1048576; // maximum subarrays for data and tag arrays +const unsigned int MAXDATASPD = 256; // maximum for Nspd +const unsigned int MAX_COL_MUX = 256; + + + +#define ROUTER_TYPES 3 +#define WIRE_TYPES 6 + +const double Cpolywire = 0; + + +/* Threshold voltages (as a proportion of Vdd) + If you don't know them, set all values to 0.5 */ +#define VTHFA1 0.452 +#define VTHFA2 0.304 +#define VTHFA3 0.420 +#define VTHFA4 0.413 +#define VTHFA5 0.405 +#define VTHFA6 0.452 +#define VSINV 0.452 +#define VTHCOMPINV 0.437 +#define VTHMUXNAND 0.548 // TODO : this constant must be revisited +#define VTHEVALINV 0.452 +#define VTHSENSEEXTDRV 0.438 + + +//WmuxdrvNANDn and WmuxdrvNANDp are no longer being used but it's part of the old +//delay_comparator function which we are using exactly as it used to be, so just setting these to 0 +const double WmuxdrvNANDn = 0; +const double WmuxdrvNANDp = 0; + + +/*===================================================================*/ +/* + * The following are things you probably wouldn't want to change. + */ + +#define BIGNUM 1e30 +#define INF 9999999 +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define MIN(a,b) (((a)<(b))?(a):(b)) + +/* Used to communicate with the horowitz model */ +#define RISE 1 +#define FALL 0 +#define NCH 1 +#define PCH 0 + + +#define EPSILON 0.5 //v4.1: This constant is being used in order to fix floating point -> integer +//conversion problems that were occuring within CACTI. Typical problem that was occuring was +//that with different compilers a floating point number like 3.0 would get represented as either +//2.9999....or 3.00000001 and then the integer part of the floating point number (3.0) would +//be computed differently depending on the compiler. 
What we are doing now is to replace +//int (x) with (int) (x+EPSILON) where EPSILON is 0.5. This would fix such problems. Note that +//this works only when x is an integer >= 0. +/* + * thinks this is more a solution to solve the simple truncate problem + * (http://www.cs.tut.fi/~jkorpela/round.html) rather than the problem mentioned above. + * Unfortunately, this solution causes nasty bugs (different results when using O0 and O3). + * Moreover, round is not correct in CACTI since when an extra fraction of bit/line is needed, + * we need to provide a complete bit/line even the fraction is just 0.01. + * So, in later version than 6.5 we use (int)ceil() to get double to int conversion. + */ + +#define EPSILON2 0.1 +#define EPSILON3 0.6 + + +#define MINSUBARRAYROWS 16 //For simplicity in modeling, for the row decoding structure, we assume +//that each row predecode block is composed of at least one 2-4 decoder. When the outputs from the +//row predecode blocks are combined this means that there are at least 4*4=16 row decode outputs +#define MAXSUBARRAYROWS 262144 //Each row predecode block produces a max of 2^9 outputs. So +//the maximum number of row decode outputs will be 2^9*2^9 +#define MINSUBARRAYCOLS 2 +#define MAXSUBARRAYCOLS 262144 + + +#define INV 0 +#define NOR 1 +#define NAND 2 + + +#define NUMBER_TECH_FLAVORS 4 + +#define NUMBER_INTERCONNECT_PROJECTION_TYPES 2 //aggressive and conservative +//0 = Aggressive projections, 1 = Conservative projections +#define NUMBER_WIRE_TYPES 4 //local, semi-global and global +//1 = 'Semi-global' wire type, 2 = 'Global' wire type +#define NUMBER_TSV_TYPES 3 +//0 = ITRS projected fine TSV type, 1 = Industrial reported large TSV type, 2 = TBD + +const int dram_cell_tech_flavor = 3; + + +#define VBITSENSEMIN 0.08 //minimum bitline sense voltage is fixed to be 80 mV. 
+ +#define fopt 4.0 + +#define INPUT_WIRE_TO_INPUT_GATE_CAP_RATIO 0 +#define BUFFER_SEPARATION_LENGTH_MULTIPLIER 1 +#define NUMBER_MATS_PER_REDUNDANT_MAT 8 + +#define NUMBER_STACKED_DIE_LAYERS 1 + +// this variable can be set to carry out solution optimization for +// a maximum area allocation. +#define STACKED_DIE_LAYER_ALLOTED_AREA_mm2 0 //6.24 //6.21//71.5 + +// this variable can also be employed when solution optimization +// with maximum area allocation is carried out. +#define MAX_PERCENT_AWAY_FROM_ALLOTED_AREA 50 + +// this variable can also be employed when solution optimization +// with maximum area allocation is carried out. +#define MIN_AREA_EFFICIENCY 20 + +// this variable can be employed when solution with a desired +// aspect ratio is required. +#define STACKED_DIE_LAYER_ASPECT_RATIO 1 + +// this variable can be employed when solution with a desired +// aspect ratio is required. +#define MAX_PERCENT_AWAY_FROM_ASPECT_RATIO 101 + +// this variable can be employed to carry out solution optimization +// for a certain target random cycle time. +#define TARGET_CYCLE_TIME_ns 1000000000 + +#define NUMBER_PIPELINE_STAGES 4 + +// this can be used to model the length of interconnect +// between a bank and a crossbar +#define LENGTH_INTERCONNECT_FROM_BANK_TO_CROSSBAR 0 //3791 // 2880//micron + +#define IS_CROSSBAR 0 +#define NUMBER_INPUT_PORTS_CROSSBAR 8 +#define NUMBER_OUTPUT_PORTS_CROSSBAR 8 +#define NUMBER_SIGNALS_PER_PORT_CROSSBAR 256 + + +#define MAT_LEAKAGE_REDUCTION_DUE_TO_SLEEP_TRANSISTORS_FACTOR 1 +#define LEAKAGE_REDUCTION_DUE_TO_LONG_CHANNEL_HP_TRANSISTORS_FACTOR 1 + +#define PAGE_MODE 0 + +#define MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA 60 +// We are actually not using this variable in the CACTI code. We just want to acknowledge that +// this current should be multiplied by the DDR(n) system VDD value to compute the standby power +// consumed during precharge. 
+ + +const double VDD_STORAGE_LOSS_FRACTION_WORST = 0.125; +const double CU_RESISTIVITY = 0.022; //ohm-micron +const double BULK_CU_RESISTIVITY = 0.018; //ohm-micron +const double PERMITTIVITY_FREE_SPACE = 8.854e-18; //F/micron + +const static uint32_t sram_num_cells_wl_stitching_ = 16; +const static uint32_t dram_num_cells_wl_stitching_ = 64; +const static uint32_t comm_dram_num_cells_wl_stitching_ = 256; +const static double num_bits_per_ecc_b_ = 8.0; + +const double bit_to_byte = 8.0; + +#define MAX_NUMBER_GATES_STAGE 20 +#define MAX_NUMBER_HTREE_NODES 20 +#define NAND2_LEAK_STACK_FACTOR 0.2 +#define NAND3_LEAK_STACK_FACTOR 0.2 +#define NOR2_LEAK_STACK_FACTOR 0.2 +#define INV_LEAK_STACK_FACTOR 0.5 +#define MAX_NUMBER_ARRAY_PARTITIONS 1000000 + +// abbreviations used in this project +// ---------------------------------- +// +// num : number +// rw : read/write +// rd : read +// wr : write +// se : single-ended +// sz : size +// F : feature +// w : width +// h : height or horizontal +// v : vertical or velocity + + +enum ram_cell_tech_type_num +{ + itrs_hp = 0, + itrs_lstp = 1, + itrs_lop = 2, + lp_dram = 3, + comm_dram = 4 +}; + +const double pppm[4] = {1,1,1,1}; +const double pppm_lkg[4] = {0,1,1,0}; +const double pppm_dyn[4] = {1,0,0,0}; +const double pppm_Isub[4] = {0,1,0,0}; +const double pppm_Ig[4] = {0,0,1,0}; +const double pppm_sc[4] = {0,0,0,1}; + +const double Ilinear_to_Isat_ratio =2.0; + + + +#endif diff --git a/T1/TP1/cacti-master/contention.dat b/T1/TP1/cacti-master/contention.dat new file mode 100644 index 0000000..826553e --- /dev/null +++ b/T1/TP1/cacti-master/contention.dat @@ -0,0 +1,126 @@ +l34c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l34c64l2b: 9 11 19 29 43 62 81 102 +l34c64l4b: 6 8 12 17 24 29 39 47 +l34c64l8b: 7 8 10 14 18 22 25 30 +l34c64l16b: 7 7 9 12 14 17 20 24 +l34c64l32b: 7 7 9 12 14 17 20 24 -r +l34c64l64b: 7 7 9 12 14 17 20 24 -r +l34c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l34c128l2b: 4 10 19 30 44 64 82 103 
+l34c128l4b: 3 6 11 17 24 31 38 47 +l34c128l8b: 3 5 9 13 17 21 25 29 +l34c128l16b: 4 5 7 10 13 16 19 22 +l34c128l32b: 4 5 7 10 13 16 19 22 -r +l34c128l64b: 4 5 7 10 13 16 19 22 -r +l34c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l34c256l2b: 3 10 19 30 44 63 82 103 +l34c256l4b: 3 6 11 17 24 31 38 47 +l34c256l8b: 2 5 8 12 16 20 24 29 +l34c256l16b: 2 4 7 9 12 15 18 21 +l34c256l32b: 2 4 7 9 12 15 18 21 -r +l34c256l64b: 2 4 7 9 12 15 18 21 -r +l38c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l38c64l2b: 57 59 77 90 137 187 219 245 +l38c64l4b: 35 40 48 56 43 61 80 101 +l38c64l8b: 18 27 41 45 52 58 58 58 -r +l38c64l16b: 16 17 19 35 40 49 53 53 -r +l38c64l32b: 15 15 17 19 22 25 30 30 -r +l38c64l64b: 15 15 17 19 22 25 30 30 -r +l38c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l38c128l2b: 38 50 78 93 139 188 220 245 +l38c128l4b: 29 37 46 56 43 61 81 102 +l38c128l8b: 16 30 39 44 50 57 57 57 -r +l38c128l16b: 14 16 19 33 40 47 52 52 -r +l38c128l32b: 14 15 17 20 23 27 31 31 -r +l38c128l64b: 14 15 17 20 23 27 31 31 -r +l38c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l38c256l2b: 35 50 78 94 139 188 220 246 +l38c256l4b: 28 36 45 55 55 61 81 102 +l38c256l8b: 17 30 38 43 50 57 57 57 -r +l38c256l16b: 15 17 21 32 40 47 51 51 +l38c256l32b: 15 17 19 21 24 29 33 33 +l38c256l64b: 15 17 19 21 24 29 33 33 -r +l316c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l316c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000 +l316c64l4b: 34 35 78 126 178 220 252 274 +l316c64l8b: 9 11 23 43 62 87 105 130 +l316c64l16b: 7 9 13 23 33 45 56 67 +l316c64l32b: 5 6 7 10 13 19 25 30 +l316c64l64b: 4 5 6 8 10 14 18 21 +l316c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l316c128l2b: 25 131 243 1000 1000 1000 1000 1000 +l316c128l4b: 8 28 79 127 179 221 253 274 +l316c128l8b: 4 9 22 43 62 88 106 131 +l316c128l16b: 4 6 11 21 32 44 55 67 +l316c128l32b: 4 6 11 12 12 18 24 29 +l316c128l64b: 2 3 5 7 9 13 17 21 +l316c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l316c256l2b: 1000 1000 1000 1000 1000 1000 
1000 1000 +l316c256l4b: 5 28 80 128 180 221 253 274 +l316c256l8b: 3 8 22 43 63 88 107 131 +l316c256l16b: 2 5 11 21 32 44 55 67 +l316c256l32b: 2 3 5 8 12 18 24 29 +l316c256l64b: 2 3 4 6 9 13 17 21 +l24c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l24c64l2b: 10 12 24 41 60 86 105 122 +l24c64l4b: 5 7 13 20 29 38 47 56 +l24c64l8b: 5 6 9 14 18 24 29 35 +l24c64l16b: 4 5 7 10 12 16 19 22 +l24c64l32b: 5 5 6 8 10 12 14 17 +l24c64l64b: 5 5 6 8 10 12 14 16 +l24c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l24c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000 +l24c128l4b: 3 7 13 20 29 38 47 57 +l24c128l8b: 3 5 9 13 18 23 29 35 +l24c128l16b: 3 4 6 9 12 15 19 22 +l24c128l32b: 3 4 5 7 9 11 14 16 +l24c128l64b: 1000 1000 1000 1000 1000 1000 1000 1000 +l24c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l24c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000 +l24c256l4b: 2 6 13 20 29 38 47 57 +l24c256l8b: 2 4 8 13 18 23 28 35 +l24c256l16b: 2 3 6 8 11 15 18 22 +l24c256l32b: 2 3 5 6 8 11 14 16 +l24c256l64b: 1000 1000 1000 1000 1000 1000 1000 1000 +l28c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l28c64l2b: 46 52 117 157 188 225 246 261 +l28c64l4b: 19 25 39 54 96 107 120 150 +l28c64l8b: 9 12 21 30 39 47 58 79 +l28c64l16b: 8 9 11 16 25 32 37 42 +l28c64l32b: 7 8 9 11 14 19 23 28 +l28c64l64b: 7 7 8 10 12 14 18 22 +l28c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l28c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000 +l28c128l4b: 12 22 39 54 98 108 130 151 +l28c128l8b: 7 12 21 30 39 48 59 80 +l28c128l16b: 6 8 11 16 24 31 37 42 +l28c128l32b: 6 7 9 11 14 19 24 28 +l28c128l64b: 6 7 9 11 14 19 24 28 +l28c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l28c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000 +l28c256l4b: 12 22 39 54 100 108 130 152 +l28c256l8b: 7 12 21 30 39 48 59 81 +l28c256l16b: 6 8 11 16 24 31 37 42 +l28c256l32b: 6 7 9 11 14 19 24 28 +l28c256l64b: 6 7 9 11 14 19 24 28 +l216c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l216c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000 
+l216c64l4b: 34 35 78 126 178 220 252 274 +l216c64l8b: 9 11 23 43 62 87 105 130 +l216c64l16b: 7 9 13 23 33 45 56 67 +l216c64l32b: 5 6 7 10 13 19 25 30 +l216c64l64b: 4 5 6 8 10 14 18 21 +l216c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l216c128l2b: 25 131 243 1000 1000 1000 1000 1000 +l216c128l4b: 8 28 79 127 179 221 253 274 +l216c128l8b: 4 9 22 43 62 88 106 131 +l216c128l16b: 4 6 11 21 32 44 55 67 +l216c128l32b: 4 6 11 12 12 18 24 29 +l216c128l64b: 2 3 5 7 9 13 17 21 +l216c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000 +l216c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000 +l216c256l4b: 5 28 80 128 180 221 253 274 +l216c256l8b: 3 8 22 43 63 88 107 131 +l216c256l16b: 2 5 11 21 32 44 55 67 +l216c256l32b: 2 3 5 8 12 18 24 29 +l216c256l64b: 2 3 4 6 9 13 17 21 diff --git a/T1/TP1/cacti-master/crossbar.cc b/T1/TP1/cacti-master/crossbar.cc new file mode 100644 index 0000000..be32736 --- /dev/null +++ b/T1/TP1/cacti-master/crossbar.cc @@ -0,0 +1,161 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "crossbar.h" + +#define ASPECT_THRESHOLD .8 +#define ADJ 1 + +Crossbar::Crossbar( + double n_inp_, + double n_out_, + double flit_size_, + /*TechnologyParameter::*/DeviceType *dt + ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) +{ + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; + Vdd = dt->Vdd; + CB_ADJ = 1; +} + +Crossbar::~Crossbar(){} + +double Crossbar::output_buffer() +{ + + //Wire winit(4, 4); + double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; + Wire w1(g_ip->wt, l_eff); + //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; + double s1 = w1.repeater_size * (l_eff n_to_p_eff_curr_drv_ratio; + // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor + TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); + TriS2 = s1; //driver transistor + + if (TriS1 < 1) + TriS1 = 1; + + double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) + + gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0); +// input_cap += 
drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + +// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + +// gate_C(TriS2*g_tp.min_w_nmos_, 0)+ +// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + +// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + +// gate_C(TriS2*min_w_pmos, 0); + tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + + gate_C(TriS2*g_tp.min_w_nmos_, 0)+ + drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + + drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(TriS2*min_w_pmos, 0); + double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); + double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0); + + tri_inp_cap = input_cap; + tri_out_cap = output_cap; + tri_ctr_cap = ctr_cap; + return input_cap + output_cap + ctr_cap; +} + +void Crossbar::compute_power() +{ + + Wire winit(4, 4); + double tri_cap = output_buffer(); + assert(tri_cap > 0); + //area of a tristate logic + double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def); + g_area *= 2; // to model area of output transistors + g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def); + g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def); + double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def); + // effective no. 
of tristate buffers that need to be laid side by side + int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch)); + double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out); + Wire w1(g_ip->wt, wire_len); + + area.w = wire_len; + area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ; + Wire w2(g_ip->wt, area.h); + + double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp); + if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb; + + if (aspect_ratio_cb < ASPECT_THRESHOLD) { + if (n_out > 2 && n_inp > 2) { + CB_ADJ+=0.2; + //cout << "CB ADJ " << CB_ADJ << endl; + if (CB_ADJ < 4) { + this->compute_power(); + } + } + } + + + + power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size; + power.readOp.leakage = n_inp * n_out * flit_size * ( + cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ + cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ + cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ + w1.power.readOp.leakage + w2.power.readOp.leakage); + power.readOp.gate_leakage = n_inp * n_out * flit_size * ( + cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ + cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ + cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ + w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); + + // delay calculation + double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; + Wire wdriver(g_ip->wt, l_eff); + double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1); + double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap; + delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, 
deviceType->Vth/deviceType->Vdd, RISE); + + Wire wreset(); +} + +void Crossbar::print_crossbar() +{ + cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; + cout << "Flit size : " << flit_size << " bits" << endl; + cout << "Width : " << area.w << " u" << endl; + cout << "Height : " << area.h << " u" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; + cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl; + cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; +} + + diff --git a/T1/TP1/cacti-master/crossbar.h b/T1/TP1/cacti-master/crossbar.h new file mode 100644 index 0000000..529db9c --- /dev/null +++ b/T1/TP1/cacti-master/crossbar.h @@ -0,0 +1,83 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + +#ifndef __CROSSBAR__ +#define __CROSSBAR__ + +#include +#include +#include "basic_circuit.h" +#include "cacti_interface.h" +#include "component.h" +#include "parameter.h" +#include "mat.h" +#include "wire.h" + +class Crossbar : public Component +{ + public: + Crossbar( + double in, + double out, + double flit_sz, + /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)); + ~Crossbar(); + + void print_crossbar(); + double output_buffer(); + void compute_power(); + + double n_inp, n_out; + double flit_size; + double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap; + + private: + double CB_ADJ; + /* + * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar + * buffer is adjusted to get an aspect ratio of whole cross bar close to one; + * when adjust the ratio, the number of wires route over the tri-state buffers does not change, + * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase + * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch + * will increase. 
As a result, the height of the crossbar (area.h) will increase. + */ + + /*TechnologyParameter::*/DeviceType *deviceType; + double TriS1, TriS2; + double min_w_pmos, Vdd; + +}; + + + + +#endif diff --git a/T1/TP1/cacti-master/ddr3.cfg b/T1/TP1/cacti-master/ddr3.cfg new file mode 100644 index 0000000..f6645ab --- /dev/null +++ b/T1/TP1/cacti-master/ddr3.cfg @@ -0,0 +1,254 @@ +# Cache size +//-size (bytes) 2048 +//-size (bytes) 4096 +//-size (bytes) 32768 +//-size (bytes) 131072 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 67108864 +//-size (bytes) 1073741824 + +# power gating +-Array Power Gating - "false" +-WL Power Gating - "false" +-CL Power Gating - "false" +-Bitline floating - "false" +-Interconnect Power Gating - "false" +-Power Gating Performance Loss 0.01 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 64 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +//-associativity 8 +-associativity 8 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 1 +-technology (u) 0.022 +//-technology (u) 0.040 +//-technology (u) 0.032 +//-technology (u) 0.090 + +# following three parameters are meaningful only for main memories + +-page size (bits) 8192 +-burst length 8 +-internal prefetch width 8 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Data array peripheral type - "itrs-hp" +//-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - 
"itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" +//-Tag array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" +//-Tag array peripheral type - "itrs-lop + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +-output/input bus width 512 + +// 300-400 in steps of 10 +-operating temperature (K) 360 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +-cache type "cache" +//-cache type "ram" +//-cache type "main memory" + +# to model special structure like branch target buffers, directory, etc. +# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 22 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +//-access mode (normal, sequential, fast) - "fast" +-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. 
Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. 
+-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +//-Wire outside mat - "global" +-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +//-Force cache config - "true" +-Force cache config - "false" +-Ndwl 1 +-Ndbl 1 +-Nspd 0 +-Ndcm 1 +-Ndsam1 0 +-Ndsam2 0 + + + +#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report (), especially Chapters 2 and 3. + +# Memory Type (D=DDR3, L=LPDDR2, W=WideIO). Additional memory types can be defined by the user in extio_technology.cc, along with their technology and configuration parameters. + +-dram_type "D" +//-dram_type "L" +//-dram_type "W" +//-dram_type "S" + +# Memory State (R=Read, W=Write, I=Idle or S=Sleep) + +//-iostate "R" +-iostate "W" +//-iostate "I" +//-iostate "S" + +#Address bus timing. To alleviate the timing on the command and address bus due to high loading (shared across all memories on the channel), the interface allows for multi-cycle timing options. 
+ +-addr_timing 0.5 //DDR +//-addr_timing 1.0 //SDR (half of DQ rate) +//-addr_timing 2.0 //2T timing (One fourth of DQ rate) +//-addr_timing 3.0 // 3T timing (One sixth of DQ rate) + +# Memory Density (Gbit per memory/DRAM die) + +-mem_density 8 Gb //Valid values 2^n Gb + +# IO frequency (MHz) (frequency of the external memory interface). + +-bus_freq 800 MHz //As of current memory standards (2013), valid range 0 to 1.5 GHz for DDR3, 0 to 533 MHz for LPDDR2, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential. However this can change, and the user is free to define valid ranges based on new memory types or extending beyond existing standards for existing dram types. + +# Duty Cycle (fraction of time in the Memory State defined above) + +-duty_cycle 1.0 //Valid range 0 to 1.0 + +# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR +#-activity_dq .50 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_ca 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T +#-activity_ca 0.25 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T + +# Number of DQ pins + +-num_dq 72 //Number of DQ pins. Includes ECC pins. + +# Number of DQS pins. DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typically differential, just like the CLK pin. + +-num_dqs 36 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. 
For x4 memories, could have 36 DQS pins. + +# Number of CA pins + +-num_ca 35 //Valid range 0 to 35 pins. +#-num_ca 25 //Valid range 0 to 35 pins. + +# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin. + +-num_clk 2 //2 x differential pair. Valid values: 0/2/4. + +# Number of Physical Ranks + +-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register. + +# Width of the Memory Data Bus + +-mem_data_width 4 //x4 or x8 or x16 or x32 memories. For WideIO upto x128. diff --git a/T1/TP1/cacti-master/decoder.cc b/T1/TP1/cacti-master/decoder.cc new file mode 100644 index 0000000..6ab9bb5 --- /dev/null +++ b/T1/TP1/cacti-master/decoder.cc @@ -0,0 +1,1673 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include "area.h" +#include "decoder.h" +#include "parameter.h" +#include +#include +#include + +using namespace std; + +/* Builds a decoder model. Stores the output load capacitance and output wire resistance, then uses log2(_num_dec_signals) and flag_way_select to decide whether a decoder exists and how many inputs its first NAND gate has: fewer than 4 address bits gives 2 inputs with way select and no decoder (0 inputs) without it; 4 or more bits gives 3 inputs with way select and 2 without. The driver cell height is set to g_tp.h_dec * cell.h, after which compute_widths() and compute_area() are run. Asserts that cell.h and cell.w are positive. */ +Decoder::Decoder( + int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area & cell_) +:exist(false), + C_ld_dec_out(_C_ld_dec_out), + R_wire_dec_out(_R_wire_dec_out), + num_gates(0), num_gates_min(2), + delay(0), + //power(), + fully_assoc(fully_assoc_), is_dram(is_dram_), + is_wl_tr(is_wl_tr_), + total_driver_nwidth(0), + total_driver_pwidth(0), + cell(cell_), + nodes_DSTN(1) +{ + + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) + { + w_dec_n[i] = 0; + w_dec_p[i] = 0; + } + + /* + * _num_dec_signals is the number of decoded signal as output + * num_addr_bits_dec is the number of signal to be decoded + * as the decoders input. 
+ */ + int num_addr_bits_dec = _log2(_num_dec_signals); + + if (num_addr_bits_dec < 4) + { + if (flag_way_select) + { + exist = true; + num_in_signals = 2; + } + else + { + num_in_signals = 0; + } + } + else + { + exist = true; + + if (flag_way_select) + { + num_in_signals = 3; + } + else + { + num_in_signals = 2; + } + } + + assert(cell.h>0); + assert(cell.w>0); + // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; + //area.h = 4 * cell.h; + area.h = g_tp.h_dec * cell.h; + + compute_widths(); + compute_area(); + +} + + +/* Sizes the decoder gate chain when the decoder exists. The first gate is treated as a NAND2 when num_in_signals == 2 or the array is fully associative, otherwise as a NAND3; its widths are set from g_tp.min_w_nmos_ and the PMOS/NMOS size ratio. logical_effort() then chooses num_gates and fills w_dec_n / w_dec_p for the stages that drive C_ld_dec_out. */ +void Decoder::compute_widths() +{ + double F; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + + if (exist) + { + if (num_in_signals == 2 || fully_assoc) + { + w_dec_n[0] = 2 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2; + } + else + { + w_dec_n[0] = 3 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3; + } + + F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + + gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); + num_gates = logical_effort( + num_gates_min, + num_in_signals == 2 ? 
gnand2 : gnand3, + F, + w_dec_n, + w_dec_p, + C_ld_dec_out, + p_to_n_sz_ratio, + is_dram, + is_wl_tr, + g_tp.max_w_nmos_dec); + + } +} + + +/* Computes the decoder's gate area and leakage when the decoder exists. The first NAND gate (2 or 3 inputs) and every inverter stage 1 .. num_gates-1 contribute area, subthreshold leakage current and gate leakage current. Both currents are summed over all stages and multiplied by g_tp.peri_global.Vdd into power.readOp.leakage / power.readOp.gate_leakage; area.w is the summed area divided by the fixed area.h. */ +void Decoder::compute_area() +{ + double cumulative_area = 0; + double cumulative_curr = 0; // cumulative subthreshold leakage current + double cumulative_curr_Ig = 0; // cumulative gate leakage current + + if (exist) + { // First check if this decoder exists + if (num_in_signals == 2) + { + cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); + cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); + } + else if (num_in_signals == 3) + { + cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + } + + for (int i = 1; i < num_gates; i++) + { + cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); + cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + /* accumulate: plain assignment here would keep only the last inverter's gate leakage */ + cumulative_curr_Ig += cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + } + power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; + power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; + + area.w = (cumulative_area / area.h); + } +} + +void Decoder::compute_power_gating() +{ + //For all driver change there is only one sleep transistors to save area + //Total transistor width for sleep tx calculation + for (int i = 1; i <=num_gates; i++) + { + total_driver_nwidth += w_dec_n[i]; + total_driver_pwidth += w_dec_p[i]; + } + + //compute sleep tx + bool is_footer = false; + double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); + double detalV; + double c_wakeup; + + c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, cell.h);//Psleep tx + detalV = g_tp.peri_global.Vdd-g_tp.peri_global.Vcc_min; + 
if (g_ip->power_gating) /* NOTE(review): a new Sleep_tx is allocated every time this runs; where sleeptx is released is not visible here - confirm */ + sleeptx = new Sleep_tx (g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup, + detalV, + nodes_DSTN, + area); +} +/* Walks the decoder chain stage by stage: first NAND gate, intermediate inverters, then the final inverter that drives C_ld_dec_out through R_wire_dec_out. Each stage's horowitz() delay is added to the member `delay`, and its output rise time (this_delay / (1 - 0.5)) feeds the next stage. Capacitance * voltage^2 terms are added to power.readOp.dynamic; the final load uses Vpp, which is g_tp.vpp for a DRAM wordline driver, g_tp.sram_cell.Vdd for another wordline driver, and the peripheral Vdd otherwise. Calls compute_power_gating() before returning the last stage's output rise time. Returns 0.0 when no decoder exists. */ +double Decoder::compute_delays(double inrisetime) +{ + if (exist) + { + double ret_val = 0; // outrisetime + int i; + double rd, tf, this_delay, c_load, c_intrinsic, Vpp; + double Vdd = g_tp.peri_global.Vdd; + + if ((is_wl_tr) && (is_dram)) + { + Vpp = g_tp.vpp; + } + else if (is_wl_tr) + { + Vpp = g_tp.sram_cell.Vdd; + } + else + { + Vpp = g_tp.peri_global.Vdd; + } + + // first check whether a decoder is required at all + rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); + c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + + drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + + for (i = 1; i < num_gates - 1; ++i) + { + rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + } + + // add delay of final inverter that drives the wordline + i = num_gates - 1; + c_load = C_ld_dec_out; + rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, 
is_wl_tr); + tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + ret_val = this_delay / (1.0 - 0.5); + power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; + + compute_power_gating(); + return ret_val; + } + else + { + return 0.0; + } +} +/* Recomputes the decoder's leakage when the decoder exists. Subthreshold and gate leakage currents of the first NAND gate (2 or 3 inputs) and of every inverter stage 1 .. num_gates-1 are summed and multiplied by g_tp.peri_global.Vdd into power.readOp.leakage / power.readOp.gate_leakage. The `temperature` argument is not referenced in this body. */ +void Decoder::leakage_feedback(double temperature) +{ + double cumulative_curr = 0; // cumulative subthreshold leakage current + double cumulative_curr_Ig = 0; // cumulative gate leakage current + + if (exist) + { // First check if this decoder exists + if (num_in_signals == 2) + { + cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); + cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); + } + else if (num_in_signals == 3) + { + cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + } + + for (int i = 1; i < num_gates; i++) + { + cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + /* accumulate: plain assignment here would keep only the last inverter's gate leakage */ + cumulative_curr_Ig += cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + } + + power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; + power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; + } +} + +PredecBlk::PredecBlk( + int num_dec_signals, + Decoder * dec_, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out_, + int num_dec_per_predec, + bool is_dram, + bool is_blk1) + :dec(dec_), + exist(false), + number_input_addr_bits(0), + C_ld_predec_blk_out(0), + R_wire_predec_blk_out(0), + branch_effort_nand2_gate_output(1), + branch_effort_nand3_gate_output(1), + flag_two_unique_paths(false), + flag_L2_gate(0), + number_inputs_L1_gate(0), + number_gates_L1_nand2_path(0), + number_gates_L1_nand3_path(0), + number_gates_L2(0), + min_number_gates_L1(2), + min_number_gates_L2(2), + num_L1_active_nand2_path(0), + 
num_L1_active_nand3_path(0), + delay_nand2_path(0), + delay_nand3_path(0), + power_nand2_path(), + power_nand3_path(), + power_L2(), + is_dram_(is_dram) +{ /* Splits the log2(num_dec_signals) address bits between two predecoder blocks: block 1 takes (bits + 1) / 2 of them and block 2 takes the rest. With fewer than 4 bits only block 1 exists and it drives the decoder's output load and wire directly; with 0 or fewer bits block 1 returns before any sizing. With 4 or more bits each block drives the decoder gate inputs (scaled by its branching factor and num_dec_per_predec) plus the predecoder output wire capacitance. */ + int branch_effort_predec_out; + double C_ld_dec_gate; + int num_addr_bits_dec = _log2(num_dec_signals); + int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; + int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; + + w_L1_nand2_n[0] = 0; + w_L1_nand2_p[0] = 0; + w_L1_nand3_n[0] = 0; + w_L1_nand3_p[0] = 0; + + if (is_blk1 == true) + { + if (num_addr_bits_dec <= 0) + { + return; + } + else if (num_addr_bits_dec < 4) + { + // Just one predecoder block is required with NAND2 gates. No decoder required. + // The first level of predecoding directly drives the decoder output load + exist = true; + number_input_addr_bits = num_addr_bits_dec; + R_wire_predec_blk_out = dec->R_wire_dec_out; + C_ld_predec_blk_out = dec->C_ld_dec_out; + } + else + { + exist = true; + number_input_addr_bits = blk1_num_input_addr_bits; + branch_effort_predec_out = (1 << blk2_num_input_addr_bits); + C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } + } + else + { + if (num_addr_bits_dec >= 4) + { + exist = true; + number_input_addr_bits = blk2_num_input_addr_bits; + branch_effort_predec_out = (1 << blk1_num_input_addr_bits); + C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } + } + + compute_widths(); + compute_area(); +} + + +/* Chooses the predecoder block structure from number_input_addr_bits (1 to 9; any other value hits assert(0)): the fan-in of the first-level NAND gates, whether both a NAND2 and a NAND3 first-level path are used (flag_two_unique_paths), and the fan-in of the optional second-level gate (flag_L2_gate: 0 for none, otherwise 2 or 3). Each gate chain is then sized with logical_effort(). Does nothing when the block does not exist. */ +void PredecBlk::compute_widths() +{ + double F, c_load_nand3_path, c_load_nand2_path; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 
+ p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + + if (exist == false) return; + + + switch (number_input_addr_bits) + { + case 1: + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 0; + break; + case 2: + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 0; + break; + case 3: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 0; + break; + case 4: + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 4; + break; + case 5: + flag_two_unique_paths = true; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 8; + branch_effort_nand3_gate_output = 4; + break; + case 6: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 2; + branch_effort_nand3_gate_output = 8; + break; + case 7: + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 32; + branch_effort_nand3_gate_output = 16; + break; + case 8: + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 64; + branch_effort_nand3_gate_output = 32; + break; + case 9: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 3; + branch_effort_nand3_gate_output = 64; + break; + default: + assert(0); + break; + } + + // find the number of gates and sizing in second level of predecoder (if there is a second level) + if (flag_L2_gate) + { + if (flag_L2_gate == 2) + { // 2nd level is a NAND2 gate + w_L2_n[0] = 2 * g_tp.min_w_nmos_; + F = gnand2; + } + else + { // 2nd level is a NAND3 gate + w_L2_n[0] = 3 * g_tp.min_w_nmos_; + F = gnand3; + } + w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + number_gates_L2 = logical_effort( + min_number_gates_L2, + flag_L2_gate == 2 ? 
gnand2 : gnand3, + F, + w_L2_n, + w_L2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + + // Now find the number of gates and widths in first level of predecoder + if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2)) + { // Whenever flag_two_unique_paths is true, it means first level of decoder employs + // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means + // a NAND2 gate is used in the first level of the predecoder + c_load_nand2_path = branch_effort_nand2_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; + w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2 * c_load_nand2_path / + (gate_C(w_L1_nand2_n[0], 0, is_dram_) + + gate_C(w_L1_nand2_p[0], 0, is_dram_)); + number_gates_L1_nand2_path = logical_effort( + min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + c_load_nand2_path, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + + //Now find widths of gates along path in which first gate is a NAND3 + if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3)) + { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs + // both NAND2 and NAND3 gates. 
Or when number_inputs_L1_gate is 3, it means + // a NAND3 gate is used in the first level of the predecoder + c_load_nand3_path = branch_effort_nand3_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; + w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3 * c_load_nand3_path / + (gate_C(w_L1_nand3_n[0], 0, is_dram_) + + gate_C(w_L1_nand3_p[0], 0, is_dram_)); + number_gates_L1_nand3_path = logical_effort( + min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + c_load_nand3_path, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + } + else + { // find number of gates and widths in first level of predecoder block when there is no second level + if (number_inputs_L1_gate == 2) + { + w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; + w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2*C_ld_predec_blk_out / + (gate_C(w_L1_nand2_n[0], 0, is_dram_) + + gate_C(w_L1_nand2_p[0], 0, is_dram_)); + number_gates_L1_nand2_path = logical_effort( + min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + else if (number_inputs_L1_gate == 3) + { + w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; + w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3*C_ld_predec_blk_out / + (gate_C(w_L1_nand3_n[0], 0, is_dram_) + + gate_C(w_L1_nand3_p[0], 0, is_dram_)); + number_gates_L1_nand3_path = logical_effort( + min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + } +} + + + +void PredecBlk::compute_area() +{ + if (exist) + { // First check whether a predecoder block is needed + int num_L1_nand2 = 0; + int num_L1_nand3 = 0; + int num_L2 = 0; + double tot_area_L1_nand3 =0; + double leak_L1_nand3 =0; + double gate_leak_L1_nand3 =0; + + double tot_area_L1_nand2 = 
compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); + double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + /* A first-level NAND3 gate is present when the block uses both NAND2 and NAND3 paths (flag_two_unique_paths) or when the single first-level gate type is NAND3; this is the same condition compute_widths() and compute_delays() use for the NAND3 path. Only when neither holds is the NAND3 contribution zero. */ + if (!flag_two_unique_paths && number_inputs_L1_gate != 3) { + tot_area_L1_nand3 = 0; + leak_L1_nand3 = 0; + gate_leak_L1_nand3 =0; + } + else { + tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); + /* pass is_dram_ like every other leakage call in this function */ + leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_); + gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand, is_dram_); + } + + /* Gate counts per structure: number of first-level NAND2 / NAND3 gates, number of second-level gates, and how many first-level paths of each type are recorded as active. */ + switch (number_input_addr_bits) + { + case 1: //2 NAND2 gates + num_L1_nand2 = 2; + num_L2 = 0; + num_L1_active_nand2_path =1; + num_L1_active_nand3_path =0; + break; + case 2: //4 NAND2 gates + num_L1_nand2 = 4; + num_L2 = 0; + num_L1_active_nand2_path =1; + num_L1_active_nand3_path =0; + break; + case 3: //8 NAND3 gates + num_L1_nand3 = 8; + num_L2 = 0; + num_L1_active_nand2_path =0; + num_L1_active_nand3_path =1; + break; + case 4: //4 + 4 NAND2 gates + num_L1_nand2 = 8; + num_L2 = 16; + num_L1_active_nand2_path =2; + num_L1_active_nand3_path =0; + break; + case 5: //4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 4; + num_L1_nand3 = 8; + num_L2 = 32; + num_L1_active_nand2_path =1; + num_L1_active_nand3_path =1; + break; + case 6: //8 + 8 NAND3 gates + num_L1_nand3 = 16; + num_L2 = 64; + num_L1_active_nand2_path =0; + num_L1_active_nand3_path =2; + break; + case 7: //4 + 4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 8; + num_L1_nand3 = 8; + num_L2 = 128; + num_L1_active_nand2_path =2; + num_L1_active_nand3_path =1; + break; + case 8: //4 NAND2 gates, 8 + 8 NAND3 gates + /* NOTE(review): 4 NAND2 gates are counted here, yet 2 active NAND2 paths are recorded, whereas cases 2 and 5 record 1 for the same gate count - confirm the intended value */ + num_L1_nand2 = 4; + num_L1_nand3 = 16; + num_L2 = 256; + num_L1_active_nand2_path =2; + num_L1_active_nand3_path =2; + break; + case 9: //8 + 8 + 8 NAND3 gates + num_L1_nand3 = 24; + num_L2 = 512; + num_L1_active_nand2_path =0; + 
num_L1_active_nand3_path =3; + break; + default: + break; + } + + for (int i = 1; i < number_gates_L1_nand2_path; ++i) + { + tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); + leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + } + tot_area_L1_nand2 *= num_L1_nand2; + leak_L1_nand2 *= num_L1_nand2; + gate_leak_L1_nand2 *= num_L1_nand2; + + for (int i = 1; i < number_gates_L1_nand3_path; ++i) + { + tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); + leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + } + tot_area_L1_nand3 *= num_L1_nand3; + leak_L1_nand3 *= num_L1_nand3; + gate_leak_L1_nand3 *= num_L1_nand3; + + double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; + double cumulative_area_L2 = 0.0; + double leakage_L2 = 0.0; + double gate_leakage_L2 = 0.0; + + if (flag_L2_gate == 2) + { + cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + } + else if (flag_L2_gate == 3) + { + cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + } + + for (int i = 1; i < number_gates_L2; ++i) + { + cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); + leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + 
} + cumulative_area_L2 *= num_L2; + leakage_L2 *= num_L2; + gate_leakage_L2 *= num_L2; + + power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; + area.set_area(cumulative_area_L1 + cumulative_area_L2); + power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; + } +} + + + +pair PredecBlk::compute_delays( + pair inrisetime) // +{ + pair ret_val; + ret_val.first = 0; // outrisetime_nand2_path + ret_val.second = 0; // outrisetime_nand3_path + + double inrisetime_nand2_path = inrisetime.first; + double inrisetime_nand3_path = inrisetime.second; + int i; + double rd, c_load, c_intrinsic, tf, this_delay; + double Vdd = g_tp.peri_global.Vdd; + + // TODO: following delay calculation part can be greatly simplified. 
+ // first check whether a predecoder block is required + if (exist) + { + //Find delay in first level of predecoder block + //First find delay in path + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) + { + //First gate is a NAND2 gate + rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); + c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + + //Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) + { + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + //Add delay of the last inverter + i = number_gates_L1_nand2_path - 1; + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) + { + c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + 
inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + else + { //First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) + { //Check if the number of gates in the first level is more than 1. + //First gate is a NAND3 gate + rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); + c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + + //Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) + { + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += 
(c_intrinsic + c_load) * Vdd * Vdd; + } + + //Add delay of the last inverter + i = number_gates_L1_nand3_path - 1; + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) + { + c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + else + { //First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + // Find delay through second level + if (flag_L2_gate) + { + if (flag_L2_gate == 2) + { + rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); + c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + else + { // flag_L2_gate = 3 + rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); + c_intrinsic 
= 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + for (i = 1; i < number_gates_L2 - 1; ++i) + { + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + //Add delay of final inverter that drives the wordline decoders + i = number_gates_L2 - 1; + c_load = C_ld_predec_blk_out; + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + delay = (ret_val.first > ret_val.second) ? 
ret_val.first : ret_val.second; + return ret_val; +} + +void PredecBlk::leakage_feedback(double temperature) +{ + if (exist) + { // First check whether a predecoder block is needed + int num_L1_nand2 = 0; + int num_L1_nand3 = 0; + int num_L2 = 0; + double leak_L1_nand3 =0; + double gate_leak_L1_nand3 =0; + + double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + if (number_inputs_L1_gate != 3) { + leak_L1_nand3 = 0; + gate_leak_L1_nand3 =0; + } + else { + leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); + gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); + } + + switch (number_input_addr_bits) + { + case 1: //2 NAND2 gates + num_L1_nand2 = 2; + num_L2 = 0; + num_L1_active_nand2_path =1; + num_L1_active_nand3_path =0; + break; + case 2: //4 NAND2 gates + num_L1_nand2 = 4; + num_L2 = 0; + num_L1_active_nand2_path =1; + num_L1_active_nand3_path =0; + break; + case 3: //8 NAND3 gates + num_L1_nand3 = 8; + num_L2 = 0; + num_L1_active_nand2_path =0; + num_L1_active_nand3_path =1; + break; + case 4: //4 + 4 NAND2 gates + num_L1_nand2 = 8; + num_L2 = 16; + num_L1_active_nand2_path =2; + num_L1_active_nand3_path =0; + break; + case 5: //4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 4; + num_L1_nand3 = 8; + num_L2 = 32; + num_L1_active_nand2_path =1; + num_L1_active_nand3_path =1; + break; + case 6: //8 + 8 NAND3 gates + num_L1_nand3 = 16; + num_L2 = 64; + num_L1_active_nand2_path =0; + num_L1_active_nand3_path =2; + break; + case 7: //4 + 4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 8; + num_L1_nand3 = 8; + num_L2 = 128; + num_L1_active_nand2_path =2; + num_L1_active_nand3_path =1; + break; + case 8: //4 NAND2 gates, 8 + 8 NAND3 gates + num_L1_nand2 = 4; + num_L1_nand3 = 16; + num_L2 = 256; + num_L1_active_nand2_path =2; + num_L1_active_nand3_path =2; + break; + case 9: //8 
+ 8 + 8 NAND3 gates + num_L1_nand3 = 24; + num_L2 = 512; + num_L1_active_nand2_path =0; + num_L1_active_nand3_path =3; + break; + default: + break; + } + + for (int i = 1; i < number_gates_L1_nand2_path; ++i) + { + leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + } + leak_L1_nand2 *= num_L1_nand2; + gate_leak_L1_nand2 *= num_L1_nand2; + + for (int i = 1; i < number_gates_L1_nand3_path; ++i) + { + leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + } + leak_L1_nand3 *= num_L1_nand3; + gate_leak_L1_nand3 *= num_L1_nand3; + + double leakage_L2 = 0.0; + double gate_leakage_L2 = 0.0; + + if (flag_L2_gate == 2) + { + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + } + else if (flag_L2_gate == 3) + { + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + } + + for (int i = 1; i < number_gates_L2; ++i) + { + leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + } + leakage_L2 *= num_L2; + gate_leakage_L2 *= num_L2; + + power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; + + power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; + } +} + +PredecBlkDrv::PredecBlkDrv( + int 
way_select_, + PredecBlk * blk_, + bool is_dram) + :flag_driver_exists(0), + number_gates_nand2_path(0), + number_gates_nand3_path(0), + min_number_gates(2), + num_buffers_driving_1_nand2_load(0), + num_buffers_driving_2_nand2_load(0), + num_buffers_driving_4_nand2_load(0), + num_buffers_driving_2_nand3_load(0), + num_buffers_driving_8_nand3_load(0), + num_buffers_nand3_path(0), + c_load_nand2_path_out(0), + c_load_nand3_path_out(0), + r_load_nand2_path_out(0), + r_load_nand3_path_out(0), + delay_nand2_path(0), + delay_nand3_path(0), + power_nand2_path(), + power_nand3_path(), + blk(blk_), dec(blk->dec), + is_dram_(is_dram), + way_select(way_select_) +{ + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) + { + width_nand2_path_n[i] = 0; + width_nand2_path_p[i] = 0; + width_nand3_path_n[i] = 0; + width_nand3_path_p[i] = 0; + } + + number_input_addr_bits = blk->number_input_addr_bits; + + if (way_select > 1) + { + flag_driver_exists = 1; + number_input_addr_bits = way_select; + if (dec->num_in_signals == 2) + { + c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand2_load = number_input_addr_bits; + } + else if (dec->num_in_signals == 3) + { + c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand3_load = number_input_addr_bits; + } + } + else if (way_select == 0) + { + if (blk->exist) + { + flag_driver_exists = 1; + } + } + + compute_widths(); + compute_area(); +} + + + +void PredecBlkDrv::compute_widths() +{ + // The predecode block driver accepts as input the address bits from the h-tree network. For + // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of + // inversion to generate addrbar and simply treat addrbar as addr. 
+ + double F; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + + if (flag_driver_exists) + { + double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); + double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); + + if (way_select == 0) + { + if (blk->number_input_addr_bits == 1) + { //2 NAND2 gates + num_buffers_driving_2_nand2_load = 1; + c_load_nand2_path_out = 2 * C_nand2_gate_blk; + } + else if (blk->number_input_addr_bits == 2) + { //4 NAND2 gates one 2-4 decoder + num_buffers_driving_4_nand2_load = 2; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } + else if (blk->number_input_addr_bits == 3) + { //8 NAND3 gates one 3-8 decoder + num_buffers_driving_8_nand3_load = 3; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + else if (blk->number_input_addr_bits == 4) + { //4 + 4 NAND2 gates two 2-4 decoder + num_buffers_driving_4_nand2_load = 4; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } + else if (blk->number_input_addr_bits == 5) + { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder + num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + else if (blk->number_input_addr_bits == 6) + { //8 + 8 NAND3 gates two 3-8 decoder + num_buffers_driving_8_nand3_load = 6; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + else if (blk->number_input_addr_bits == 7) + { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder + num_buffers_driving_4_nand2_load = 4; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + else if (blk->number_input_addr_bits == 8) + { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder + num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 6; + c_load_nand2_path_out = 4 
* C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + else if (blk->number_input_addr_bits == 9) + { //8 + 8 + 8 NAND3 gates three 3-8 decoder + num_buffers_driving_8_nand3_load = 9; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + } + + if ((blk->flag_two_unique_paths) || + (blk->number_inputs_L1_gate == 2) || + (number_input_addr_bits == 0) || + ((way_select)&&(dec->num_in_signals == 2))) + { //this means that way_select is driving NAND2 in decoder. + width_nand2_path_n[0] = g_tp.min_w_nmos_; + width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; + F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); + number_gates_nand2_path = logical_effort( + min_number_gates, + 1, + F, + width_nand2_path_n, + width_nand2_path_p, + c_load_nand2_path_out, + p_to_n_sz_ratio, + is_dram_, false, g_tp.max_w_nmos_); + } + + if ((blk->flag_two_unique_paths) || + (blk->number_inputs_L1_gate == 3) || + ((way_select)&&(dec->num_in_signals == 3))) + { //this means that way_select is driving NAND3 in decoder. 
+ width_nand3_path_n[0] = g_tp.min_w_nmos_; + width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; + F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); + number_gates_nand3_path = logical_effort( + min_number_gates, + 1, + F, + width_nand3_path_n, + width_nand3_path_p, + c_load_nand3_path_out, + p_to_n_sz_ratio, + is_dram_, false, g_tp.max_w_nmos_); + } + } +} + + + +void PredecBlkDrv::compute_area() +{ + double area_nand2_path = 0; + double area_nand3_path = 0; + double leak_nand2_path = 0; + double leak_nand3_path = 0; + double gate_leak_nand2_path = 0; + double gate_leak_nand3_path = 0; + + if (flag_driver_exists) + { // first check whether a predecoder block driver is needed + for (int i = 0; i < number_gates_nand2_path; ++i) + { + area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def); + leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); + gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); + } + area_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + + for (int i = 0; i < number_gates_nand3_path; ++i) + { + area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def); + leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); + gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); + } + area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + 
leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + + power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; + power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; + area.set_area(area_nand2_path + area_nand3_path); + } +} + + + +pair PredecBlkDrv::compute_delays( + double inrisetime_nand2_path, + double inrisetime_nand3_path) +{ + pair ret_val; + ret_val.first = 0; // outrisetime_nand2_path + ret_val.second = 0; // outrisetime_nand3_path + int i; + double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; + double Vdd = g_tp.peri_global.Vdd; + + if (flag_driver_exists) + { + for (i = 0; i < number_gates_nand2_path - 1; ++i) + { + rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; + } + + // Final inverter drives the predecoder block or the decoder output load + if (number_gates_nand2_path != 0) + { + i = number_gates_nand2_path - 1; + rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + c_load = c_load_nand2_path_out; + tf = rd * 
(c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; +// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) +{ + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + + blk2->power_L2.readOp.leakage; + power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; + + driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; +} + +void PredecBlkDrv::leakage_feedback(double temperature) +{ + double leak_nand2_path = 0; + double leak_nand3_path = 0; + double gate_leak_nand2_path = 0; + double gate_leak_nand3_path = 0; + + if (flag_driver_exists) + { // first check whether a predecoder block driver is needed + for (int i = 0; i < number_gates_nand2_path; ++i) + { + leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, 
inv,is_dram_); + gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); + } + leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + + for (int i = 0; i < number_gates_nand3_path; ++i) + { + leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); + gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); + } + leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + + power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; + power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; + } +} + +double Predec::compute_delays(double inrisetime) +{ + // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. 
+ pair tmp_pair1, tmp_pair2; + tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); + tmp_pair1 = blk1->compute_delays(tmp_pair1); + tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); + tmp_pair2 = blk2->compute_delays(tmp_pair2); + tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); + + driver_power.readOp.dynamic = + drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + + drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + + drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + + drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; + + block_power.readOp.dynamic = + blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + + blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + + blk1->power_L2.readOp.dynamic + + blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + + blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + + blk2->power_L2.readOp.dynamic; + + power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; + + delay = tmp_pair1.first; + return tmp_pair1.second; +} + + +void Predec::leakage_feedback(double temperature) +{ + drv1->leakage_feedback(temperature); + drv2->leakage_feedback(temperature); + blk1->leakage_feedback(temperature); + blk2->leakage_feedback(temperature); + + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + + blk2->power_L2.readOp.leakage; + power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; + + driver_power.readOp.gate_leakage = 
drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; +} + +// returns +pair Predec::get_max_delay_before_decoder( + pair input_pair1, + pair input_pair2) +{ + pair ret_val; + double delay; + + delay = drv1->delay_nand2_path + blk1->delay_nand2_path; + ret_val.first = delay; + ret_val.second = input_pair1.first; + delay = drv1->delay_nand3_path + blk1->delay_nand3_path; + if (ret_val.first < delay) + { + ret_val.first = delay; + ret_val.second = input_pair1.second; + } + delay = drv2->delay_nand2_path + blk2->delay_nand2_path; + if (ret_val.first < delay) + { + ret_val.first = delay; + ret_val.second = input_pair2.first; + } + delay = drv2->delay_nand3_path + blk2->delay_nand3_path; + if (ret_val.first < delay) + { + ret_val.first = delay; + ret_val.second = input_pair2.second; + } + + return ret_val; +} + + + +Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram) +:number_gates(0), + min_number_gates(2), + c_gate_load(c_gate_load_), + c_wire_load(c_wire_load_), + r_wire_load(r_wire_load_), + delay(0), +// power(), + is_dram_(is_dram), + total_driver_nwidth(0), + total_driver_pwidth(0) +{ + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) + { + width_n[i] = 0; + width_p[i] = 0; + } + + compute_widths(); + compute_area(); +} + + +void Driver::compute_widths() +{ + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double c_load = c_gate_load + c_wire_load; + width_n[0] = g_tp.min_w_nmos_; + width_p[0] 
= p_to_n_sz_ratio * g_tp.min_w_nmos_; + + double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); + number_gates = logical_effort( + min_number_gates, + 1, + F, + width_n, + width_p, + c_load, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); +} + +void Driver::compute_area() +{ + double cumulative_area = 0; + ///double cumulative_curr = 0; // cumulative leakage current + ///double cumulative_curr_Ig = 0; // cumulative leakage current + area.h = g_tp.cell_h_def; + for (int i = 0; i < number_gates; i++) + { + cumulative_area += compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); + ///cumulative_curr += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_); + ///cumulative_curr_Ig = cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_); + + } + area.w = (cumulative_area / area.h); +} + +void Driver::compute_power_gating() +{ + //For all driver change there is only one sleep transistors to save area + //Total transistor width for sleep tx calculation + for (int i = 0; i <=number_gates; i++) + { + total_driver_nwidth += width_n[i]; + total_driver_pwidth += width_p[i]; + } + + //compute sleep tx + bool is_footer = false; + double Isat_subarray = simplified_nmos_Isat(total_driver_nwidth); + double detalV; + double c_wakeup; + + c_wakeup = drain_C_(total_driver_pwidth, PCH, 1, 1, area.h);//Psleep tx + detalV = g_tp.peri_global.Vdd-g_tp.peri_global.Vcc_min; + if (g_ip->power_gating) + sleeptx = new Sleep_tx (g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup, + detalV, + 1, + area); +} + + +double Driver::compute_delay(double inrisetime) +{ + int i; + double rd, c_load, c_intrinsic, tf; + double this_delay = 0; + + for (i = 0; i < number_gates - 1; ++i) + { + rd = tr_R_on(width_n[i], NCH, 1, is_dram_); + c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + 
c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; + power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; + } + + i = number_gates - 1; + c_load = c_gate_load + c_wire_load; + rd = tr_R_on(width_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; + power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; + + return this_delay / (1.0 - 0.5); +} + +/* +void Driver::compute_area() +{ + double cumulative_area = 0; + double cumulative_curr = 0; // cumulative leakage current + double cumulative_curr_Ig = 0; // cumulative leakage current + + area.h = g_tp.h_dec * g_tp.dram.b_h; + for (int i = 1; i < number_gates; i++) + { + cumulative_area += compute_gate_area(INV, 1, width_p[i], width_n[i], area.h); + cumulative_curr += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_); + cumulative_curr_Ig = cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_); + } + area.w = (cumulative_area / area.h); + +} +*/ diff --git a/T1/TP1/cacti-master/decoder.h b/T1/TP1/cacti-master/decoder.h new file mode 100644 index 0000000..bd74c64 --- /dev/null +++ b/T1/TP1/cacti-master/decoder.h @@ -0,0 +1,272 @@ 
+/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + +#ifndef __DECODER_H__ +#define __DECODER_H__ + +#include "area.h" +#include "component.h" +#include "parameter.h" +#include "powergating.h" +#include + +using namespace std; + + +class Decoder : public Component +{ + public: + Decoder( + int _num_dec_signals, + bool flag_way_select, + double _C_ld_dec_out, + double _R_wire_dec_out, + bool fully_assoc_, + bool is_dram_, + bool is_wl_tr_, + const Area & cell_); + + bool exist; + int num_in_signals; + double C_ld_dec_out; + double R_wire_dec_out; + int num_gates; + int num_gates_min; + double w_dec_n[MAX_NUMBER_GATES_STAGE]; + double w_dec_p[MAX_NUMBER_GATES_STAGE]; + double delay; + //powerDef power; + bool fully_assoc; + bool is_dram; + bool is_wl_tr; + + double total_driver_nwidth; + double total_driver_pwidth; + Sleep_tx * sleeptx; + + const Area & cell; + int nodes_DSTN; + + void compute_widths(); + void compute_area(); + double compute_delays(double inrisetime); // return outrisetime + void compute_power_gating(); + + void leakage_feedback(double temperature); + + ~Decoder() + { + if (!sleeptx) + delete sleeptx; + }; +}; + + + +class PredecBlk : public Component +{ + public: + PredecBlk( + int num_dec_signals, + Decoder * dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, + bool is_blk1); + + Decoder * dec; + bool exist; + int 
number_input_addr_bits; + double C_ld_predec_blk_out; + double R_wire_predec_blk_out; + int branch_effort_nand2_gate_output; + int branch_effort_nand3_gate_output; + bool flag_two_unique_paths; + int flag_L2_gate; + int number_inputs_L1_gate; + int number_gates_L1_nand2_path; + int number_gates_L1_nand3_path; + int number_gates_L2; + int min_number_gates_L1; + int min_number_gates_L2; + int num_L1_active_nand2_path; + int num_L1_active_nand3_path; + double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; + double w_L2_n[MAX_NUMBER_GATES_STAGE]; + double w_L2_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef power_nand3_path; + powerDef power_L2; + + bool is_dram_; + + void compute_widths(); + void compute_area(); + + void leakage_feedback(double temperature); + + pair compute_delays(pair inrisetime); // + // return +}; + + +class PredecBlkDrv : public Component +{ + public: + PredecBlkDrv( + int way_select, + PredecBlk * blk_, + bool is_dram); + + int flag_driver_exists; + int number_input_addr_bits; + int number_gates_nand2_path; + int number_gates_nand3_path; + int min_number_gates; + int num_buffers_driving_1_nand2_load; + int num_buffers_driving_2_nand2_load; + int num_buffers_driving_4_nand2_load; + int num_buffers_driving_2_nand3_load; + int num_buffers_driving_8_nand3_load; + int num_buffers_nand3_path; + double c_load_nand2_path_out; + double c_load_nand3_path_out; + double r_load_nand2_path_out; + double r_load_nand3_path_out; + double width_nand2_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand2_path_p[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef power_nand3_path; + + PredecBlk 
* blk; + Decoder * dec; + bool is_dram_; + int way_select; + + void compute_widths(); + void compute_area(); + + void leakage_feedback(double temperature); + + + pair compute_delays( + double inrisetime_nand2_path, + double inrisetime_nand3_path); // return + + inline int num_addr_bits_nand2_path() + { + return num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load; + } + inline int num_addr_bits_nand3_path() + { + return num_buffers_driving_2_nand3_load + + num_buffers_driving_8_nand3_load; + } + double get_rdOp_dynamic_E(int num_act_mats_hor_dir); +}; + + + +class Predec : public Component +{ + public: + Predec( + PredecBlkDrv * drv1, + PredecBlkDrv * drv2); + + double compute_delays(double inrisetime); // return outrisetime + + void leakage_feedback(double temperature); + PredecBlk * blk1; + PredecBlk * blk2; + PredecBlkDrv * drv1; + PredecBlkDrv * drv2; + + powerDef block_power; + powerDef driver_power; + + private: + // returns + pair get_max_delay_before_decoder( + pair input_pair1, + pair input_pair2); +}; + + + +class Driver : public Component +{ + public: + Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram); + + int number_gates; + int min_number_gates; + double width_n[MAX_NUMBER_GATES_STAGE]; + double width_p[MAX_NUMBER_GATES_STAGE]; + double c_gate_load; + double c_wire_load; + double r_wire_load; + double delay; +// powerDef power; + bool is_dram_; + + double total_driver_nwidth; + double total_driver_pwidth; + Sleep_tx * sleeptx; + + void compute_widths(); + void compute_area(); + double compute_delay(double inrisetime); + + void compute_power_gating(); + + ~Driver() + { + if (!sleeptx) + delete sleeptx; + }; +}; + + +#endif diff --git a/T1/TP1/cacti-master/dram.cfg b/T1/TP1/cacti-master/dram.cfg new file mode 100644 index 0000000..f55b5b3 --- /dev/null +++ b/T1/TP1/cacti-master/dram.cfg @@ -0,0 +1,114 @@ +//-size (bytes) 16777216 +//-size (bytes) 33554432 +-size 
(bytes) 134217728 +//-size (bytes) 67108864 +//-size (bytes) 1073741824 + +-block size (bytes) 64 +-associativity 1 +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 +-UCA bank count 1 +//-technology (u) 0.032 +//-technology (u) 0.045 +-technology (u) 0.068 +//-technology (u) 0.078 + +# following three parameters are meaningful only for main memories +-page size (bits) 8192 +-burst length 8 +-internal prefetch width 8 + +# following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Data array cell type - "comm-dram" + +# following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Data array peripheral type - "itrs-hp" + +# following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" + +# following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 512 +-output/input bus width 64 + +-operating temperature (K) 350 + +-cache type "main memory" + +# to model special structure like branch target buffers, directory, etc. 
+# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 45 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +//-access mode (normal, sequential, fast) - "fast" +-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +//-design objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:0 +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 +-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:1000000 +//-deviate (delay, dynamic power, leakage power, cycle time, area) 200:100000:100000:100000:20 + +-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" + +//-Wire signalling (fullswing, lowswing, default) - "default" +-Wire signalling (fullswing, lowswing, default) - "Global_10" + +-Wire inside mat - "global" +//-Wire inside mat - "semi-global" +-Wire outside mat - "global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +-Add ECC - "true" + +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +-Print input parameters - "true" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +//-Force cache config - "true" +-Force cache config - "false" +-Ndwl 1 +-Ndbl 1 +-Nspd 0 +-Ndcm 1 +-Ndsam1 0 +-Ndsam2 0 + +########### NUCA Params ############ + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, 
cycle time, area) 10:10000:10000:10000:10000 + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + diff --git a/T1/TP1/cacti-master/extio.cc b/T1/TP1/cacti-master/extio.cc new file mode 100644 index 0000000..382a7a0 --- /dev/null +++ b/T1/TP1/cacti-master/extio.cc @@ -0,0 +1,506 @@ +#include "extio.h" +#include + + +Extio::Extio(IOTechParam *iot): +io_param(iot){} + + +//External IO AREA. Does not include PHY or decap, includes only IO active circuit. More details can be found in the CACTI-IO technical report (), Chapter 2.3. + +void Extio::extio_area() +{ + + //Area per IO, assuming drive stage and ODT are shared + double single_io_area = io_param->ioarea_c + + (io_param->ioarea_k0/io_param->r_on)+(1/io_param->r_on)* + (io_param->ioarea_k1*io_param->frequency + + io_param->ioarea_k2*io_param->frequency*io_param->frequency + + io_param->ioarea_k3*io_param->frequency* + io_param->frequency*io_param->frequency); // IO Area in sq.mm. + + //Area per IO if ODT requirements are more stringent than the Ron + //requirements in determining size of driver + if (2*io_param->rtt1_dq_read < io_param->r_on) { + single_io_area = io_param->ioarea_c + + (io_param->ioarea_k0/(2*io_param->rtt1_dq_read))+ + (1/io_param->r_on)*(io_param->ioarea_k1*io_param->frequency + + io_param->ioarea_k2*io_param->frequency*io_param->frequency + + io_param->ioarea_k3*io_param->frequency*io_param->frequency*io_param->frequency); + } + + //Total IO area + io_area = (g_ip->num_dq + g_ip->num_dqs + g_ip->num_ca + g_ip->num_clk) * + single_io_area; + + printf("IO Area (sq.mm) = "); + cout << io_area << endl; + +} + +//External IO Termination Power. 
More details can be found in the CACTI-IO technical report (), Chapter 2.1. + +void Extio::extio_power_term() +{ + + //IO Termination and Bias Power + + //Bias and Leakage Power + power_bias = io_param->i_bias * io_param->vdd_io + + io_param->i_leak * (g_ip->num_dq + + g_ip->num_dqs + + g_ip->num_clk + + g_ip->num_ca) * io_param->vdd_io/1000000; + + + //Termination Power + power_termination_read = 1000 * (g_ip->num_dq + g_ip->num_dqs) * + io_param->vdd_io * io_param->vdd_io * 0.25 * + (1/(io_param->r_on + io_param->rpar_read + io_param->rs1_dq) + + 1/(io_param->rtt1_dq_read) + 1/(io_param->rtt2_dq_read)) + + 1000 * g_ip->num_ca * io_param->vdd_io * io_param->vdd_io * + (0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca))); + + power_termination_write = 1000 * (g_ip->num_dq + g_ip->num_dqs) * + io_param->vdd_io * io_param->vdd_io * 0.25 * + (1/(io_param->r_on + io_param->rpar_write) + + 1/(io_param->rtt1_dq_write) + 1/(io_param->rtt2_dq_write)) + + 1000 * g_ip->num_ca * io_param->vdd_io * io_param->vdd_io * + (0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca))); + + power_clk_bias = io_param->vdd_io * io_param->v_sw_clk / io_param->r_diff_term * 1000; + + + if (io_param->io_type == Serial) + { power_termination_read= 1000*(g_ip->num_dq)*io_param->vdd_io*io_param->v_sw_clk/io_param->r_diff_term; + power_termination_write= 1000*(g_ip->num_dq)*io_param->vdd_io*io_param->v_sw_clk/io_param->r_diff_term; + power_clk_bias=0; + } + + if (io_param->io_type == DDR4) + { + power_termination_read=1000 * (g_ip->num_dq + g_ip->num_dqs) * + io_param->vdd_io * io_param->vdd_io *0.5 * (1/(io_param->r_on + io_param->rpar_read + io_param->rs1_dq)) + + 1000 * g_ip->num_ca * io_param->vdd_io * io_param->vdd_io * + (0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca))); + + + + power_termination_write = 1000 * (g_ip->num_dq + g_ip->num_dqs) * + io_param->vdd_io * io_param->vdd_io * 0.5 * + (1/(io_param->r_on + io_param->rpar_write)) + + 1000 * g_ip->num_ca * io_param->vdd_io * 
io_param->vdd_io * + (0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca))); + + + + } + + + //Combining the power terms based on STATE (READ/WRITE/IDLE/SLEEP) + if (g_ip->iostate == READ) + { + io_power_term = g_ip->duty_cycle * + (power_termination_read + power_bias + power_clk_bias); + } + else if (g_ip->iostate == WRITE) + { + io_power_term = g_ip->duty_cycle * + (power_termination_write + power_bias + power_clk_bias); + } + else if (g_ip->iostate == IDLE) + { + io_power_term = g_ip->duty_cycle * + (power_termination_write + power_bias + power_clk_bias); + if (io_param->io_type == DDR4) + { io_power_term = 1e-6*io_param->i_leak*io_param->vdd_io; // IDLE IO power for DDR4 is leakage since bus can be parked at VDDQ + } + } + else if (g_ip->iostate == SLEEP) + { + io_power_term = 1e-6*io_param->i_leak*io_param->vdd_io; //nA to mW + } + else + { + io_power_term = 0; + } + + + printf("IO Termination and Bias Power (mW) = "); + cout << io_power_term << endl; +} + + +//External PHY Power and Wakeup Times. More details can be found in the CACTI-IO technical report (), Chapter 2.1. 
+ +void Extio::extio_power_phy () +{ + + + phy_static_power = io_param->phy_datapath_s + io_param->phy_phase_rotator_s + + io_param->phy_clock_tree_s + io_param->phy_rx_s + io_param->phy_dcc_s + + io_param->phy_deskew_s + io_param->phy_leveling_s + io_param->phy_pll_s; // in mW + + phy_dynamic_power = io_param->phy_datapath_d + io_param->phy_phase_rotator_d + + io_param->phy_clock_tree_d + io_param->phy_rx_d + io_param->phy_dcc_d + + io_param->phy_deskew_d + io_param->phy_leveling_d + + io_param->phy_pll_d; // in mW/Gbps + + + +//Combining the power terms based on STATE (READ/WRITE/IDLE/SLEEP) + if (g_ip->iostate == READ) + { + phy_power = phy_static_power + 2 * io_param->frequency * g_ip->num_dq * phy_dynamic_power / 1000; // Total PHY power in mW + } + else if (g_ip->iostate == WRITE) + { + phy_power = phy_static_power + 2 * io_param->frequency * g_ip->num_dq * phy_dynamic_power / 1000; // Total PHY power in mW + } + else if (g_ip->iostate == IDLE) + { + phy_power = phy_static_power; // Total PHY power in mW + + } + else if (g_ip->iostate == SLEEP) + { + phy_power = 0; // Total PHY power in mW; + } + else + { + phy_power = 0; // Total PHY power in mW; + } + + + phy_wtime = io_param->phy_pll_wtime + io_param->phy_phase_rotator_wtime + io_param->phy_rx_wtime + io_param->phy_bandgap_wtime + io_param->phy_deskew_wtime + io_param->phy_vrefgen_wtime; // Total Wakeup time from SLEEP to ACTIVE. Some of the Wakeup time can be hidden if all components do not need to be serially brought out of SLEEP. This depends on the implementation and user can modify the Wakeup times accordingly. + + + printf("PHY Power (mW) = "); + cout << phy_power << " "; + printf("PHY Wakeup Time (us) = "); + cout << phy_wtime << endl; + +} + + +//External IO Dynamic Power. Does not include termination or PHY. More details can be found in the CACTI-IO technical report (), Chapter 2.1. 
+ +void Extio::extio_power_dynamic() +{ + + if (io_param->io_type == Serial) + { + power_dq_write = 0; + + power_dqs_write = 0; + + power_ca_write = 0; + + power_dq_read = 0; + + power_dqs_read = 0; + + power_ca_read = 0; + + power_clk = 0; + + } + else + { + + + //Line capacitance calculations for effective c_line + + double c_line =1e6/(io_param->z0*2*io_param->frequency); //For DDR signals: DQ, DQS, CLK + double c_line_ca=c_line; //For DDR CA + double c_line_sdr=1e6/(io_param->z0*io_param->frequency); //For SDR CA + double c_line_2T=1e6*2/(io_param->z0*io_param->frequency); //For 2T timing + double c_line_3T=1e6*3/(io_param->z0*io_param->frequency); //For 3T timing + + //Line capacitance if flight time is less than half the bit period + + if (io_param->t_flight < 1e3/(4*io_param->frequency)){ + c_line = 1e3*io_param->t_flight/io_param->z0; + } + + if (io_param->t_flight_ca < 1e3/(4*io_param->frequency)){ + c_line_ca = 1e3*io_param->t_flight/io_param->z0; + } + + if (io_param->t_flight_ca < 1e3/(2*io_param->frequency)){ + c_line_sdr = 1e3*io_param->t_flight/io_param->z0; + } + + if (io_param->t_flight_ca < 1e3*2/(2*io_param->frequency)){ + c_line_2T = 1e3*io_param->t_flight/io_param->z0; + } + + if (io_param->t_flight_ca < 1e3*3/(2*io_param->frequency)){ + c_line_3T = 1e3*io_param->t_flight/io_param->z0; + } + + //Line capacitance calculation for the address bus, depending on what address timing is chosen (DDR/SDR/2T/3T) + + if (g_ip->addr_timing==1.0) { + c_line_ca = c_line_sdr; + } + else if (g_ip->addr_timing==2.0){ + c_line_ca = c_line_2T; + } + else if (g_ip->addr_timing==3.0){ + c_line_ca = c_line_3T; + } + + //Dynamic power per signal group for WRITE and READ modes + + power_dq_write = g_ip->num_dq * g_ip->activity_dq * + (io_param->c_tx + c_line) * io_param->vdd_io * + io_param->v_sw_data_write_line * io_param->frequency / 1000 + + g_ip->num_dq * g_ip->activity_dq * io_param->c_data * + io_param->vdd_io * io_param->v_sw_data_write_load1 * + 
io_param->frequency / 1000 + + g_ip->num_dq * g_ip->activity_dq * ((g_ip->num_mem_dq-1) * + io_param->c_data) * io_param->vdd_io * + io_param->v_sw_data_write_load2 * io_param->frequency / 1000 + + g_ip->num_dq * g_ip->activity_dq * io_param->c_int * + io_param->vdd_io * io_param->vdd_io * io_param->frequency / 1000; + + power_dqs_write = g_ip->num_dqs * (io_param->c_tx + c_line) * + io_param->vdd_io * io_param->v_sw_data_write_line * + io_param->frequency / 1000 + + g_ip->num_dqs * io_param->c_data * io_param->vdd_io * + io_param->v_sw_data_write_load1 * io_param->frequency / 1000 + + g_ip->num_dqs * ((g_ip->num_mem_dq-1) * io_param->c_data) * + io_param->vdd_io * io_param->v_sw_data_write_load2 * + io_param->frequency / 1000 + + g_ip->num_dqs * io_param->c_int * io_param->vdd_io * + io_param->vdd_io * io_param->frequency / 1000; + + power_ca_write = g_ip->num_ca * g_ip->activity_ca * + (io_param->c_tx + io_param->num_mem_ca * io_param->c_addr + + c_line_ca) * + io_param->vdd_io * io_param->v_sw_addr * io_param->frequency / 1000 + + g_ip->num_ca * g_ip->activity_ca * io_param->c_int * + io_param->vdd_io * io_param->vdd_io * io_param->frequency / 1000; + + power_dq_read = g_ip->num_dq * g_ip->activity_dq * + (io_param->c_tx + c_line) * io_param->vdd_io * + io_param->v_sw_data_read_line * io_param->frequency / 1000.0 + + g_ip->num_dq * g_ip->activity_dq * io_param->c_data * + io_param->vdd_io * io_param->v_sw_data_read_load1 * io_param->frequency / 1000.0 + + g_ip->num_dq *g_ip->activity_dq * ((g_ip->num_mem_dq-1) * io_param->c_data) * + io_param->vdd_io * io_param->v_sw_data_read_load2 * io_param->frequency / 1000.0 + + g_ip->num_dq * g_ip->activity_dq * io_param->c_int * io_param->vdd_io * + io_param->vdd_io * io_param->frequency / 1000.0; + + power_dqs_read = g_ip->num_dqs * (io_param->c_tx + c_line) * + io_param->vdd_io * io_param->v_sw_data_read_line * + io_param->frequency / 1000.0 + + g_ip->num_dqs * io_param->c_data * io_param->vdd_io * + 
io_param->v_sw_data_read_load1 * io_param->frequency / 1000.0 + + g_ip->num_dqs * ((g_ip->num_mem_dq-1) * io_param->c_data) * + io_param->vdd_io * io_param->v_sw_data_read_load2 * io_param->frequency / 1000.0 + + g_ip->num_dqs * io_param->c_int * io_param->vdd_io * io_param->vdd_io * + io_param->frequency / 1000.0; + + power_ca_read = g_ip->num_ca * g_ip->activity_ca * + (io_param->c_tx + io_param->num_mem_ca * + io_param->c_addr + c_line_ca) * + io_param->vdd_io * io_param->v_sw_addr * io_param->frequency / 1000 + + g_ip->num_ca * g_ip->activity_ca * io_param->c_int * + io_param->vdd_io * io_param->vdd_io * io_param->frequency / 1000; + + power_clk = g_ip->num_clk * + (io_param->c_tx + io_param->num_mem_clk * + io_param->c_data + c_line) * + io_param->vdd_io * io_param->v_sw_clk *io_param->frequency / 1000 + + g_ip->num_clk * io_param->c_int * io_param->vdd_io * + io_param->vdd_io * io_param->frequency / 1000; + + + + } + + //Combining the power terms based on STATE (READ/WRITE/IDLE/SLEEP) + + if (g_ip->iostate == READ) { + io_power_dynamic = g_ip->duty_cycle * (power_dq_read + + power_ca_read + power_dqs_read + power_clk); + + } + else if (g_ip->iostate == WRITE) { + io_power_dynamic = g_ip->duty_cycle * + (power_dq_write + power_ca_write + power_dqs_write + power_clk); + } + else if (g_ip->iostate == IDLE) { + io_power_dynamic = g_ip->duty_cycle * (power_clk); + } + else if (g_ip->iostate == SLEEP) { + io_power_dynamic = 0; + } + else { + io_power_dynamic = 0; + } + + + printf("IO Dynamic Power (mW) = "); + cout << io_power_dynamic << " "; +} + + +//External IO Timing and Voltage Margins. More details can be found in the CACTI-IO technical report (), Chapter 2.2. 
+ +void Extio::extio_eye() +{ + + if (io_param->io_type == Serial) + {io_vmargin=0; + } + else + { + + //VOLTAGE MARGINS + //Voltage noise calculations based on proportional and independent noise + //sources for WRITE, READ and CA + double v_noise_write = io_param->k_noise_write_sen * io_param->v_sw_data_write_line + + io_param->v_noise_independent_write; + double v_noise_read = io_param->k_noise_read_sen * io_param->v_sw_data_read_line + + io_param->v_noise_independent_read; + double v_noise_addr = io_param->k_noise_addr_sen * io_param->v_sw_addr + + io_param->v_noise_independent_addr; + + + //Worst-case voltage margin (Swing/2 - Voltage noise) calculations per state + //depending on DQ voltage margin and CA voltage margin (lesser or the two is + //reported) + if (g_ip->iostate == READ) + { + if ((io_param->v_sw_data_read_line/2 - v_noise_read) < + (io_param->v_sw_addr/2 - v_noise_addr)) { + io_vmargin = io_param->v_sw_data_read_line/2 - v_noise_read; + } + else { + io_vmargin = io_param->v_sw_addr/2 - v_noise_addr; + } + } + else if (g_ip->iostate == WRITE) { + if ((io_param->v_sw_data_write_line/2 - v_noise_write) < + (io_param->v_sw_addr/2 - v_noise_addr)) { + io_vmargin = io_param->v_sw_data_write_line/2 - v_noise_write; + } + else { + io_vmargin = io_param->v_sw_addr/2 - v_noise_addr; + } + } + else { + io_vmargin = 0; + } + + } + + //TIMING MARGINS + + double t_margin_write_setup,t_margin_write_hold,t_margin_read_setup + ,t_margin_read_hold,t_margin_addr_setup,t_margin_addr_hold; + + if (io_param->io_type == Serial) + { + + t_margin_write_setup = (1e6/(4*io_param->frequency)) - + io_param->t_ds - + io_param->t_jitter_setup_sen; + + t_margin_write_hold = (1e6/(4*io_param->frequency)) - + io_param->t_dh - io_param->t_dcd_soc - + io_param->t_jitter_hold_sen; + + t_margin_read_setup = (1e6/(4*io_param->frequency)) - + io_param->t_soc_setup - + io_param->t_jitter_setup_sen; + + t_margin_read_hold = (1e6/(4*io_param->frequency)) - + io_param->t_soc_hold - 
io_param->t_dcd_dram - + io_param->t_dcd_soc - + io_param->t_jitter_hold_sen; + + + + t_margin_addr_setup = (1e6*g_ip->addr_timing/(2*io_param->frequency)); + + + t_margin_addr_hold = (1e6*g_ip->addr_timing/(2*io_param->frequency)); + + + + } + else + { + + + + //Setup and Hold timing margins for DQ WRITE, DQ READ and CA based on timing + //budget + t_margin_write_setup = (1e6/(4*io_param->frequency)) - + io_param->t_ds - io_param->t_error_soc - + io_param->t_jitter_setup_sen - io_param->t_skew_setup + io_param->t_cor_margin; + + t_margin_write_hold = (1e6/(4*io_param->frequency)) - + io_param->t_dh - io_param->t_dcd_soc - io_param->t_error_soc - + io_param->t_jitter_hold_sen - io_param->t_skew_hold + io_param->t_cor_margin; + + t_margin_read_setup = (1e6/(4*io_param->frequency)) - + io_param->t_soc_setup - io_param->t_error_soc - + io_param->t_jitter_setup_sen - io_param->t_skew_setup - + io_param->t_dqsq + io_param->t_cor_margin; + + t_margin_read_hold = (1e6/(4*io_param->frequency)) - + io_param->t_soc_hold - io_param->t_dcd_dram - + io_param->t_dcd_soc - io_param->t_error_soc - + io_param->t_jitter_hold_sen - io_param->t_skew_hold + io_param->t_cor_margin; + + + + t_margin_addr_setup = (1e6*g_ip->addr_timing/(2*io_param->frequency)) - + io_param->t_is - io_param->t_error_soc - + io_param->t_jitter_addr_setup_sen - io_param->t_skew_setup + io_param->t_cor_margin; + + + t_margin_addr_hold = (1e6*g_ip->addr_timing/(2*io_param->frequency)) - + io_param->t_ih - io_param->t_dcd_soc - io_param->t_error_soc - + io_param->t_jitter_addr_hold_sen - io_param->t_skew_hold + io_param->t_cor_margin; + } + + //Worst-case timing margin per state depending on DQ and CA timing margins + if (g_ip->iostate == READ) { + io_tmargin = t_margin_read_setup < t_margin_read_hold ? + t_margin_read_setup : t_margin_read_hold; + io_tmargin = io_tmargin < t_margin_addr_setup ? + io_tmargin : t_margin_addr_setup; + io_tmargin = io_tmargin < t_margin_addr_hold ? 
+ io_tmargin : t_margin_addr_hold; + } + else if (g_ip->iostate == WRITE) { + io_tmargin = t_margin_write_setup < t_margin_write_hold ? + t_margin_write_setup : t_margin_write_hold; + io_tmargin = io_tmargin < t_margin_addr_setup ? + io_tmargin : t_margin_addr_setup; + io_tmargin = io_tmargin < t_margin_addr_hold ? + io_tmargin : t_margin_addr_hold; + } + else { + io_tmargin = 0; + } + + + + + + //OUTPUTS + + + printf("IO Timing Margin (ps) = "); + cout << io_tmargin < + +/* This file contains configuration parameters, including + * default configuration for DDR3, LPDDR2 and WIDEIO. The configuration + * parameters include technology parameters - voltage, load capacitances, IO + * area coefficients, timing parameters, as well as external io configuration parameters - + * termination values, voltage noise coefficients and voltage/timing noise + * sensitivity parameters. More details can be found in the CACTI-IO technical + * report (), especially Chapters 2 and 3. The user can define new dram types here. 
*/ + + + +///////////// DDR3 /////////////////// + + const double rtt1_wr_lrdimm_ddr3[8][4] = +{ + {INF,INF,120,120}, + {INF,INF,120,120}, + {INF,120,120,80}, + {120,120,120,60}, + {120,120,120,60}, + {120,80,80,60}, + {120,80,80,60}, + {120,80,60,40} +}; + + const double rtt2_wr_lrdimm_ddr3[8][4] = +{ + {INF,INF,INF,INF},//1 + {INF,INF,120,120},//2 + {120,120,120,80}, //3 + {120,120,80,60}, //4 + {120,120,80,60}, + {120,80,60,40}, //6 + {120,80,60,40}, + {80,80,40,30}//8 +}; + + const double rtt1_rd_lrdimm_ddr3[8][4] = +{ + {INF,INF,120,120},//1 + {INF,INF,120,120},//2 + {INF,120,120,80}, //3 + {120,120,120,60}, //4 + {120,120,120,60}, + {120,80,80,60}, //6 + {120,80,80,60}, + {120,80,60,40}//8 +}; + + const double rtt2_rd_lrdimm_ddr3[8][4] = +{ + {INF,INF,INF,INF},//1 + {INF,120,80,60},//2 + {120,80,80,40}, //3 + {120,80,60,40}, //4 + {120,80,60,40}, + {80,60,60,30}, //6 + {80,60,60,30}, + {80,60,40,20}//8 +}; + + + const double rtt1_wr_host_dimm_ddr3[3][4]= +{ + {120,120,120,60}, + {120,80,80,60}, + {120,80,60,40} +}; + +const double rtt2_wr_host_dimm_ddr3[3][4]= +{ + {120,120,80,60}, + {120,80,60,40}, + {80,80,40,30} +}; + + const double rtt1_rd_host_dimm_ddr3[3][4]= +{ + {120,120,120,60}, + {120,80,80,60}, + {120,80,60,40} +}; + + const double rtt2_rd_host_dimm_ddr3[3][4]= +{ + {120,80,60,40}, + {80,60,60,30}, + {80,60,40,20} +}; + + + const double rtt1_wr_bob_dimm_ddr3[3][4]= +{ + {INF,120,120,80}, + {120,120,120,60}, + {120,80,80,60} +}; + + const double rtt2_wr_bob_dimm_ddr3[3][4]= +{ + {120,120,120,80}, + {120,120,80,60}, + {120,80,60,40} +}; + + const double rtt1_rd_bob_dimm_ddr3[3][4]= +{ + {INF,120,120,80}, + {120,120,120,60}, + {120,80,80,60} +}; + + const double rtt2_rd_bob_dimm_ddr3[3][4]= +{ + {120,80,80,40}, + {120,80,60,40}, + {80,60,60,30} +}; + + +///////////// DDR4 /////////////////// + + const double rtt1_wr_lrdimm_ddr4[8][4] = +{ + {120,120,80,80},//1 + {120,120,80,80},//2 + {120,80,80,60}, //3 + {80,60,60,60}, //4 + {80,60,60,60}, + 
{60,60,60,40}, //6 + {60,60,60,40}, + {40,40,40,40}//8 +}; + + const double rtt2_wr_lrdimm_ddr4[8][4] = +{ + {INF,INF,INF,INF},//1 + {120,120,120,80},//2 + {120,80,80,80},//3 + {80,80,80,60},//4 + {80,80,80,60}, + {60,60,60,40},//6 + {60,60,60,40}, + {60,40,40,30}//8 +}; + + const double rtt1_rd_lrdimm_ddr4[8][4] = +{ + {120,120,80,80},//1 + {120,120,80,60},//2 + {120,80,80,60}, //3 + {120,60,60,60}, //4 + {120,60,60,60}, + {80,60,60,40}, //6 + {80,60,60,40}, + {60,40,40,30}//8 +}; + + const double rtt2_rd_lrdimm_ddr4[8][4] = +{ + {INF,INF,INF,INF},//1 + {80,60,60,60},//2 + {60,60,40,40}, //3 + {60,40,40,40}, //4 + {60,40,40,40}, + {40,40,40,30}, //6 + {40,40,40,30}, + {40,30,30,20}//8 +}; + + + + const double rtt1_wr_host_dimm_ddr4[3][4]= +{ + {80,60,60,60}, + {60,60,60,60}, + {40,40,40,40} +}; + + const double rtt2_wr_host_dimm_ddr4[3][4]= +{ + {80,80,80,60}, + {60,60,60,40}, + {60,40,40,30} +}; + + const double rtt1_rd_host_dimm_ddr4[3][4]= +{ + {120,60,60,60}, + {80,60,60,40}, + {60,40,40,30} +}; + + const double rtt2_rd_host_dimm_ddr4[3][4]= +{ + {60,40,40,40}, + {40,40,40,30}, + {40,30,30,20} +}; + + + const double rtt1_wr_bob_dimm_ddr4[3][4]= +{ + {120,80,80,60}, + {80,60,60,60}, + {60,60,60,40} +}; + + const double rtt2_wr_bob_dimm_ddr4[3][4]= +{ + {120,80,80,80}, + {80,80,80,60}, + {60,60,60,40} +}; + + const double rtt1_rd_bob_dimm_ddr4[3][4]= +{ + {120,80,80,60}, + {120,60,60,60}, + {80,60,60,40} +}; + + const double rtt2_rd_bob_dimm_ddr4[3][4]= +{ + {60,60,40,40}, + {60,40,40,40}, + {40,40,40,30} +}; + + +///////////////////////////////////////////// + +int IOTechParam::frequnecy_index(Mem_IO_type type) +{ + if(type==DDR3) + { + if(frequency<=400) + return 0; + else if(frequency<=533) + return 1; + else if(frequency<=667) + return 2; + else + return 3; + } + else if(type==DDR4) + { + if(frequency<=800) + return 0; + else if(frequency<=933) + return 1; + else if(frequency<=1066) + return 2; + else + return 3; + } + else + { + assert(false); + } + return 
0; +} + + + +IOTechParam::IOTechParam(InputParameter * g_ip) +{ + num_mem_ca = g_ip->num_mem_dq * (g_ip->num_dq/g_ip->mem_data_width); + num_mem_clk = g_ip->num_mem_dq * + (g_ip->num_dq/g_ip->mem_data_width)/(g_ip->num_clk/2); + + + if (g_ip->io_type == LPDDR2) { //LPDDR + //Technology Parameters + + vdd_io = 1.2; + v_sw_clk = 1; + + // Loading paramters + c_int = 1.5; + c_tx = 2; + c_data = 1.5; + c_addr = 0.75; + i_bias = 5; + i_leak = 1000; + + // IO Area coefficients + + ioarea_c = 0.01; + ioarea_k0 = 0.5; + ioarea_k1 = 0.00008; + ioarea_k2 = 0.000000030; + ioarea_k3 = 0.000000000008; + + // Timing parameters (ps) + t_ds = 250; + t_is = 250; + t_dh = 250; + t_ih = 250; + t_dcd_soc = 50; + t_dcd_dram = 50; + t_error_soc = 50; + t_skew_setup = 50; + t_skew_hold = 50; + t_dqsq = 250; + t_soc_setup = 50; + t_soc_hold = 50; + t_jitter_setup = 200; + t_jitter_hold = 200; + t_jitter_addr_setup = 200; + t_jitter_addr_hold = 200; + t_cor_margin = 40; + + //External IO Configuration Parameters + + r_diff_term = 480; + rtt1_dq_read = 100000; + rtt2_dq_read = 100000; + rtt1_dq_write = 100000; + rtt2_dq_write = 100000; + rtt_ca = 240; + rs1_dq = 0; + rs2_dq = 0; + r_stub_ca = 0; + r_on = 50; + r_on_ca = 50; + z0 = 50; + t_flight = 0.5; + t_flight_ca = 0.5; + + // Voltage noise coeffecients + k_noise_write = 0.2; + k_noise_read = 0.2; + k_noise_addr = 0.2; + v_noise_independent_write = 0.1; + v_noise_independent_read = 0.1; + v_noise_independent_addr = 0.1; + + //SENSITIVITY INPUTS FOR TIMING AND VOLTAGE NOISE + +/* This is a user-defined section that depends on the channel sensitivity + * to IO and DRAM parameters. The t_jitter_* and k_noise_* are the + * parameters that are impacted based on the channel analysis. The user + * can define any relationship between the termination, loading and + * configuration parameters AND the t_jitter/k_noise parameters. E.g. a + * linear relationship, a non-linear analytical relationship or a lookup + * table. 
The sensitivity coefficients are based on channel analysis + * performed on the channel of interest.Given below is an example of such + * a sensitivity relationship. + * Such a linear fit can be found efficiently using an orthogonal design + * of experiments method shown in the technical report (), in Chapter 2.2. */ + + k_noise_write_sen = k_noise_write * (1 + 0.2*(r_on/34 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + k_noise_read_sen = k_noise_read * (1 + 0.2*(r_on/34 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + k_noise_addr_sen = k_noise_addr * (1 + 0.1*(rtt_ca/100 - 1) + + 0.2*(r_on/34 - 1) + 0.2*(num_mem_ca/16 - 1)); + + t_jitter_setup_sen = t_jitter_setup * (1 + 0.1*(r_on/34 - 1) + + 0.3*(g_ip->num_mem_dq/2 - 1)); + t_jitter_hold_sen = t_jitter_hold * (1 + 0.1*(r_on/34 - 1) + + 0.3*(g_ip->num_mem_dq/2 - 1)); + t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.2*(rtt_ca/100 - 1) + + 0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1)); + t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.2*(rtt_ca/100 - 1) + + 0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1)); + + // PHY Static Power Coefficients (mW) + + phy_datapath_s = 0; + phy_phase_rotator_s = 5; + phy_clock_tree_s = 0; + phy_rx_s = 3; + phy_dcc_s = 0; + phy_deskew_s = 0; + phy_leveling_s = 0; + phy_pll_s = 2; + + + // PHY Dynamic Power Coefficients (mW/Gbps) + + phy_datapath_d = 0.3; + phy_phase_rotator_d = 0.01; + phy_clock_tree_d = 0.4; + phy_rx_d = 0.2; + phy_dcc_d = 0; + phy_deskew_d = 0; + phy_leveling_d = 0; + phy_pll_d = 0.05; + + + //PHY Wakeup Times (Sleep to Active) (microseconds) + + phy_pll_wtime = 10; + phy_phase_rotator_wtime = 5; + phy_rx_wtime = 2; + phy_bandgap_wtime = 10; + phy_deskew_wtime = 0; + phy_vrefgen_wtime = 0; + + + } + else if (g_ip->io_type == WideIO) { //WIDEIO + //Technology Parameters + vdd_io = 1.2; + v_sw_clk = 1.2; + + // Loading parameters + c_int = 0.5; + c_tx = 0.5; + c_data = 0.5; + c_addr = 0.35; + i_bias = 0; + i_leak = 500; + + // IO Area coefficients + ioarea_c = 0.003; + 
ioarea_k0 = 0.2; + ioarea_k1 = 0.00004; + ioarea_k2 = 0.000000020; + ioarea_k3 = 0.000000000004; + + // Timing parameters (ps) + t_ds = 250; + t_is = 250; + t_dh = 250; + t_ih = 250; + t_dcd_soc = 50; + t_dcd_dram = 50; + t_error_soc = 50; + t_skew_setup = 50; + t_skew_hold = 50; + t_dqsq = 250; + t_soc_setup = 50; + t_soc_hold = 50; + t_jitter_setup = 200; + t_jitter_hold = 200; + t_jitter_addr_setup = 200; + t_jitter_addr_hold = 200; + t_cor_margin = 50; + + //External IO Configuration Parameters + + r_diff_term = 100000; + rtt1_dq_read = 100000; + rtt2_dq_read = 100000; + rtt1_dq_write = 100000; + rtt2_dq_write = 100000; + rtt_ca = 100000; + rs1_dq = 0; + rs2_dq = 0; + r_stub_ca = 0; + r_on = 75; + r_on_ca = 75; + z0 = 50; + t_flight = 0.05; + t_flight_ca = 0.05; + + // Voltage noise coeffecients + k_noise_write = 0.2; + k_noise_read = 0.2; + k_noise_addr = 0.2; + v_noise_independent_write = 0.1; + v_noise_independent_read = 0.1; + v_noise_independent_addr = 0.1; + + //SENSITIVITY INPUTS FOR TIMING AND VOLTAGE NOISE + + /* This is a user-defined section that depends on the channel sensitivity + * to IO and DRAM parameters. The t_jitter_* and k_noise_* are the + * parameters that are impacted based on the channel analysis. The user + * can define any relationship between the termination, loading and + * configuration parameters AND the t_jitter/k_noise parameters. E.g. a + * linear relationship, a non-linear analytical relationship or a lookup + * table. The sensitivity coefficients are based on channel analysis + * performed on the channel of interest.Given below is an example of such + * a sensitivity relationship. + * Such a linear fit can be found efficiently using an orthogonal design + * of experiments method shown in the technical report (), in Chapter 2.2. 
*/ + + k_noise_write_sen = k_noise_write * (1 + 0.2*(r_on/50 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + k_noise_read_sen = k_noise_read * (1 + 0.2*(r_on/50 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + k_noise_addr_sen = k_noise_addr * (1 + 0.2*(r_on/50 - 1) + + 0.2*(num_mem_ca/16 - 1)); + + + t_jitter_setup_sen = t_jitter_setup * (1 + 0.1*(r_on/50 - 1) + + 0.3*(g_ip->num_mem_dq/2 - 1)); + t_jitter_hold_sen = t_jitter_hold * (1 + 0.1*(r_on/50 - 1) + + 0.3*(g_ip->num_mem_dq/2 - 1)); + t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.1*(r_on/50 - 1) + + 0.4*(num_mem_ca/16 - 1)); + t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.1*(r_on/50 - 1) + + 0.4*(num_mem_ca/16 - 1)); + + // PHY Static Power Coefficients (mW) + phy_datapath_s = 0; + phy_phase_rotator_s = 1; + phy_clock_tree_s = 0; + phy_rx_s = 0; + phy_dcc_s = 0; + phy_deskew_s = 0; + phy_leveling_s = 0; + phy_pll_s = 0; + + + // PHY Dynamic Power Coefficients (mW/Gbps) + phy_datapath_d = 0.3; + phy_phase_rotator_d = 0.01; + phy_clock_tree_d = 0.2; + phy_rx_d = 0.1; + phy_dcc_d = 0; + phy_deskew_d = 0; + phy_leveling_d = 0; + phy_pll_d = 0; + + //PHY Wakeup Times (Sleep to Active) (microseconds) + + phy_pll_wtime = 10; + phy_phase_rotator_wtime = 0; + phy_rx_wtime = 0; + phy_bandgap_wtime = 0; + phy_deskew_wtime = 0; + phy_vrefgen_wtime = 0; + + + } + else if (g_ip->io_type == DDR3) + { //Default parameters for DDR3 + // IO Supply voltage (V) + vdd_io = 1.5; + v_sw_clk = 0.75; + + // Loading parameters + c_int = 1.5; + c_tx = 2; + c_data = 1.5; + c_addr = 0.75; + i_bias = 15; + i_leak = 1000; + + // IO Area coefficients + ioarea_c = 0.01; + ioarea_k0 = 0.5; + ioarea_k1 = 0.00015; + ioarea_k2 = 0.000000045; + ioarea_k3 = 0.000000000015; + + // Timing parameters (ps) + t_ds = 150; + t_is = 150; + t_dh = 150; + t_ih = 150; + t_dcd_soc = 50; + t_dcd_dram = 50; + t_error_soc = 25; + t_skew_setup = 25; + t_skew_hold = 25; + t_dqsq = 100; + t_soc_setup = 50; + t_soc_hold = 50; + t_jitter_setup = 100; + 
t_jitter_hold = 100; + t_jitter_addr_setup = 100; + t_jitter_addr_hold = 100; + t_cor_margin = 30; + + + //External IO Configuration Parameters + + r_diff_term = 100; + rtt1_dq_read = g_ip->rtt_value; + rtt2_dq_read = g_ip->rtt_value; + rtt1_dq_write = g_ip->rtt_value; + rtt2_dq_write = g_ip->rtt_value; + rtt_ca = 50; + rs1_dq = 15; + rs2_dq = 15; + r_stub_ca = 0; + r_on = g_ip->ron_value; + r_on_ca = 50; + z0 = 50; + t_flight = g_ip->tflight_value; + t_flight_ca = 2; + + // Voltage noise coeffecients + + k_noise_write = 0.2; + k_noise_read = 0.2; + k_noise_addr = 0.2; + v_noise_independent_write = 0.1; + v_noise_independent_read = 0.1; + v_noise_independent_addr = 0.1; + + //SENSITIVITY INPUTS FOR TIMING AND VOLTAGE NOISE + + /* This is a user-defined section that depends on the channel sensitivity + * to IO and DRAM parameters. The t_jitter_* and k_noise_* are the + * parameters that are impacted based on the channel analysis. The user + * can define any relationship between the termination, loading and + * configuration parameters AND the t_jitter/k_noise parameters. E.g. a + * linear relationship, a non-linear analytical relationship or a lookup + * table. The sensitivity coefficients are based on channel analysis + * performed on the channel of interest.Given below is an example of such + * a sensitivity relationship. + * Such a linear fit can be found efficiently using an orthogonal design + * of experiments method shown in the technical report (), in Chapter 2.2. 
*/ + + k_noise_write_sen = k_noise_write * (1 + 0.1*(rtt1_dq_write/60 - 1) + + 0.2*(rtt2_dq_write/60 - 1) + 0.2*(r_on/34 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + + k_noise_read_sen = k_noise_read * (1 + 0.1*(rtt1_dq_read/60 - 1) + + 0.2*(rtt2_dq_read/60 - 1) + 0.2*(r_on/34 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + + k_noise_addr_sen = k_noise_addr * (1 + 0.1*(rtt_ca/50 - 1) + + 0.2*(r_on/34 - 1) + 0.2*(num_mem_ca/16 - 1)); + + + t_jitter_setup_sen = t_jitter_setup * (1 + 0.2*(rtt1_dq_write/60 - 1) + + 0.3*(rtt2_dq_write/60 - 1) + 0.1*(r_on/34 - 1) + + 0.3*(g_ip->num_mem_dq/2 - 1)); + + t_jitter_hold_sen = t_jitter_hold * (1 + 0.2*(rtt1_dq_write/60 - 1) + + 0.3*(rtt2_dq_write/60 - 1) + + 0.1*(r_on/34 - 1) + 0.3*(g_ip->num_mem_dq/2 - 1)); + + t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.2*(rtt_ca/50 - 1) + + 0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1)); + + t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.2*(rtt_ca/50 - 1) + + 0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1)); + + // PHY Static Power Coefficients (mW) + phy_datapath_s = 0; + phy_phase_rotator_s = 10; + phy_clock_tree_s = 0; + phy_rx_s = 10; + phy_dcc_s = 0; + phy_deskew_s = 0; + phy_leveling_s = 0; + phy_pll_s = 10; + + // PHY Dynamic Power Coefficients (mW/Gbps) + phy_datapath_d = 0.5; + phy_phase_rotator_d = 0.01; + phy_clock_tree_d = 0.5; + phy_rx_d = 0.5; + phy_dcc_d = 0.05; + phy_deskew_d = 0.1; + phy_leveling_d = 0.05; + phy_pll_d = 0.05; + + //PHY Wakeup Times (Sleep to Active) (microseconds) + + phy_pll_wtime = 10; + phy_phase_rotator_wtime = 5; + phy_rx_wtime = 2; + phy_bandgap_wtime = 10; + phy_deskew_wtime = 0.003; + phy_vrefgen_wtime = 0.5; + + + } + else if (g_ip->io_type == DDR4) + { //Default parameters for DDR4 + // IO Supply voltage (V) + vdd_io = 1.2; + v_sw_clk = 0.6; + + // Loading parameters + c_int = 1.5; + c_tx = 2; + c_data = 1; + c_addr = 0.75; + i_bias = 15; + i_leak = 1000; + + // IO Area coefficients + ioarea_c = 0.01; + ioarea_k0 = 0.35; + ioarea_k1 = 0.00008; + 
ioarea_k2 = 0.000000035; + ioarea_k3 = 0.000000000010; + + // Timing parameters (ps) + t_ds = 30; + t_is = 60; + t_dh = 30; + t_ih = 60; + t_dcd_soc = 20; + t_dcd_dram = 20; + t_error_soc = 15; + t_skew_setup = 15; + t_skew_hold = 15; + t_dqsq = 50; + t_soc_setup = 20; + t_soc_hold = 10; + t_jitter_setup = 30; + t_jitter_hold = 30; + t_jitter_addr_setup = 60; + t_jitter_addr_hold = 60; + t_cor_margin = 10; + + + //External IO Configuration Parameters + + r_diff_term = 100; + rtt1_dq_read = g_ip->rtt_value; + rtt2_dq_read = g_ip->rtt_value; + rtt1_dq_write = g_ip->rtt_value; + rtt2_dq_write = g_ip->rtt_value; + rtt_ca = 50; + rs1_dq = 15; + rs2_dq = 15; + r_stub_ca = 0; + r_on = g_ip->ron_value; + r_on_ca = 50; + z0 = 50; + t_flight = g_ip->tflight_value; + t_flight_ca = 2; + + // Voltage noise coeffecients + + k_noise_write = 0.2; + k_noise_read = 0.2; + k_noise_addr = 0.2; + v_noise_independent_write = 0.1; + v_noise_independent_read = 0.1; + v_noise_independent_addr = 0.1; + + //SENSITIVITY INPUTS FOR TIMING AND VOLTAGE NOISE + + /* This is a user-defined section that depends on the channel sensitivity + * to IO and DRAM parameters. The t_jitter_* and k_noise_* are the + * parameters that are impacted based on the channel analysis. The user + * can define any relationship between the termination, loading and + * configuration parameters AND the t_jitter/k_noise parameters. E.g. a + * linear relationship, a non-linear analytical relationship or a lookup + * table. The sensitivity coefficients are based on channel analysis + * performed on the channel of interest.Given below is an example of such + * a sensitivity relationship. + * Such a linear fit can be found efficiently using an orthogonal design + * of experiments method shown in the technical report (), in Chapter 2.2. 
*/ + + k_noise_write_sen = k_noise_write * (1 + 0.1*(rtt1_dq_write/60 - 1) + + 0.2*(rtt2_dq_write/60 - 1) + 0.2*(r_on/34 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + + k_noise_read_sen = k_noise_read * (1 + 0.1*(rtt1_dq_read/60 - 1) + + 0.2*(rtt2_dq_read/60 - 1) + 0.2*(r_on/34 - 1) + + 0.2*(g_ip->num_mem_dq/2 - 1)); + + k_noise_addr_sen = k_noise_addr * (1 + 0.1*(rtt_ca/50 - 1) + + 0.2*(r_on/34 - 1) + 0.2*(num_mem_ca/16 - 1)); + + + t_jitter_setup_sen = t_jitter_setup * (1 + 0.2*(rtt1_dq_write/60 - 1) + + 0.3*(rtt2_dq_write/60 - 1) + 0.1*(r_on/34 - 1) + + 0.3*(g_ip->num_mem_dq/2 - 1)); + + t_jitter_hold_sen = t_jitter_hold * (1 + 0.2*(rtt1_dq_write/60 - 1) + + 0.3*(rtt2_dq_write/60 - 1) + + 0.1*(r_on/34 - 1) + 0.3*(g_ip->num_mem_dq/2 - 1)); + + t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.2*(rtt_ca/50 - 1) + + 0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1)); + + t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.2*(rtt_ca/50 - 1) + + 0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1)); + + // PHY Static Power Coefficients (mW) + phy_datapath_s = 0; + phy_phase_rotator_s = 10; + phy_clock_tree_s = 0; + phy_rx_s = 10; + phy_dcc_s = 0; + phy_deskew_s = 0; + phy_leveling_s = 0; + phy_pll_s = 10; + + // PHY Dynamic Power Coefficients (mW/Gbps) + phy_datapath_d = 0.5; + phy_phase_rotator_d = 0.01; + phy_clock_tree_d = 0.5; + phy_rx_d = 0.5; + phy_dcc_d = 0.05; + phy_deskew_d = 0.1; + phy_leveling_d = 0.05; + phy_pll_d = 0.05; + + //PHY Wakeup Times (Sleep to Active) (microseconds) + + phy_pll_wtime = 10; + phy_phase_rotator_wtime = 5; + phy_rx_wtime = 2; + phy_bandgap_wtime = 10; + phy_deskew_wtime = 0.003; + phy_vrefgen_wtime = 0.5; + + + } + else if (g_ip->io_type == Serial) + { //Default parameters for Serial + // IO Supply voltage (V) + vdd_io = 1.2; + v_sw_clk = 0.75; + + // IO Area coefficients + ioarea_c = 0.01; + ioarea_k0 = 0.15; + ioarea_k1 = 0.00005; + ioarea_k2 = 0.000000025; + ioarea_k3 = 0.000000000005; + + // Timing parameters (ps) + t_ds = 15; + t_dh = 15; 
+ t_dcd_soc = 10; + t_dcd_dram = 10; + t_soc_setup = 10; + t_soc_hold = 10; + t_jitter_setup = 20; + t_jitter_hold = 20; + + //External IO Configuration Parameters + + r_diff_term = 100; + + + t_jitter_setup_sen = t_jitter_setup; + + t_jitter_hold_sen = t_jitter_hold; + + t_jitter_addr_setup_sen = t_jitter_addr_setup; + + t_jitter_addr_hold_sen = t_jitter_addr_hold; + + // PHY Static Power Coefficients (mW) + phy_datapath_s = 0; + phy_phase_rotator_s = 10; + phy_clock_tree_s = 0; + phy_rx_s = 10; + phy_dcc_s = 0; + phy_deskew_s = 0; + phy_leveling_s = 0; + phy_pll_s = 10; + + // PHY Dynamic Power Coefficients (mW/Gbps) + phy_datapath_d = 0.5; + phy_phase_rotator_d = 0.01; + phy_clock_tree_d = 0.5; + phy_rx_d = 0.5; + phy_dcc_d = 0.05; + phy_deskew_d = 0.1; + phy_leveling_d = 0.05; + phy_pll_d = 0.05; + + //PHY Wakeup Times (Sleep to Active) (microseconds) + + phy_pll_wtime = 10; + phy_phase_rotator_wtime = 5; + phy_rx_wtime = 2; + phy_bandgap_wtime = 10; + phy_deskew_wtime = 0.003; + phy_vrefgen_wtime = 0.5; + + + } + else + { + cout << "Not Yet supported" << endl; + exit(1); + } + + + //SWING AND TERMINATION CALCULATIONS + + //R|| calculation + rpar_write =(rtt1_dq_write + rs1_dq)*(rtt2_dq_write + rs2_dq)/ + (rtt1_dq_write + rs1_dq + rtt2_dq_write + rs2_dq); + rpar_read =(rtt1_dq_read)*(rtt2_dq_read + rs2_dq)/ + (rtt1_dq_read + rtt2_dq_read + rs2_dq); + + //Swing calculation + v_sw_data_read_load1 =vdd_io * (rtt1_dq_read)*(rtt2_dq_read + rs2_dq) / + ((rtt1_dq_read + rtt2_dq_read + rs2_dq)*(r_on + rs1_dq + rpar_read)); + v_sw_data_read_load2 =vdd_io * (rtt1_dq_read)*(rtt2_dq_read) / + ((rtt1_dq_read + rtt2_dq_read + rs2_dq)*(r_on + rs1_dq + rpar_read)); + v_sw_data_read_line =vdd_io * rpar_read / (r_on + rs1_dq + rpar_read); + v_sw_addr =vdd_io * rtt_ca / (50 + rtt_ca); + v_sw_data_write_load1 =vdd_io * (rtt1_dq_write)*(rtt2_dq_write + rs2_dq) / + ((rtt1_dq_write + rs1_dq + rtt2_dq_write + rs2_dq)*(r_on + rpar_write)); + v_sw_data_write_load2 =vdd_io * 
(rtt2_dq_write)*(rtt1_dq_write + rs1_dq) / + ((rtt1_dq_write + rs1_dq + rtt2_dq_write + rs2_dq)*(r_on + rpar_write)); + v_sw_data_write_line =vdd_io * rpar_write / (r_on + rpar_write); + +}

namespace {

// DQ termination resistances chosen for one channel configuration.
struct DqTermination
{
  double rtt1_wr;
  double rtt2_wr;
  double rtt1_rd;
  double rtt2_rd;
};

// Looks up the four DQ termination values in the rtt*_ddr3 / rtt*_ddr4
// tables declared in extio_technology.h.
//
//   is_ddr4    - false selects the *_ddr3 tables, true the *_ddr4 tables
//   connection - 0 bob-dimm, 1 host-dimm, 2 lrdimm
//   num_loads  - 1-based load count; row (num_loads - 1) is read
//   freq_idx   - column index into the table (tables have 4 columns)
//   fallback   - value returned for all four resistances when the
//                requested entry does not exist
//
// The bob-dimm and host-dimm tables have 3 rows and the lrdimm tables 8.
// An unknown connection or an index outside the table previously read
// unassigned members or out-of-bounds memory; it now yields `fallback`
// and prints a warning.
DqTermination lookup_dq_termination(bool is_ddr4, int connection,
    int num_loads, int freq_idx, double fallback)
{
  DqTermination term = { fallback, fallback, fallback, fallback };

  const double (*wr1)[4] = 0;
  const double (*wr2)[4] = 0;
  const double (*rd1)[4] = 0;
  const double (*rd2)[4] = 0;
  int rows = 0;

  switch (connection)
  {
    case 0: // bob-dimm
      rows = 3;
      wr1 = is_ddr4 ? rtt1_wr_bob_dimm_ddr4 : rtt1_wr_bob_dimm_ddr3;
      wr2 = is_ddr4 ? rtt2_wr_bob_dimm_ddr4 : rtt2_wr_bob_dimm_ddr3;
      rd1 = is_ddr4 ? rtt1_rd_bob_dimm_ddr4 : rtt1_rd_bob_dimm_ddr3;
      rd2 = is_ddr4 ? rtt2_rd_bob_dimm_ddr4 : rtt2_rd_bob_dimm_ddr3;
      break;
    case 1: // host-dimm
      rows = 3;
      wr1 = is_ddr4 ? rtt1_wr_host_dimm_ddr4 : rtt1_wr_host_dimm_ddr3;
      wr2 = is_ddr4 ? rtt2_wr_host_dimm_ddr4 : rtt2_wr_host_dimm_ddr3;
      rd1 = is_ddr4 ? rtt1_rd_host_dimm_ddr4 : rtt1_rd_host_dimm_ddr3;
      rd2 = is_ddr4 ? rtt2_rd_host_dimm_ddr4 : rtt2_rd_host_dimm_ddr3;
      break;
    case 2: // lrdimm
      rows = 8;
      wr1 = is_ddr4 ? rtt1_wr_lrdimm_ddr4 : rtt1_wr_lrdimm_ddr3;
      wr2 = is_ddr4 ? rtt2_wr_lrdimm_ddr4 : rtt2_wr_lrdimm_ddr3;
      rd1 = is_ddr4 ? rtt1_rd_lrdimm_ddr4 : rtt1_rd_lrdimm_ddr3;
      rd2 = is_ddr4 ? rtt2_rd_lrdimm_ddr4 : rtt2_rd_lrdimm_ddr3;
      break;
    default:
      break;
  }

  const int load_idx = num_loads - 1;
  const bool in_table = (wr1 != 0)
    && load_idx >= 0 && load_idx < rows
    && freq_idx >= 0 && freq_idx < 4;

  if (!in_table)
  {
    cout << "Warning: no termination table entry for connection "
         << connection << " with " << num_loads
         << " load(s); using rtt_value from the input configuration" << endl;
    return term;
  }

  term.rtt1_wr = wr1[load_idx][freq_idx];
  term.rtt2_wr = wr2[load_idx][freq_idx];
  term.rtt1_rd = rd1[load_idx][freq_idx];
  term.rtt2_rd = rd2[load_idx][freq_idx];
  return term;
}

} // namespace

// This constructor receives most of its input from g_ip; however, some of
// the parameters can be customized through the explicit arguments.
// connection: 0 bob-dimm, 1 host-dimm, 2 lrdimm
//
// For DDR3/DDR4 the DQ termination values come from the rtt* tables,
// indexed by (num_loads - 1) and frequnecy_index(io_type). If the
// connection is not 0/1/2 or the index is outside the table, the values
// fall back to g_ip->rtt_value.
//
// SENSITIVITY INPUTS FOR TIMING AND VOLTAGE NOISE
// Each branch ends with a user-defined section that depends on the channel
// sensitivity to IO and DRAM parameters. The t_jitter_* and k_noise_* are
// the parameters that are impacted based on the channel analysis. The user
// can define any relationship between the termination, loading and
// configuration parameters AND the t_jitter/k_noise parameters, e.g. a
// linear relationship, a non-linear analytical relationship or a lookup
// table. The sensitivity coefficients are based on channel analysis
// performed on the channel of interest. The relationships below are an
// example of such a sensitivity relationship; such a linear fit can be
// found efficiently using an orthogonal design of experiments method
// (see the technical report, Chapter 2.2).
//
// NOTE(review): num_mem_dq is an int here, so (num_mem_dq/2 - 1) uses
// integer division - confirm that truncation is intended for odd values.
IOTechParam::IOTechParam(InputParameter * g_ip, Mem_IO_type io_type1, int num_mem_dq, int mem_data_width
    , int num_dq, int connection, int num_loads, double freq)
{
  num_mem_ca = num_mem_dq * (mem_data_width);
  num_mem_clk = num_mem_dq *
    (num_dq/mem_data_width)/(g_ip->num_clk/2);

  io_type = io_type1;
  frequency = freq;

  if (io_type == LPDDR2)
  { //LPDDR
    //Technology Parameters
    vdd_io = 1.2;
    v_sw_clk = 1;

    // Loading parameters
    c_int = 1.5;
    c_tx = 2;
    c_data = 1.5;
    c_addr = 0.75;
    i_bias = 5;
    i_leak = 1000;

    // IO Area coefficients
    ioarea_c = 0.01;
    ioarea_k0 = 0.5;
    ioarea_k1 = 0.00008;
    ioarea_k2 = 0.000000030;
    ioarea_k3 = 0.000000000008;

    // Timing parameters (ps)
    t_ds = 250;
    t_is = 250;
    t_dh = 250;
    t_ih = 250;
    t_dcd_soc = 50;
    t_dcd_dram = 50;
    t_error_soc = 50;
    t_skew_setup = 50;
    t_skew_hold = 50;
    t_dqsq = 250;
    t_soc_setup = 50;
    t_soc_hold = 50;
    t_jitter_setup = 200;
    t_jitter_hold = 200;
    t_jitter_addr_setup = 200;
    t_jitter_addr_hold = 200;
    t_cor_margin = 40;

    //External IO Configuration Parameters
    r_diff_term = 480;
    rtt1_dq_read = 100000;
    rtt2_dq_read = 100000;
    rtt1_dq_write = 100000;
    rtt2_dq_write = 100000;
    rtt_ca = 240;
    rs1_dq = 0;
    rs2_dq = 0;
    r_stub_ca = 0;
    r_on = 50;
    r_on_ca = 50;
    z0 = 50;
    t_flight = 0.5;
    t_flight_ca = 0.5;

    // Voltage noise coefficients
    k_noise_write = 0.2;
    k_noise_read = 0.2;
    k_noise_addr = 0.2;
    v_noise_independent_write = 0.1;
    v_noise_independent_read = 0.1;
    v_noise_independent_addr = 0.1;

    // Sensitivity relationships (see the constructor comment)
    k_noise_write_sen = k_noise_write * (1 + 0.2*(r_on/34 - 1) +
        0.2*(num_mem_dq/2 - 1));
    k_noise_read_sen = k_noise_read * (1 + 0.2*(r_on/34 - 1) +
        0.2*(num_mem_dq/2 - 1));
    k_noise_addr_sen = k_noise_addr * (1 + 0.1*(rtt_ca/100 - 1) +
        0.2*(r_on/34 - 1) + 0.2*(num_mem_ca/16 - 1));

    t_jitter_setup_sen = t_jitter_setup * (1 + 0.1*(r_on/34 - 1) +
        0.3*(num_mem_dq/2 - 1));
    t_jitter_hold_sen = t_jitter_hold * (1 + 0.1*(r_on/34 - 1) +
        0.3*(num_mem_dq/2 - 1));
    t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.2*(rtt_ca/100 - 1) +
        0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1));
    t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.2*(rtt_ca/100 - 1) +
        0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1));

    // PHY Static Power Coefficients (mW)
    phy_datapath_s = 0;
    phy_phase_rotator_s = 5;
    phy_clock_tree_s = 0;
    phy_rx_s = 3;
    phy_dcc_s = 0;
    phy_deskew_s = 0;
    phy_leveling_s = 0;
    phy_pll_s = 2;

    // PHY Dynamic Power Coefficients (mW/Gbps)
    phy_datapath_d = 0.3;
    phy_phase_rotator_d = 0.01;
    phy_clock_tree_d = 0.4;
    phy_rx_d = 0.2;
    phy_dcc_d = 0;
    phy_deskew_d = 0;
    phy_leveling_d = 0;
    phy_pll_d = 0.05;

    //PHY Wakeup Times (Sleep to Active) (microseconds)
    phy_pll_wtime = 10;
    phy_phase_rotator_wtime = 5;
    phy_rx_wtime = 2;
    phy_bandgap_wtime = 10;
    phy_deskew_wtime = 0;
    phy_vrefgen_wtime = 0;
  }
  else if (io_type == WideIO)
  { //WIDEIO
    //Technology Parameters
    vdd_io = 1.2;
    v_sw_clk = 1.2;

    // Loading parameters
    c_int = 0.5;
    c_tx = 0.5;
    c_data = 0.5;
    c_addr = 0.35;
    i_bias = 0;
    i_leak = 500;

    // IO Area coefficients
    ioarea_c = 0.003;
    ioarea_k0 = 0.2;
    ioarea_k1 = 0.00004;
    ioarea_k2 = 0.000000020;
    ioarea_k3 = 0.000000000004;

    // Timing parameters (ps)
    t_ds = 250;
    t_is = 250;
    t_dh = 250;
    t_ih = 250;
    t_dcd_soc = 50;
    t_dcd_dram = 50;
    t_error_soc = 50;
    t_skew_setup = 50;
    t_skew_hold = 50;
    t_dqsq = 250;
    t_soc_setup = 50;
    t_soc_hold = 50;
    t_jitter_setup = 200;
    t_jitter_hold = 200;
    t_jitter_addr_setup = 200;
    t_jitter_addr_hold = 200;
    t_cor_margin = 50;

    //External IO Configuration Parameters
    r_diff_term = 100000;
    rtt1_dq_read = 100000;
    rtt2_dq_read = 100000;
    rtt1_dq_write = 100000;
    rtt2_dq_write = 100000;
    rtt_ca = 100000;
    rs1_dq = 0;
    rs2_dq = 0;
    r_stub_ca = 0;
    r_on = 75;
    r_on_ca = 75;
    z0 = 50;
    t_flight = 0.05;
    t_flight_ca = 0.05;

    // Voltage noise coefficients
    k_noise_write = 0.2;
    k_noise_read = 0.2;
    k_noise_addr = 0.2;
    v_noise_independent_write = 0.1;
    v_noise_independent_read = 0.1;
    v_noise_independent_addr = 0.1;

    // Sensitivity relationships (see the constructor comment)
    k_noise_write_sen = k_noise_write * (1 + 0.2*(r_on/50 - 1) +
        0.2*(num_mem_dq/2 - 1));
    k_noise_read_sen = k_noise_read * (1 + 0.2*(r_on/50 - 1) +
        0.2*(num_mem_dq/2 - 1));
    k_noise_addr_sen = k_noise_addr * (1 + 0.2*(r_on/50 - 1) +
        0.2*(num_mem_ca/16 - 1));

    t_jitter_setup_sen = t_jitter_setup * (1 + 0.1*(r_on/50 - 1) +
        0.3*(num_mem_dq/2 - 1));
    t_jitter_hold_sen = t_jitter_hold * (1 + 0.1*(r_on/50 - 1) +
        0.3*(num_mem_dq/2 - 1));
    t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.1*(r_on/50 - 1) +
        0.4*(num_mem_ca/16 - 1));
    t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.1*(r_on/50 - 1) +
        0.4*(num_mem_ca/16 - 1));

    // PHY Static Power Coefficients (mW)
    phy_datapath_s = 0;
    phy_phase_rotator_s = 1;
    phy_clock_tree_s = 0;
    phy_rx_s = 0;
    phy_dcc_s = 0;
    phy_deskew_s = 0;
    phy_leveling_s = 0;
    phy_pll_s = 0;

    // PHY Dynamic Power Coefficients (mW/Gbps)
    phy_datapath_d = 0.3;
    phy_phase_rotator_d = 0.01;
    phy_clock_tree_d = 0.2;
    phy_rx_d = 0.1;
    phy_dcc_d = 0;
    phy_deskew_d = 0;
    phy_leveling_d = 0;
    phy_pll_d = 0;

    //PHY Wakeup Times (Sleep to Active) (microseconds)
    phy_pll_wtime = 10;
    phy_phase_rotator_wtime = 0;
    phy_rx_wtime = 0;
    phy_bandgap_wtime = 0;
    phy_deskew_wtime = 0;
    phy_vrefgen_wtime = 0;
  }
  else if (io_type == DDR3)
  { //Default parameters for DDR3
    // IO Supply voltage (V)
    vdd_io = 1.5;
    v_sw_clk = 0.75;

    // Loading parameters
    c_int = 1.5;
    c_tx = 2;
    c_data = 1.5;
    c_addr = 0.75;
    i_bias = 15;
    i_leak = 1000;

    // IO Area coefficients
    ioarea_c = 0.01;
    ioarea_k0 = 0.5;
    ioarea_k1 = 0.00015;
    ioarea_k2 = 0.000000045;
    ioarea_k3 = 0.000000000015;

    // Timing parameters (ps)
    t_ds = 150;
    t_is = 150;
    t_dh = 150;
    t_ih = 150;
    t_dcd_soc = 50;
    t_dcd_dram = 50;
    t_error_soc = 25;
    t_skew_setup = 25;
    t_skew_hold = 25;
    t_dqsq = 100;
    t_soc_setup = 50;
    t_soc_hold = 50;
    t_jitter_setup = 100;
    t_jitter_hold = 100;
    t_jitter_addr_setup = 100;
    t_jitter_addr_hold = 100;
    t_cor_margin = 30;

    //External IO Configuration Parameters
    r_diff_term = 100;

    // Table-driven DQ termination; falls back to g_ip->rtt_value when the
    // (connection, num_loads, frequency) combination has no table entry.
    const DqTermination term = lookup_dq_termination(false, connection,
        num_loads, frequnecy_index(io_type), g_ip->rtt_value);
    rtt1_dq_write = term.rtt1_wr;
    rtt2_dq_write = term.rtt2_wr;
    rtt1_dq_read = term.rtt1_rd;
    rtt2_dq_read = term.rtt2_rd;

    rtt_ca = 50;
    rs1_dq = 15;
    rs2_dq = 15;
    r_stub_ca = 0;
    r_on = g_ip->ron_value;
    r_on_ca = 50;
    z0 = 50;
    t_flight = g_ip->tflight_value;
    t_flight_ca = 2;

    // Voltage noise coefficients
    k_noise_write = 0.2;
    k_noise_read = 0.2;
    k_noise_addr = 0.2;
    v_noise_independent_write = 0.1;
    v_noise_independent_read = 0.1;
    v_noise_independent_addr = 0.1;

    // Sensitivity relationships (see the constructor comment)
    k_noise_write_sen = k_noise_write * (1 + 0.1*(rtt1_dq_write/60 - 1) +
        0.2*(rtt2_dq_write/60 - 1) + 0.2*(r_on/34 - 1) +
        0.2*(num_mem_dq/2 - 1));

    k_noise_read_sen = k_noise_read * (1 + 0.1*(rtt1_dq_read/60 - 1) +
        0.2*(rtt2_dq_read/60 - 1) + 0.2*(r_on/34 - 1) +
        0.2*(num_mem_dq/2 - 1));

    k_noise_addr_sen = k_noise_addr * (1 + 0.1*(rtt_ca/50 - 1) +
        0.2*(r_on/34 - 1) + 0.2*(num_mem_ca/16 - 1));

    t_jitter_setup_sen = t_jitter_setup * (1 + 0.2*(rtt1_dq_write/60 - 1) +
        0.3*(rtt2_dq_write/60 - 1) + 0.1*(r_on/34 - 1) +
        0.3*(num_mem_dq/2 - 1));

    t_jitter_hold_sen = t_jitter_hold * (1 + 0.2*(rtt1_dq_write/60 - 1) +
        0.3*(rtt2_dq_write/60 - 1) +
        0.1*(r_on/34 - 1) + 0.3*(num_mem_dq/2 - 1));

    t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.2*(rtt_ca/50 - 1) +
        0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1));

    t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.2*(rtt_ca/50 - 1) +
        0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1));

    // PHY Static Power Coefficients (mW)
    phy_datapath_s = 0;
    phy_phase_rotator_s = 10;
    phy_clock_tree_s = 0;
    phy_rx_s = 10;
    phy_dcc_s = 0;
    phy_deskew_s = 0;
    phy_leveling_s = 0;
    phy_pll_s = 10;

    // PHY Dynamic Power Coefficients (mW/Gbps)
    phy_datapath_d = 0.5;
    phy_phase_rotator_d = 0.01;
    phy_clock_tree_d = 0.5;
    phy_rx_d = 0.5;
    phy_dcc_d = 0.05;
    phy_deskew_d = 0.1;
    phy_leveling_d = 0.05;
    phy_pll_d = 0.05;

    //PHY Wakeup Times (Sleep to Active) (microseconds)
    phy_pll_wtime = 10;
    phy_phase_rotator_wtime = 5;
    phy_rx_wtime = 2;
    phy_bandgap_wtime = 10;
    phy_deskew_wtime = 0.003;
    phy_vrefgen_wtime = 0.5;
  }
  else if (io_type == DDR4)
  { //Default parameters for DDR4
    // IO Supply voltage (V)
    vdd_io = 1.2;
    v_sw_clk = 0.6;

    // Loading parameters
    c_int = 1.5;
    c_tx = 2;
    c_data = 1;
    c_addr = 0.75;
    i_bias = 15;
    i_leak = 1000;

    // IO Area coefficients
    ioarea_c = 0.01;
    ioarea_k0 = 0.35;
    ioarea_k1 = 0.00008;
    ioarea_k2 = 0.000000035;
    ioarea_k3 = 0.000000000010;

    // Timing parameters (ps)
    t_ds = 30;
    t_is = 60;
    t_dh = 30;
    t_ih = 60;
    t_dcd_soc = 20;
    t_dcd_dram = 20;
    t_error_soc = 15;
    t_skew_setup = 15;
    t_skew_hold = 15;
    t_dqsq = 50;
    t_soc_setup = 20;
    t_soc_hold = 10;
    t_jitter_setup = 30;
    t_jitter_hold = 30;
    t_jitter_addr_setup = 60;
    t_jitter_addr_hold = 60;
    t_cor_margin = 10;

    //External IO Configuration Parameters
    r_diff_term = 100;

    // Table-driven DQ termination; falls back to g_ip->rtt_value when the
    // (connection, num_loads, frequency) combination has no table entry.
    const DqTermination term = lookup_dq_termination(true, connection,
        num_loads, frequnecy_index(io_type), g_ip->rtt_value);
    rtt1_dq_write = term.rtt1_wr;
    rtt2_dq_write = term.rtt2_wr;
    rtt1_dq_read = term.rtt1_rd;
    rtt2_dq_read = term.rtt2_rd;

    rtt_ca = 50;
    rs1_dq = 15;
    rs2_dq = 15;
    r_stub_ca = 0;
    r_on = g_ip->ron_value;
    r_on_ca = 50;
    z0 = 50;
    t_flight = g_ip->tflight_value;
    t_flight_ca = 2;

    // Voltage noise coefficients
    k_noise_write = 0.2;
    k_noise_read = 0.2;
    k_noise_addr = 0.2;
    v_noise_independent_write = 0.1;
    v_noise_independent_read = 0.1;
    v_noise_independent_addr = 0.1;

    // Sensitivity relationships (see the constructor comment)
    k_noise_write_sen = k_noise_write * (1 + 0.1*(rtt1_dq_write/60 - 1) +
        0.2*(rtt2_dq_write/60 - 1) + 0.2*(r_on/34 - 1) +
        0.2*(num_mem_dq/2 - 1));

    k_noise_read_sen = k_noise_read * (1 + 0.1*(rtt1_dq_read/60 - 1) +
        0.2*(rtt2_dq_read/60 - 1) + 0.2*(r_on/34 - 1) +
        0.2*(num_mem_dq/2 - 1));

    k_noise_addr_sen = k_noise_addr * (1 + 0.1*(rtt_ca/50 - 1) +
        0.2*(r_on/34 - 1) + 0.2*(num_mem_ca/16 - 1));

    t_jitter_setup_sen = t_jitter_setup * (1 + 0.2*(rtt1_dq_write/60 - 1) +
        0.3*(rtt2_dq_write/60 - 1) + 0.1*(r_on/34 - 1) +
        0.3*(num_mem_dq/2 - 1));

    t_jitter_hold_sen = t_jitter_hold * (1 + 0.2*(rtt1_dq_write/60 - 1) +
        0.3*(rtt2_dq_write/60 - 1) +
        0.1*(r_on/34 - 1) + 0.3*(num_mem_dq/2 - 1));

    t_jitter_addr_setup_sen = t_jitter_addr_setup * (1 + 0.2*(rtt_ca/50 - 1) +
        0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1));

    t_jitter_addr_hold_sen = t_jitter_addr_hold * (1 + 0.2*(rtt_ca/50 - 1) +
        0.1*(r_on/34 - 1) + 0.4*(num_mem_ca/16 - 1));

    // PHY Static Power Coefficients (mW)
    phy_datapath_s = 0;
    phy_phase_rotator_s = 10;
    phy_clock_tree_s = 0;
    phy_rx_s = 10;
    phy_dcc_s = 0;
    phy_deskew_s = 0;
    phy_leveling_s = 0;
    phy_pll_s = 10;

    // PHY Dynamic Power Coefficients (mW/Gbps)
    phy_datapath_d = 0.5;
    phy_phase_rotator_d = 0.01;
    phy_clock_tree_d = 0.5;
    phy_rx_d = 0.5;
    phy_dcc_d = 0.05;
    phy_deskew_d = 0.1;
    phy_leveling_d = 0.05;
    phy_pll_d = 0.05;

    //PHY Wakeup Times (Sleep to Active) (microseconds)
    phy_pll_wtime = 10;
    phy_phase_rotator_wtime = 5;
    phy_rx_wtime = 2;
    phy_bandgap_wtime = 10;
    phy_deskew_wtime = 0.003;
    phy_vrefgen_wtime = 0.5;
  }
  else if (io_type == Serial)
  { //Default parameters for Serial
    // NOTE(review): this branch assigns no termination, loading or noise
    // parameters (rtt*, rs*, r_on, rtt_ca, t_jitter_addr_*), yet they are
    // read just below and in the swing equations at the end of this
    // constructor. Confirm they are given defined values elsewhere.

    // IO Supply voltage (V)
    vdd_io = 1.2;
    v_sw_clk = 0.75;

    // IO Area coefficients
    ioarea_c = 0.01;
    ioarea_k0 = 0.15;
    ioarea_k1 = 0.00005;
    ioarea_k2 = 0.000000025;
    ioarea_k3 = 0.000000000005;

    // Timing parameters (ps)
    t_ds = 15;
    t_dh = 15;
    t_dcd_soc = 10;
    t_dcd_dram = 10;
    t_soc_setup = 10;
    t_soc_hold = 10;
    t_jitter_setup = 20;
    t_jitter_hold = 20;

    //External IO Configuration Parameters
    r_diff_term = 100;

    // No sensitivity scaling for Serial: the *_sen values mirror the
    // corresponding base jitter values.
    t_jitter_setup_sen = t_jitter_setup;
    t_jitter_hold_sen = t_jitter_hold;
    t_jitter_addr_setup_sen = t_jitter_addr_setup;
    t_jitter_addr_hold_sen = t_jitter_addr_hold;

    // PHY Static Power Coefficients (mW)
    phy_datapath_s = 0;
    phy_phase_rotator_s = 10;
    phy_clock_tree_s = 0;
    phy_rx_s = 10;
    phy_dcc_s = 0;
    phy_deskew_s = 0;
    phy_leveling_s = 0;
    phy_pll_s = 10;

    // PHY Dynamic Power Coefficients (mW/Gbps)
    phy_datapath_d = 0.5;
    phy_phase_rotator_d = 0.01;
    phy_clock_tree_d = 0.5;
    phy_rx_d = 0.5;
    phy_dcc_d = 0.05;
    phy_deskew_d = 0.1;
    phy_leveling_d = 0.05;
    phy_pll_d = 0.05;

    //PHY Wakeup Times (Sleep to Active) (microseconds)
    phy_pll_wtime = 10;
    phy_phase_rotator_wtime = 5;
    phy_rx_wtime = 2;
    phy_bandgap_wtime = 10;
    phy_deskew_wtime = 0.003;
    phy_vrefgen_wtime = 0.5;
  }
  else
  {
    cout << "Not Yet supported" << endl;
    exit(1);
  }


  //SWING AND TERMINATION CALCULATIONS

  //R|| calculation
  rpar_write =(rtt1_dq_write + rs1_dq)*(rtt2_dq_write + rs2_dq)/
    (rtt1_dq_write + rs1_dq + rtt2_dq_write + rs2_dq);
  rpar_read =(rtt1_dq_read)*(rtt2_dq_read + rs2_dq)/
    (rtt1_dq_read + rtt2_dq_read + rs2_dq);

  //Swing calculation
  v_sw_data_read_load1 =vdd_io * (rtt1_dq_read)*(rtt2_dq_read + rs2_dq) /
    ((rtt1_dq_read + rtt2_dq_read + rs2_dq)*(r_on + rs1_dq + rpar_read));
  v_sw_data_read_load2 =vdd_io * (rtt1_dq_read)*(rtt2_dq_read) /
    ((rtt1_dq_read + rtt2_dq_read + rs2_dq)*(r_on + rs1_dq + rpar_read));
  v_sw_data_read_line =vdd_io * rpar_read / (r_on + rs1_dq + rpar_read);
  v_sw_addr =vdd_io * rtt_ca / (50 + rtt_ca);
  v_sw_data_write_load1 =vdd_io * (rtt1_dq_write)*(rtt2_dq_write + rs2_dq) /
    ((rtt1_dq_write + rs1_dq + rtt2_dq_write + rs2_dq)*(r_on + rpar_write));
  v_sw_data_write_load2 =vdd_io * (rtt2_dq_write)*(rtt1_dq_write + rs1_dq) /
    ((rtt1_dq_write + rs1_dq + rtt2_dq_write + rs2_dq)*(r_on + rpar_write));
  v_sw_data_write_line =vdd_io * rpar_write / (r_on + rpar_write);

}

+IOTechParam::~IOTechParam() +{} diff --git a/T1/TP1/cacti-master/extio_technology.h b/T1/TP1/cacti-master/extio_technology.h new file mode 100644 index 0000000..2f3d308 --- /dev/null +++ b/T1/TP1/cacti-master/extio_technology.h @@ -0,0 +1,225 @@ +#ifndef __EXTIO_TECH__ +#define __EXTIO_TECH__ + +#include +#include "parameter.h" +#include "const.h" + +#define NUM_DIMM 1 + + +extern const double rtt1_wr_lrdimm_ddr3[8][4]; +extern const double rtt2_wr_lrdimm_ddr3[8][4]; +extern const double rtt1_rd_lrdimm_ddr3[8][4]; +extern const double rtt2_rd_lrdimm_ddr3[8][4]; + +extern const double rtt1_wr_host_dimm_ddr3[3][4]; +extern const double rtt2_wr_host_dimm_ddr3[3][4]; +extern const double rtt1_rd_host_dimm_ddr3[3][4]; +extern const double rtt2_rd_host_dimm_ddr3[3][4]; + +extern const double rtt1_wr_bob_dimm_ddr3[3][4]; +extern const double rtt2_wr_bob_dimm_ddr3[3][4]; +extern const double rtt1_rd_bob_dimm_ddr3[3][4]; +extern const double rtt2_rd_bob_dimm_ddr3[3][4]; + + +extern const double rtt1_wr_lrdimm_ddr4[8][4]; +extern const double rtt2_wr_lrdimm_ddr4[8][4]; +extern const double rtt1_rd_lrdimm_ddr4[8][4]; +extern const double rtt2_rd_lrdimm_ddr4[8][4]; + +extern const double rtt1_wr_host_dimm_ddr4[3][4]; +extern const double rtt2_wr_host_dimm_ddr4[3][4]; +extern const double rtt1_rd_host_dimm_ddr4[3][4]; +extern const double rtt2_rd_host_dimm_ddr4[3][4]; + +extern const double rtt1_wr_bob_dimm_ddr4[3][4]; +extern const double rtt2_wr_bob_dimm_ddr4[3][4]; +extern const double rtt1_rd_bob_dimm_ddr4[3][4]; +extern const double rtt2_rd_bob_dimm_ddr4[3][4]; + +class IOTechParam +{ + public: + IOTechParam(InputParameter *); + // connection : 0(bob-dimm), 1(host-dimm), 2(on-dimm) + IOTechParam(InputParameter *, Mem_IO_type io_type, int num_mem_dq, int mem_data_width, int
num_dq, int connection, int num_loads, double freq) ; + ~IOTechParam(); + double num_mem_ca; /* Number of loads on the address bus + based on total number of memories in the channel.For + registered or buffered configurations, the num_mem_dq and num_mem_ca is per buffer. */ + + double num_mem_clk; /* Number of loads on the clock as total + memories in the channel / number of clock lines available */ + + //Technology Parameters + // IO Supply voltage (V) + double vdd_io; /* Voltage swing on CLK/CLKB (V) (swing on the CLK pin if it + is differentially terminated) */ + double v_sw_clk; + + // Loading parameters + + double c_int; /*Internal IO loading (pF) (loading within the IO, due to + predriver nets) */ + double c_tx; /* IO TX self-load including package (pF) (loading at the + CPU TX pin) */ + double c_data; /* Device loading per memory data pin (pF) (DRAM device + load for DQ per die) */ + double c_addr; /* Device loading per memory address pin (pF) (DRAM + device load for CA per die) */ + double i_bias; /* Bias current (mA) (includes bias current for the whole memory + bus due to RX Vref based receivers */ + double i_leak; // Active leakage current per pin (nA) + + + + // IO Area coefficients + + double ioarea_c; /* sq.mm. 
(IO Area baseline coeeficient for control + circuitry and overhead) */ + double ioarea_k0; /* sq.mm * ohms (IO Area coefficient for the driver, for + unit drive strength or output impedance) */ + double ioarea_k1; /* sq.mm * ohms / MHz (IO Area coefficient for the + predriver final stage, based on fanout needed) */ + double ioarea_k2; /* sq.mm * ohms / MHz^2 (IO Area coefficient for + predriver middle stage, based on fanout needed) */ + double ioarea_k3; /* sq.mm * ohms / MHz^3 (IO Area coefficient for + predriver first stage, based on fanout needed) */ + + + // Timing parameters (ps) + + double t_ds; //DQ setup time at DRAM + double t_is; //CA setup time at DRAM + double t_dh; //DQ hold time at DRAM + double t_ih; //CA hold time at DRAM + double t_dcd_soc; //Duty-cycle distortion at the CPU/SOC + double t_dcd_dram; //Duty-cycle distortion at the DRAM + double t_error_soc; //Timing error due to edge placement uncertainty of the DLL + double t_skew_setup;//Setup skew between DQ/DQS or CA/CLK after deskewing the lines + double t_skew_hold; //Hold skew between DQ/DQS or CA/CLK after deskewing the lines + double t_dqsq; //DQ-DQS skew at the DRAM output during Read + //double t_qhs; //DQ-DQS hold factor at the DRAM output during Read FIXME: I am commenting it as the variable is never used. 
+ double t_soc_setup; //Setup time at SOC input dueing Read + double t_soc_hold; //Hold time at SOC input during Read + double t_jitter_setup; /* Half-cycle jitter on the DQS at DRAM input + affecting setup time */ + double t_jitter_hold; /* Half-cycle jitter on the DQS at the DRAM input + affecting hold time */ + double t_jitter_addr_setup; /* Half-cycle jitter on the CLK at DRAM input + affecting setup time */ + double t_jitter_addr_hold; /* Half-cycle jitter on the CLK at the DRAM + input affecting hold time */ + double t_cor_margin; // Statistical correlation margin + + + //Termination Parameters + + double r_diff_term; /* Differential termination resister if + used for CLK (Ohm) */ + + + // ODT related termination resistor values (Ohm) + + double rtt1_dq_read; //DQ Read termination at CPU + double rtt2_dq_read; //DQ Read termination at inactive DRAM + double rtt1_dq_write; //DQ Write termination at active DRAM + double rtt2_dq_write; //DQ Write termination at inactive DRAM + double rtt_ca; //CA fly-by termination + double rs1_dq; //Series resistor at active DRAM + double rs2_dq; //Series resistor at inactive DRAM + double r_stub_ca; //Series resistor for the fly-by channel + double r_on; //Driver impedance + double r_on_ca; //CA driver impedance + + double z0; //Line impedance (ohms): Characteristic impedance of the route. 
+ double t_flight; /* Flight time of the interconnect (ns) (approximately + 180ps/inch for FR4) */ + double t_flight_ca; /* Flight time of the Control/Address (CA) + interconnect (ns) (approximately 180ps/inch for FR4) */ + + // Voltage noise coeffecients + + double k_noise_write; //Proportional noise coefficient for Write mode + double k_noise_read; //Proportional noise coefficient for Read mode + double k_noise_addr; //Proportional noise coefficient for Address bus + double v_noise_independent_write; //Independent noise voltage for Write mode + double v_noise_independent_read; //Independent noise voltage for Read mode + double v_noise_independent_addr; //Independent noise voltage for Address bus + + + //SENSITIVITY INPUTS FOR TIMING AND VOLTAGE NOISE + + /* This is a user-defined section that depends on the channel sensitivity + * to IO and DRAM parameters. The t_jitter_* and k_noise_* are the + * parameters that are impacted based on the channel analysis. The user + * can define any relationship between the termination, loading and + * configuration parameters AND the t_jitter/k_noise parameters. 
*/ + + double k_noise_write_sen; + double k_noise_read_sen; + double k_noise_addr_sen; + double t_jitter_setup_sen; + double t_jitter_hold_sen; + double t_jitter_addr_setup_sen; + double t_jitter_addr_hold_sen; + + //SWING AND TERMINATION CALCULATIONS + //R|| calculation + + double rpar_write; + double rpar_read; + + //Swing calculation + + double v_sw_data_read_load1; //Swing for DQ at dram1 during READ + double v_sw_data_read_load2; //Swing for DQ at dram2 during READ + double v_sw_data_read_line; //Swing for DQ on the line during READ + double v_sw_addr; //Swing for the address bus + double v_sw_data_write_load1; //Swing for DQ at dram1 during WRITE + double v_sw_data_write_load2; //Swing for DQ at dram2 during WRITE + double v_sw_data_write_line; //Swing for DQ on the line during WRITE + + // PHY Static Power Coefficients (mW) + + double phy_datapath_s; // Datapath Static Power + double phy_phase_rotator_s; // Phase Rotator Static Power + double phy_clock_tree_s; // Clock Tree Static Power + double phy_rx_s; // Receiver Static Power + double phy_dcc_s; // Duty Cycle Correction Static Power + double phy_deskew_s; // Deskewing Static Power + double phy_leveling_s; // Write and Read Leveling Static Power + double phy_pll_s; // PHY PLL Static Power + + + // PHY Dynamic Power Coefficients (mW/Gbps) + + double phy_datapath_d; // Datapath Dynamic Power + double phy_phase_rotator_d; // Phase Rotator Dynamic Power + double phy_clock_tree_d; // Clock Tree Dynamic Power + double phy_rx_d; // Receiver Dynamic Power + double phy_dcc_d; // Duty Cycle Correction Dynamic Power + double phy_deskew_d; // Deskewing Dynamic Power + double phy_leveling_d; // Write and Read Leveling Dynamic Power + double phy_pll_d; // PHY PLL Dynamic Power + + + //PHY Wakeup Times (Sleep to Active) (microseconds) + + double phy_pll_wtime; // PHY PLL Wakeup Time + double phy_phase_rotator_wtime; // Phase Rotator Wakeup Time + double phy_rx_wtime; // Receiver Wakeup Time + double phy_bandgap_wtime; 
// Bandgap Wakeup Time + double phy_deskew_wtime; // Deskewing Wakeup Time + double phy_vrefgen_wtime; // VREF Generator Wakeup Time + + + // RTT values depends on the number of loads, frequency, and link_type + double frequency; + Mem_IO_type io_type; + int frequnecy_index(Mem_IO_type type); +}; + +#endif diff --git a/T1/TP1/cacti-master/htree2.cc b/T1/TP1/cacti-master/htree2.cc new file mode 100644 index 0000000..3077820 --- /dev/null +++ b/T1/TP1/cacti-master/htree2.cc @@ -0,0 +1,640 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include "htree2.h" +#include "wire.h" +#include +#include + +Htree2::Htree2( + enum Wire_type wire_model, double mat_w, double mat_h, + int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type, + bool uca_tree_, bool search_tree_, /*TechnologyParameter::*/DeviceType *dt) + :in_rise_time(0), out_rise_time(0), + tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), + add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits), + search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), + uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt) +{ + assert(ndbl >= 2 && ndwl >= 2); + +// if (ndbl == 1 && ndwl == 1) +// { +// delay = 0; +// power.readOp.dynamic = 0; +// power.readOp.leakage = 0; +// area.w = mat_w; +// area.h = mat_h; +// return; +// } +// if (ndwl == 1) ndwl++; +// if (ndbl == 1) ndbl++; + + max_unpipelined_link_delay = 0; //TODO + min_w_nmos = g_tp.min_w_nmos_; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; + + switch (htree_type) + { + case Add_htree: + wire_bw = init_wire_bw = add_bits; + in_htree(); + break; + case Data_in_htree: + wire_bw = init_wire_bw = data_in_bits; + in_htree(); + break; + case Data_out_htree: + wire_bw = init_wire_bw = data_out_bits; + 
out_htree(); + break; + case Search_in_htree: + wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not. + in_htree(); + break; + case Search_out_htree: + wire_bw = init_wire_bw = search_data_out_bits; + out_htree(); + break; + default: + assert(0); + break; + } + + power_bit = power; + power.readOp.dynamic *= init_wire_bw; + + assert(power.readOp.dynamic >= 0); + assert(power.readOp.leakage >= 0); +} + + + +// nand gate sizing calculation +void Htree2::input_nand(double s1, double s2, double l_eff) +{ + Wire w1(wt, l_eff); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // input capacitance of a repeater = input capacitance of nand. + double nsize = s1*(1 + pton_size)/(2 + pton_size); + nsize = (nsize < 1) ? 1 : nsize; + + double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) * + (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 + + 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0)); + delay+= horowitz (w1.out_rise_time, tc, + deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); + power.readOp.dynamic += 0.5 * + (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += 0.5 * + (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * wire_bw ; + power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; + power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; +} + + + +// tristate buffer model consisting of not, nand, nor, and driver transistors +void Htree2::output_buffer(double s1, double s2, double l_eff) +{ + Wire w1(wt, 
l_eff); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // input capacitance of repeater = input capacitance of nand + nor. + double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); + double s_eff = //stage eff of a repeater in a wire + (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/ + gate_C(s2*(min_w_nmos + min_w_pmos), 0); + double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0)); + size = (size < 1) ? 1 : size; + + double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1); + double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1); + double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + + gate_C(tr_size*min_w_pmos, 0); + double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(s1*(min_w_nmos + min_w_pmos), 0); + + double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; + + + delay += horowitz (w1.out_rise_time, tc, + deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); + + //nand + power.readOp.dynamic += 0.5 * + (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size*(min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += 0.5 * + (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size*(min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd*init_wire_bw; + + //not + power.readOp.dynamic += 0.5 * + (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += 0.5 * + (drain_C_(size*min_w_pmos, PCH, 1, 1, 
g_tp.cell_h_def) + +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd*init_wire_bw; + + //nor + power.readOp.dynamic += 0.5 * + (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += 0.5 * + (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd*init_wire_bw; + + //output transistor + power.readOp.dynamic += 0.5 * + ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 + + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; + + power.searchOp.dynamic += 0.5 * + ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 + + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd*init_wire_bw; + + if(uca_tree) { + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor + + power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ + power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand + power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor + 
//power.readOp.gate_leakage *=; + } + else { + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand + power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor + + power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ + power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand + power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor + //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw; + } +} + + + +/* calculates the input h-tree delay/power + * A nand gate is used at each node to + * limit the signal + * The area of an unbalanced htree (rows != columns) + * depends on how data is traversed. + * In the following function, if ( no. of rows < no. of columns), + * then data first traverse in excess hor. links until vertical + * and horizontal nodes are same. + * If no. of rows is bigger, then data traverse in + * a hor. link followed by a ver. link in a repeated + * fashion (similar to a balanced tree) until there are no + * hor. links left. After this it goes through the remaining vertical + * links. + */ + void +Htree2::in_htree() +{ + //temp var + double s1 = 0, s2 = 0, s3 = 0; + double l_eff = 0; + Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; + double len = 0, ht = 0; + int option = 0; + + int h = (int) _log2(ndwl/2); // horizontal nodes + int v = (int) _log2(ndbl/2); // vertical nodes + double len_temp; + double ht_temp; + if (uca_tree) + {//: this computation do not consider the wires that route from edge to middle. 
+ ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * + 2 * (1-pow(0.5,h))))/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * + 2 * (1-pow(0.5,v))))/2; + } + else + { + if (ndwl == ndbl) { + ht_temp = ((mat_height*ndbl/2) + + ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) + )/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; + } + else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); + ht_temp = ((mat_height*ndbl/2) + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * + (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; + } + else { + double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); + ht_temp = ((mat_height*ndbl/2) + + ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) + )/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * 
(h + 2*(1-pow(0.5, v-h))))/2; + } + } + + area.h = ht_temp * 2; + area.w = len_temp * 2; + delay = 0; + power.readOp.dynamic = 0; + power.readOp.leakage = 0; + power.searchOp.dynamic =0; + len = len_temp; + ht = ht_temp/2; + + while (v > 0 || h > 0) + { + if (wtemp1) delete wtemp1; + if (wtemp2) delete wtemp2; + if (wtemp3) delete wtemp3; + + if (h > v) + { + //the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len/2); // ver + len_temp = len; + len /= 2; + wtemp3 = 0; + h--; + option = 0; + } + else if (v>0 && h>0) + { + //considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len/2); // next hor + len_temp = len; + ht_temp = ht; + len /= 2; + ht /= 2; + v--; + h--; + option = 1; + } + else + { + // considers only one vertical link + assert(h == 0); + wtemp1 = new Wire(wt, ht); // ver + wtemp2 = new Wire(wt, ht/2); // hor + ht_temp = ht; + ht /= 2; + wtemp3 = 0; + v--; + option = 2; + } + + delay += wtemp1->delay; + power.readOp.dynamic += wtemp1->power.readOp.dynamic; + power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; + if ((uca_tree == false && option == 2) || search_tree==true) + { + wire_bw*=2; // wire bandwidth doubles only for vertical branches + } + + if (uca_tree == false) + { + if (len_temp > wtemp1->repeater_spacing) + { + s1 = wtemp1->repeater_size; + l_eff = wtemp1->repeater_spacing; + } + else + { + s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; + l_eff = len_temp; + } + + if (ht_temp > wtemp2->repeater_spacing) + { + s2 = wtemp2->repeater_size; + } + else + { + s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; + } + // first level + input_nand(s1, s2, l_eff); + } + + + if (option != 1) + { + continue; + } + + // 
second level + delay += wtemp2->delay; + power.readOp.dynamic += wtemp2->power.readOp.dynamic; + power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; + + if (uca_tree) + { + power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; + } + else + { + power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; + wire_bw*=2; + + if (ht_temp > wtemp3->repeater_spacing) + { + s3 = wtemp3->repeater_size; + l_eff = wtemp3->repeater_spacing; + } + else + { + s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; + l_eff = ht_temp; + } + + input_nand(s2, s3, l_eff); + } + } + + if (wtemp1) delete wtemp1; + if (wtemp2) delete wtemp2; + if (wtemp3) delete wtemp3; +} + + + +/* a tristate buffer is used to handle fan-ins + * The area of an unbalanced htree (rows != columns) + * depends on how data is traversed. + * In the following function, if ( no. of rows < no. of columns), + * then data first traverse in excess hor. links until vertical + * and horizontal nodes are same. + * If no. of rows is bigger, then data traverse in + * a hor. link followed by a ver. link in a repeated + * fashion (similar to a balanced tree) until there are no + * hor. links left. After this it goes through the remaining vertical + * links. 
+ */ +void Htree2::out_htree() +{ + //temp var + double s1 = 0, s2 = 0, s3 = 0; + double l_eff = 0; + Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; + double len = 0, ht = 0; + int option = 0; + + int h = (int) _log2(ndwl/2); + int v = (int) _log2(ndbl/2); + double len_temp; + double ht_temp; + if (uca_tree) + { + ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * + 2 * (1-pow(0.5,h))))/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * + 2 * (1-pow(0.5,v))))/2; + } + else + { + if (ndwl == ndbl) { + ht_temp = ((mat_height*ndbl/2) + + ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) + )/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; + + } + else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); + ht_temp = ((mat_height*ndbl/2) + + ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * + (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; + } + else { + double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); + ht_temp = ((mat_height*ndbl/2) + + ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * 
g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) + )/2; + len_temp = (mat_width*ndwl/2 + + ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; + } + } + area.h = ht_temp * 2; + area.w = len_temp * 2; + delay = 0; + power.readOp.dynamic = 0; + power.readOp.leakage = 0; + power.readOp.gate_leakage = 0; + //cout<<"power.readOp.gate_leakage"< 0 || h > 0) + { //finds delay/power of each link in the tree + if (wtemp1) delete wtemp1; + if (wtemp2) delete wtemp2; + if (wtemp3) delete wtemp3; + + if(h > v) { + //the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len/2); // ver + len_temp = len; + len /= 2; + wtemp3 = 0; + h--; + option = 0; + } + else if (v>0 && h>0) { + //considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len/2); // next hor + len_temp = len; + ht_temp = ht; + len /= 2; + ht /= 2; + v--; + h--; + option = 1; + } + else { + // considers only one vertical link + assert(h == 0); + wtemp1 = new Wire(wt, ht); // hor + wtemp2 = new Wire(wt, ht/2); // ver + ht_temp = ht; + ht /= 2; + wtemp3 = 0; + v--; + option = 2; + } + delay += wtemp1->delay; + power.readOp.dynamic += wtemp1->power.readOp.dynamic; + power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; + //cout<<"power.readOp.gate_leakage"< wtemp1->repeater_spacing) + { + s1 = wtemp1->repeater_size; + l_eff = wtemp1->repeater_spacing; + } + else + { + s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; + l_eff = len_temp; + } + if (ht_temp > wtemp2->repeater_spacing) + { + s2 = 
wtemp2->repeater_size; + } + else + { + s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; + } + // first level + output_buffer(s1, s2, l_eff); + } + + + if (option != 1) + { + continue; + } + + // second level + delay += wtemp2->delay; + power.readOp.dynamic += wtemp2->power.readOp.dynamic; + power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; + //cout<<"power.readOp.gate_leakage"<power.readOp.leakage*wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; + } + else + { + power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; + wire_bw*=2; + + if (ht_temp > wtemp3->repeater_spacing) + { + s3 = wtemp3->repeater_size; + l_eff = wtemp3->repeater_spacing; + } + else + { + s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; + l_eff = ht_temp; + } + + output_buffer(s2, s3, l_eff); + } + //cout<<"power.readOp.leakage"<power.readOp.gate_leakage"<power.readOp.gate_leakage< +#include +#include + + +#include "io.h" +#include "area.h" +#include "basic_circuit.h" +#include "parameter.h" +#include "Ucache.h" +#include "nuca.h" +#include "crossbar.h" +#include "arbiter.h" +//#include "highradix.h" +#include "TSV.h" +#include "memorybus.h" +#include "version_cacti.h" + +#include "extio.h" +#include "extio_technology.h" +#include "memcad.h" + +using namespace std; + + +InputParameter::InputParameter() +: array_power_gated(false), + bitline_floating(false), + wl_power_gated(false), + cl_power_gated(false), + interconect_power_gated(false), + power_gating(false), + cl_vertical (true) +{ + +} + +/* Parses "cache.cfg" file */ + void +InputParameter::parse_cfg(const string & in_file) +{ + FILE *fp = fopen(in_file.c_str(), "r"); + char line[5000]; + char jk[5000]; + char temp_var[5000]; + + 
if(!fp) { + cout << in_file << " is missing!\n"; + exit(-1); + } + + while(fscanf(fp, "%[^\n]\n", line) != EOF) { + + if (!strncmp("-size", line, strlen("-size"))) { + sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz)); + if (g_ip->print_detail_debug) + cout << "cache size: " << g_ip->cache_sz << "GB" << endl; + continue; + } + + + + if (!strncmp("-page size", line, strlen("-page size"))) { + sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits)); + continue; + } + + if (!strncmp("-burst length", line, strlen("-burst length"))) { + sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len)); + continue; + } + + if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) { + sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w)); + continue; + } + + if (!strncmp("-block", line, strlen("-block"))) { + sscanf(line, "-block size (bytes) %d", &(line_sz)); + continue; + } + + if (!strncmp("-associativity", line, strlen("-associativity"))) { + sscanf(line, "-associativity %d", &(assoc)); + continue; + } + + if (!strncmp("-read-write", line, strlen("-read-write"))) { + sscanf(line, "-read-write port %d", &(num_rw_ports)); + continue; + } + + if (!strncmp("-exclusive read", line, strlen("exclusive read"))) { + sscanf(line, "-exclusive read port %d", &(num_rd_ports)); + continue; + } + + if(!strncmp("-exclusive write", line, strlen("-exclusive write"))) { + sscanf(line, "-exclusive write port %d", &(num_wr_ports)); + continue; + } + + if (!strncmp("-single ended", line, strlen("-single ended"))) { + sscanf(line, "-single %[(:-~)*]%d", jk, + &(num_se_rd_ports)); + continue; + } + + if (!strncmp("-search", line, strlen("-search"))) { + sscanf(line, "-search port %d", &(num_search_ports)); + continue; + } + + if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) { + sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks)); + continue; + } + + if (!strncmp("-technology", line, strlen("-technology"))) { + sscanf(line, "-technology (u) %lf", 
&(F_sz_um)); + F_sz_nm = F_sz_um*1000; + continue; + } + + if (!strncmp("-output/input", line, strlen("-output/input"))) { + sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w)); + continue; + } + + if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) { + sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp)); + continue; + } + + if (!strncmp("-cache type", line, strlen("-cache type"))) { + sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var); + + if (!strncmp("cache", temp_var, sizeof("cache"))) { + is_cache = true; + } + else + { + is_cache = false; + } + + if (!strncmp("main memory", temp_var, sizeof("main memory"))) { + is_main_mem = true; + } + else { + is_main_mem = false; + } + + if (!strncmp("3D memory or 2D main memory", temp_var, sizeof("3D memory or 2D main memory"))) { + is_3d_mem = true; + is_main_mem = true; + } + else { + is_3d_mem = false; + //is_main_mem = false; + } + + if (g_ip->print_detail_debug) + {cout << "io.cc: is_3d_mem = " << is_3d_mem << endl;} + + if (!strncmp("cam", temp_var, sizeof("cam"))) { + pure_cam = true; + } + else { + pure_cam = false; + } + + if (!strncmp("ram", temp_var, sizeof("ram"))) { + pure_ram = true; + } + else { + if (!is_main_mem) + pure_ram = false; + else + pure_ram = true; + } + + continue; + } + + if (!strncmp("-print option", line, strlen("-print option"))) { + sscanf(line, "-print option%[^\"]\"%[^\"]\"", jk, temp_var); + + if (!strncmp("debug detail", temp_var, sizeof("debug detail"))) { + print_detail_debug = true; + } + else { + print_detail_debug = false; + } + if (g_ip->print_detail_debug) + {cout << "io.cc: print_detail_debug = " << print_detail_debug << endl;} + continue; + } + + if (!strncmp("-burst depth", line, strlen("-burst depth"))) { + sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_depth)); + continue; + } + + if (!strncmp("-IO width", line, strlen("-IO width"))) { + sscanf(line, "-IO %[(:-~)*]%u", jk, &(io_width)); + continue; + } + + if 
(!strncmp("-system frequency", line, strlen("-system frequency"))) { + sscanf(line, "-system frequency %[(:-~)*]%u", jk, &(sys_freq_MHz)); + if(g_ip->print_detail_debug) + cout << "system frequency: " << g_ip->sys_freq_MHz << endl; + continue; + } + + + + if (!strncmp("-stacked die", line, strlen("-stacked die"))) { + sscanf(line, "-stacked die %[(:-~)*]%u", jk, &(num_die_3d)); + if(g_ip->print_detail_debug) + cout << "num_die_3d: " << g_ip->num_die_3d << endl; + continue; + } + + if (!strncmp("-partitioning granularity", line, strlen("-partitioning granularity"))) { + sscanf(line, "-partitioning %[(:-~)*]%u", jk, &(partition_gran)); + if(g_ip->print_detail_debug) + cout << "partitioning granularity: " << g_ip->partition_gran << endl; + continue; + } + + if (!strncmp("-TSV projection", line, strlen("-TSV projection"))) { + sscanf(line, "-TSV %[(:-~)*]%u", jk, &(TSV_proj_type)); + if(g_ip->print_detail_debug) + cout << "TSV projection: " << g_ip->TSV_proj_type << endl; + continue; + } + + + //g_ip->print_detail_debug = debug_detail; + + + //g_ip->partition_gran = 1; + + // --- These two parameters are supposed for bank level partitioning, currently not shown to public + g_ip->num_tier_row_sprd = 1; + g_ip->num_tier_col_sprd = 1; + + if (!strncmp("-tag size", line, strlen("-tag size"))) { + sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("default", temp_var, sizeof("default"))) { + specific_tag = false; + tag_w = 42; /* the actual value is calculated + * later based on the cache size, bank count, and associativity + */ + } + else { + specific_tag = true; + sscanf(line, "-tag size (b) %d", &(tag_w)); + } + continue; + } + + if (!strncmp("-access mode", line, strlen("-access mode"))) { + sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("fast", temp_var, strlen("fast"))) { + access_mode = 2; + } + else if (!strncmp("sequential", temp_var, strlen("sequential"))) { + access_mode = 1; + } + else if(!strncmp("normal", temp_var, 
strlen("normal"))) { + access_mode = 0; + } + else { + cout << "ERROR: Invalid access mode!\n"; + exit(0); + } + continue; + } + + if (!strncmp("-Data array cell type", line, strlen("-Data array cell type"))) { + sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var); + + if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + data_arr_ram_cell_tech_type = 0; + } + else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + data_arr_ram_cell_tech_type = 1; + } + else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + data_arr_ram_cell_tech_type = 2; + } + else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { + data_arr_ram_cell_tech_type = 3; + } + else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { + data_arr_ram_cell_tech_type = 4; + } + else { + cout << "ERROR: Invalid type!\n"; + exit(0); + } + continue; + } + + if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) { + sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); + + if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + data_arr_peri_global_tech_type = 0; + } + else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + data_arr_peri_global_tech_type = 1; + } + else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + data_arr_peri_global_tech_type = 2; + } + else { + cout << "ERROR: Invalid type!\n"; + exit(0); + } + continue; + } + + if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) { + sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var); + + if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + tag_arr_ram_cell_tech_type = 0; + } + else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + tag_arr_ram_cell_tech_type = 1; + } + else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + tag_arr_ram_cell_tech_type = 2; + } + else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { + tag_arr_ram_cell_tech_type = 3; + } + else 
if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { + tag_arr_ram_cell_tech_type = 4; + } + else { + cout << "ERROR: Invalid type!\n"; + exit(0); + } + continue; + } + + if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) { + sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); + + if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { + tag_arr_peri_global_tech_type = 0; + } + else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { + tag_arr_peri_global_tech_type = 1; + } + else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { + tag_arr_peri_global_tech_type = 2; + } + else { + cout << "ERROR: Invalid type!\n"; + exit(0); + } + continue; + } + if(!strncmp("-design", line, strlen("-design"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, + &(delay_wt), &(dynamic_power_wt), + &(leakage_power_wt), + &(cycle_time_wt), &(area_wt)); + continue; + } + + if(!strncmp("-deviate", line, strlen("-deviate"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, + &(delay_dev), &(dynamic_power_dev), + &(leakage_power_dev), + &(cycle_time_dev), &(area_dev)); + continue; + } + + if(!strncmp("-Optimize", line, strlen("-Optimize"))) { + sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var); + + if(!strncmp("ED^2", temp_var, strlen("ED^2"))) { + ed = 2; + } + else if(!strncmp("ED", temp_var, strlen("ED"))) { + ed = 1; + } + else { + ed = 0; + } + } + + if(!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, + &(delay_wt_nuca), &(dynamic_power_wt_nuca), + &(leakage_power_wt_nuca), + &(cycle_time_wt_nuca), &(area_wt_nuca)); + continue; + } + + if(!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) { + sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, + &(delay_dev_nuca), &(dynamic_power_dev_nuca), + &(leakage_power_dev_nuca), + &(cycle_time_dev_nuca), &(area_dev_nuca)); + continue; + } + + if(!strncmp("-Cache model", line, 
strlen("-cache model"))) { + sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var); + + if (!strncmp("UCA", temp_var, strlen("UCA"))) { + nuca = 0; + } + else { + nuca = 1; + } + continue; + } + + if(!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) { + sscanf(line, "-NUCA bank count %d", &(nuca_bank_count)); + + if (nuca_bank_count != 0) { + force_nuca_bank = 1; + } + continue; + } + + if(!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) { + sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); + + if (!strncmp("global", temp_var, strlen("global"))) { + wire_is_mat_type = 2; + continue; + } + else if (!strncmp("local", temp_var, strlen("local"))) { + wire_is_mat_type = 0; + continue; + } + else { + wire_is_mat_type = 1; + continue; + } + } + + if(!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) { + sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); + + if (!strncmp("global", temp_var, strlen("global"))) { + wire_os_mat_type = 2; + } + else { + wire_os_mat_type = 1; + } + continue; + } + + if(!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) { + sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var); + + if (!strncmp("aggressive", temp_var, strlen("aggressive"))) { + ic_proj_type = 0; + } + else { + ic_proj_type = 1; + } + continue; + } + + if(!strncmp("-Wire signaling", line, strlen("-wire signaling"))) { + sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); + + if (!strncmp("default", temp_var, strlen("default"))) { + force_wiretype = 0; + wt = Global; + } + else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) { + force_wiretype = 1; + wt = Global_10; + } + else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) { + force_wiretype = 1; + wt = Global_20; + } + else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) { + force_wiretype = 1; + wt = Global_30; + } + else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) { + force_wiretype = 1; + 
wt = Global_5; + } + else if (!(strncmp("Global", temp_var, strlen("Global")))) { + force_wiretype = 1; + wt = Global; + } + else if (!(strncmp("fullswing", temp_var, strlen("fullswing")))) { + force_wiretype = 1; + wt = Full_swing; + } + else if (!(strncmp("lowswing", temp_var, strlen("lowswing")))) { + force_wiretype = 1; + wt = Low_swing; + } + else { + cout << "Unknown wire type!\n"; + exit(0); + } + continue; + } + + + + if(!strncmp("-Core", line, strlen("-Core"))) { + sscanf(line, "-Core count %d\n", &(cores)); + if (cores > 16) { + printf("No. of cores should be less than 16!\n"); + } + continue; + } + + if(!strncmp("-Cache level", line, strlen("-Cache level"))) { + sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("L2", temp_var, strlen("L2"))) { + cache_level = 0; + } + else { + cache_level = 1; + } + } + + if(!strncmp("-Print level", line, strlen("-Print level"))) { + sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) { + print_detail = 1; + } + else { + print_detail = 0; + } + + } + if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) { + sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + add_ecc_b_ = true; + } + else { + add_ecc_b_ = false; + } + } + + if(!strncmp("-CLDriver vertical", line, strlen("-CLDriver vertical"))) { + sscanf(line, "-CLDriver vertical %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + cl_vertical = true; + } + else { + cl_vertical = false; + } + } + + if(!strncmp("-Array Power Gating", line, strlen("-Array Power Gating"))) { + sscanf(line, "-Array Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + array_power_gated = true; + } + else { + array_power_gated = false; + } + } + + if(!strncmp("-Bitline floating", line, strlen("-Bitline floating"))) { + sscanf(line, "-Bitline floating %[^\"]\"%[^\"]\"", jk, 
temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + bitline_floating = true; + } + else { + bitline_floating = false; + } + } + + if(!strncmp("-WL Power Gating", line, strlen("-WL Power Gating"))) { + sscanf(line, "-WL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + wl_power_gated = true; + } + else { + wl_power_gated = false; + } + } + + if(!strncmp("-CL Power Gating", line, strlen("-CL Power Gating"))) { + sscanf(line, "-CL Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + cl_power_gated = true; + } + else { + cl_power_gated = false; + } + } + + if(!strncmp("-Interconnect Power Gating", line, strlen("-Interconnect Power Gating"))) { + sscanf(line, "-Interconnect Power Gating %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + interconect_power_gated = true; + } + else { + interconect_power_gated = false; + } + } + + if(!strncmp("-Power Gating Performance Loss", line, strlen("-Power Gating Performance Loss"))) { + sscanf(line, "-Power Gating Performance Loss %lf", &(perfloss)); + continue; + } + + if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) { + sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + print_input_args = true; + } + else { + print_input_args = false; + } + } + + if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) { + sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + force_cache_config = true; + } + else { + force_cache_config = false; + } + } + + if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) { + sscanf(line, "-Ndbl %d\n", &(ndbl)); + continue; + } + if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) { + sscanf(line, "-Ndwl %d\n", &(ndwl)); + continue; + } + if(!strncmp("-Nspd", line, strlen("-Nspd"))) { + sscanf(line, "-Nspd 
%d\n", &(nspd)); + continue; + } + if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { + sscanf(line, "-Ndsam1 %d\n", &(ndsam1)); + continue; + } + if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { + sscanf(line, "-Ndsam2 %d\n", &(ndsam2)); + continue; + } + if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) { + sscanf(line, "-Ndcm %d\n", &(ndcm)); + continue; + } + + // Parameters related to off-chip interconnect + + if(!strncmp("-dram type", line, strlen("-dram type"))) { + sscanf(line, "-dram type%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("DDR3", temp_var, strlen("DDR3"))) + { + io_type = DDR3; + } + else if(!strncmp("DDR4", temp_var, strlen("DDR4"))) + { + io_type = DDR4; + } + else if(!strncmp("LPDDR2", temp_var, strlen("LPDDR2"))) + { + io_type = LPDDR2; + } + else if(!strncmp("WideIO", temp_var, strlen("WideIO"))) + { + io_type = WideIO; + } + else if(!strncmp("Low_Swing_Diff", temp_var, strlen("Low_Swing_Diff"))) + { + io_type = Low_Swing_Diff; + } + else if(!strncmp("Serial", temp_var, strlen("Serial"))) + { + io_type = Serial; + } + else + { + cout << "Invalid Input for dram type!" << endl; + exit(1); + } + // sscanf(line, "-io_type \"%c\"\n", &(io_type)); + } + if(!strncmp("-io state", line, strlen("-io state"))) { + sscanf(line, "-io state%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("READ", temp_var, strlen("READ"))) + { + iostate = READ; + } + else if(!strncmp("WRITE", temp_var, strlen("WRITE"))) + { + iostate = WRITE; + } + else if(!strncmp("IDLE", temp_var, strlen("IDLE"))) + { + iostate = IDLE; + } + else if(!strncmp("SLEEP", temp_var, strlen("SLEEP"))) + { + iostate = SLEEP; + } + else + { + cout << "Invalid Input for io state!" 
<< endl; + exit(1); + } + //sscanf(line, "-iostate \"%c\"\n", &(iostate)); + } + if(!strncmp("-addr_timing", line, strlen("-addr_timing"))) { + sscanf(line, "-addr_timing %lf", &(addr_timing)); + } + if(!strncmp("-dram ecc", line, strlen("-dram ecc"))) { + sscanf(line, "-dram ecc%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("NO_ECC", temp_var, strlen("NO_ECC"))) + { + dram_ecc = NO_ECC; + } + else if(!strncmp("SECDED", temp_var, strlen("SECDED"))) + { + dram_ecc = SECDED; + } + else if(!strncmp("CHIP_KILL", temp_var, strlen("CHIP_KILL"))) + { + dram_ecc = CHIP_KILL; + } + else + { + cout << "Invalid Input for dram ecc!" << endl; + exit(1); + } + //sscanf(line, "-dram_ecc \"%c\"\n", &(dram_ecc)); + } + if(!strncmp("-dram dimm", line, strlen("-dram dimm"))) { + sscanf(line, "-dram dimm%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("UDIMM", temp_var, strlen("UDIMM"))) + { + dram_dimm = UDIMM; + } + else if(!strncmp("RDIMM", temp_var, strlen("RDIMM"))) + { + dram_dimm = RDIMM; + } + else if(!strncmp("LRDIMM", temp_var, strlen("LRDIMM"))) + { + dram_dimm = LRDIMM; + } + else + { + cout << "Invalid Input for dram dimm!" 
<< endl; + exit(1); + } + //sscanf(line, "-dram_ecc \"%c\"\n", &(dram_ecc)); + } + + + if(!strncmp("-bus_bw", line, strlen("-bus_bw"))) { + sscanf(line, "-bus_bw %lf", &(bus_bw)); + } + if(!strncmp("-duty_cycle", line, strlen("-duty_cycle"))) { + sscanf(line, "-duty_cycle %lf", &(duty_cycle)); + } + if(!strncmp("-mem_density", line, strlen("-mem_density"))) { + sscanf(line, "-mem_density %lf", &(mem_density)); + } + if(!strncmp("-activity_dq", line, strlen("-activity_dq"))) { + sscanf(line, "-activity_dq %lf", &activity_dq); + } + if(!strncmp("-activity_ca", line, strlen("-activity_ca"))) { + sscanf(line, "-activity_ca %lf", &activity_ca); + } + if(!strncmp("-bus_freq", line, strlen("-bus_freq"))) { + sscanf(line, "-bus_freq %lf", &bus_freq); + } + if(!strncmp("-num_dq", line, strlen("-num_dq"))) { + sscanf(line, "-num_dq %d", &num_dq); + } + if(!strncmp("-num_dqs", line, strlen("-num_dqs"))) { + sscanf(line, "-num_dqs %d", &num_dqs); + } + if(!strncmp("-num_ca", line, strlen("-num_ca"))) { + sscanf(line, "-num_ca %d", &num_ca); + } + if(!strncmp("-num_clk", line, strlen("-num_clk"))) { + sscanf(line, "-num_clk %d", &num_clk); + if(num_clk<=0) + { + cout << "num_clk should be greater than zero!\n"; + exit(1); + } + } + if(!strncmp("-num_mem_dq", line, strlen("-num_mem_dq"))) { + sscanf(line, "-num_mem_dq %d", &num_mem_dq); + } + if(!strncmp("-mem_data_width", line, strlen("-mem_data_width"))) { + sscanf(line, "-mem_data_width %d", &mem_data_width); + } + + // added just for memcad + + if(!strncmp("-num_bobs", line, strlen("-num_bobs"))) { + sscanf(line, "-num_bobs %d", &num_bobs); + } + if(!strncmp("-capacity", line, strlen("-capacity"))) { + sscanf(line, "-capacity %d", &capacity); + } + if(!strncmp("-num_channels_per_bob", line, strlen("-num_channels_per_bob"))) { + sscanf(line, "-num_channels_per_bob %d", &num_channels_per_bob); + } + if(!strncmp("-first metric", line, strlen("-first metric"))) { + sscanf(line, "-first metric%[^\"]\"%[^\"]\"", jk, temp_var); + 
if (!strncmp("Cost", temp_var, strlen("Cost"))) + { + first_metric = Cost; + } + else if(!strncmp("Energy", temp_var, strlen("Energy"))) + { + first_metric = Energy; + } + else if(!strncmp("Bandwidth", temp_var, strlen("Bandwidth"))) + { + first_metric = Bandwidth; + } + else + { + cout << "Invalid Input for first metric!" << endl; + exit(1); + } + + } + if(!strncmp("-second metric", line, strlen("-second metric"))) { + sscanf(line, "-second metric%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("Cost", temp_var, strlen("Cost"))) + { + second_metric = Cost; + } + else if(!strncmp("Energy", temp_var, strlen("Energy"))) + { + second_metric = Energy; + } + else if(!strncmp("Bandwidth", temp_var, strlen("Bandwidth"))) + { + second_metric = Bandwidth; + } + else + { + cout << "Invalid Input for second metric!" << endl; + exit(1); + } + + } + if(!strncmp("-third metric", line, strlen("-third metric"))) { + sscanf(line, "-third metric%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("Cost", temp_var, strlen("Cost"))) + { + third_metric = Cost; + } + else if(!strncmp("Energy", temp_var, strlen("Energy"))) + { + third_metric = Energy; + } + else if(!strncmp("Bandwidth", temp_var, strlen("Bandwidth"))) + { + third_metric = Bandwidth; + } + else + { + cout << "Invalid Input for third metric!" << endl; + exit(1); + } + + } + if(!strncmp("-DIMM model", line, strlen("-DIMM model"))) { + sscanf(line, "-DIMM model%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("JUST_UDIMM", temp_var, strlen("JUST_UDIMM"))) + { + dimm_model = JUST_UDIMM; + } + else if(!strncmp("JUST_RDIMM", temp_var, strlen("JUST_RDIMM"))) + { + dimm_model = JUST_RDIMM; + } + else if(!strncmp("JUST_LRDIMM", temp_var, strlen("JUST_LRDIMM"))) + { + dimm_model = JUST_LRDIMM; + } + else if(!strncmp("ALL", temp_var, strlen("ALL"))) + { + dimm_model = ALL; + } + else + { + cout << "Invalid Input for DIMM model!" 
<< endl; + exit(1); + } + + } + if(!strncmp("-Low Power Permitted", line, strlen("-Low Power Permitted"))) { + sscanf(line, "-Low Power Permitted%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("T", temp_var, strlen("T"))) + { + low_power_permitted = true; + } + else if(!strncmp("F", temp_var, strlen("F"))) + { + low_power_permitted = false; + } + else + { + cout << "Invalid Input for Low Power Permitted!" << endl; + exit(1); + } + + } + if(!strncmp("-load", line, strlen("-load"))) { + sscanf(line, "-load %lf", &(load)); + } + if(!strncmp("-row_buffer_hit_rate", line, strlen("-row_buffer_hit_rate"))) { + sscanf(line, "-row_buffer_hit_rate %lf", &(row_buffer_hit_rate)); + } + if(!strncmp("-rd_2_wr_ratio", line, strlen("-rd_2_wr_ratio"))) { + sscanf(line, "-rd_2_wr_ratio %lf", &(rd_2_wr_ratio)); + } + if(!strncmp("-same_bw_in_bob", line, strlen("-same_bw_in_bob"))) { + sscanf(line, "-same_bw_in_bob%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("T", temp_var, strlen("T"))) + { + same_bw_in_bob = true; + } + else if(!strncmp("F", temp_var, strlen("F"))) + { + same_bw_in_bob = false; + } + else + { + cout << "Invalid Input for same_bw_in_bob!" << endl; + exit(1); + } + + } + if(!strncmp("-mirror_in_bob", line, strlen("-mirror_in_bob"))) { + sscanf(line, "-mirror_in_bob%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("T", temp_var, strlen("T"))) + { + mirror_in_bob = true; + } + else if(!strncmp("F", temp_var, strlen("F"))) + { + mirror_in_bob = false; + } + else + { + cout << "Invalid Input for mirror_in_bob!" << endl; + exit(1); + } + + } + if(!strncmp("-total_power", line, strlen("-total_power"))) { + sscanf(line, "-total_power%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("T", temp_var, strlen("T"))) + { + total_power = true; + } + else if(!strncmp("F", temp_var, strlen("F"))) + { + total_power = false; + } + else + { + cout << "Invalid Input for total_power!" 
<< endl; + exit(1); + } + + } + if(!strncmp("-verbose", line, strlen("-verbose"))) { + sscanf(line, "-verbose%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("T", temp_var, strlen("T"))) + { + verbose = true; + } + else if(!strncmp("F", temp_var, strlen("F"))) + { + verbose = false; + } + else + { + cout << "Invalid Input for same_bw_in_bob!" << endl; + exit(1); + } + + } + + + + + } + rpters_in_htree = true; + fclose(fp); +} + + void +InputParameter::display_ip() +{ + cout << "Cache size : " << cache_sz << endl; + cout << "Block size : " << line_sz << endl; + cout << "Associativity : " << assoc << endl; + cout << "Read only ports : " << num_rd_ports << endl; + cout << "Write only ports : " << num_wr_ports << endl; + cout << "Read write ports : " << num_rw_ports << endl; + cout << "Single ended read ports : " << num_se_rd_ports << endl; + if (fully_assoc||pure_cam) + { + cout << "Search ports : " << num_search_ports << endl; + } + cout << "Cache banks (UCA) : " << nbanks << endl; + cout << "Technology : " << F_sz_um << endl; + cout << "Temperature : " << temp << endl; + cout << "Tag size : " << tag_w << endl; + if (is_cache) + { + cout << "array type : " << "Cache" << endl; + } + if (pure_ram) + { + cout << "array type : " << "Scratch RAM" << endl; + } + if (pure_cam) + { + cout << "array type : " << "CAM" << endl; + } + cout << "Model as memory : " << is_main_mem << endl; + cout << "Model as 3D memory : " << is_3d_mem << endl; + cout << "Access mode : " << access_mode << endl; + cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl; + cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl; + cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl; + cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl; + cout << "Optimization target : " << ed << endl; + cout << "Design objective (UCA wt) : " << delay_wt << " " + << dynamic_power_wt << " " << leakage_power_wt << " " << 
cycle_time_wt + << " " << area_wt << endl; + cout << "Design objective (UCA dev) : " << delay_dev << " " + << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev + << " " << area_dev << endl; + if (nuca) + { + cout << "Cores : " << cores << endl; + + + cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " " + << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca + << " " << area_wt_nuca << endl; + cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " " + << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca + << " " << area_dev_nuca << endl; + } + cout << "Cache model : " << nuca << endl; + cout << "Nuca bank : " << nuca_bank_count << endl; + cout << "Wire inside mat : " << wire_is_mat_type << endl; + cout << "Wire outside mat : " << wire_os_mat_type << endl; + cout << "Interconnect projection : " << ic_proj_type << endl; + cout << "Wire signaling : " << force_wiretype << endl; + cout << "Print level : " << print_detail << endl; + cout << "ECC overhead : " << add_ecc_b_ << endl; + cout << "Page size : " << page_sz_bits << endl; + cout << "Burst length : " << burst_len << endl; + cout << "Internal prefetch width : " << int_prefetch_w << endl; + cout << "Force cache config : " << g_ip->force_cache_config << endl; + if (g_ip->force_cache_config) { + cout << "Ndwl : " << g_ip->ndwl << endl; + cout << "Ndbl : " << g_ip->ndbl << endl; + cout << "Nspd : " << g_ip->nspd << endl; + cout << "Ndcm : " << g_ip->ndcm << endl; + cout << "Ndsam1 : " << g_ip->ndsam1 << endl; + cout << "Ndsam2 : " << g_ip->ndsam2 << endl; + } + cout << "Subarray Driver direction : " << g_ip->cl_vertical << endl; + + // CACTI-I/O + cout << "iostate : " ; + switch(iostate) + { + case(READ): cout << "READ" << endl; break; + case(WRITE): cout << "WRITE" << endl; break; + case(IDLE): cout << "IDLE" << endl; break; + case(SLEEP): cout << "SLEEP" << endl; break; + default: assert(false); + } + cout << 
"dram_ecc : " ; + switch(dram_ecc) + { + case(NO_ECC): cout << "NO_ECC" << endl; break; + case(SECDED): cout << "SECDED" << endl; break; + case(CHIP_KILL): cout << "CHIP_KILL" << endl; break; + default: assert(false); + } + cout << "io_type : " ; + switch(io_type) + { + case(DDR3): cout << "DDR3" << endl; break; + case(DDR4): cout << "DDR4" << endl; break; + case(LPDDR2): cout << "LPDDR2" << endl; break; + case(WideIO): cout << "WideIO" << endl; break; + case(Low_Swing_Diff): cout << "Low_Swing_Diff" << endl; break; + default: assert(false); + } + cout << "dram_dimm : " ; + switch(dram_dimm) + { + case(UDIMM): cout << "UDIMM" << endl; break; + case(RDIMM): cout << "RDIMM" << endl; break; + case(LRDIMM): cout << "LRDIMM" << endl; break; + default: assert(false); + } + + + +} + + + +powerComponents operator+(const powerComponents & x, const powerComponents & y) +{ + powerComponents z; + + z.dynamic = x.dynamic + y.dynamic; + z.leakage = x.leakage + y.leakage; + z.gate_leakage = x.gate_leakage + y.gate_leakage; + z.short_circuit = x.short_circuit + y.short_circuit; + z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage; + + return z; +} + +powerComponents operator*(const powerComponents & x, double const * const y) +{ + powerComponents z; + + z.dynamic = x.dynamic*y[0]; + z.leakage = x.leakage*y[1]; + z.gate_leakage = x.gate_leakage*y[2]; + z.short_circuit = x.short_circuit*y[3]; + z.longer_channel_leakage = x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage + + return z; +} + + +powerDef operator+(const powerDef & x, const powerDef & y) +{ + powerDef z; + + z.readOp = x.readOp + y.readOp; + z.writeOp = x.writeOp + y.writeOp; + z.searchOp = x.searchOp + y.searchOp; + return z; +} + +powerDef operator*(const powerDef & x, double const * const y) +{ + powerDef z; + + z.readOp = x.readOp*y; + z.writeOp = x.writeOp*y; + z.searchOp = x.searchOp*y; + return z; +} + +uca_org_t cacti_interface(const string 
& infile_name) +{ + + //cout<<"TSV_proj_type: " << g_ip->TSV_proj_type << endl; + uca_org_t fin_res; + //uca_org_t result; + fin_res.valid = false; + + g_ip = new InputParameter(); + g_ip->parse_cfg(infile_name); + if(!g_ip->error_checking()) + exit(0); + // if (g_ip->print_input_args) + g_ip->display_ip(); + + + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. +// cout << winit.wire_res(256*8*64e-9) << endl; +// exit(0); + + + //CACTI3DD + // --- These two parameters are supposed for two different TSV technologies within one DRAM fabrication, currently assume one individual TSV geometry size for cost efficiency + g_ip->tsv_is_subarray_type = g_ip->TSV_proj_type; + g_ip->tsv_os_bank_type = g_ip->TSV_proj_type; + TSV tsv_test(Coarse);// ********* double len_ /* in um*/, double diam_, double TSV_pitch_, + if(g_ip->print_detail_debug) + { + tsv_test.print_TSV(); + } + +// For HighRadix Only +// //// Wire wirea(g_ip->wt, 1000); +// //// wirea.print_wire(); +// //// cout << "Wire Area " << wirea.area.get_area() << " sq. 
u" << endl; +// // winit.print_wire(); +// // +// HighRadix *hr; +// hr = new HighRadix(); +// hr->compute_power(); +// hr->print_router(); +// exit(0); +// +// double sub_switch_sz = 2; +// double rows = 32; +// for (int i=0; i<6; i++) { +// sub_switch_sz = pow(2, i); +// rows = 64/sub_switch_sz; +// hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7); +// hr->compute_power(); +// hr->print_router(); +// delete hr; +// } +// // HighRadix yarc; +// // yarc.compute_power(); +// // yarc.print_router(); +// winit.print_wire(); +// exit(0); +// For HighRadix Only End + + if (g_ip->nuca == 1) + { + Nuca n(&g_tp.peri_global); + n.sim_nuca(); + } + + //g_ip->display_ip(); + + + + IOTechParam iot(g_ip, g_ip->io_type, g_ip->num_mem_dq, g_ip->mem_data_width, g_ip->num_dq,g_ip->dram_dimm, 1,g_ip->bus_freq ); + Extio testextio(&iot); + testextio.extio_area(); + testextio.extio_eye(); + testextio.extio_power_dynamic(); + testextio.extio_power_phy(); + testextio.extio_power_term(); + + + /* + int freq[][4]={{400,533,667,800},{800,933,1066,1200}}; + + Mem_IO_type types[2]={DDR3,DDR4}; + + int max_load[3]={3,3,8}; + + for(int j=0;j<1;j++) + { + for(int connection=0;connection<3;connection++) + { + for(int frq=3;frq<4;frq++) + { + for(int load=1;load<=max_load[connection];load++) + { + IOTechParam iot(g_ip, types[j], load, 8, 72, connection, load, freq[j][frq]); + Extio testextio(&iot); + // testextio.extio_area(); + // testextio.extio_eye(); + testextio.extio_power_dynamic(); + testextio.extio_power_phy(); + testextio.extio_power_term(); + + } + cout << endl; + } + cout << endl; + } + cout << endl; + } + */ + + ///double total_io_p, total_phy_p, total_io_area, total_vmargin, total_tmargin; + //testextio.extio_power_area_timing(total_io_p, total_phy_p, total_io_area, total_vmargin, total_tmargin); + + solve(&fin_res); + + output_UCA(&fin_res); + output_data_csv(fin_res, infile_name + ".out"); + + + // Memcad Optimization + MemCadParameters memcad_params(g_ip); + 
solve_memcad(&memcad_params); + + + delete (g_ip); + return fin_res; +} + +//CACTI3DD's plain interface, please keep !!! +uca_org_t cacti_interface( + int dram_cap_tot_byte, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports,// para5 + int excl_write_ports, + int single_ended_read_ports, + int search_ports, + int banks, + double tech_node,//para10 + int output_width, + int specific_tag, + int tag_width, + int access_mode, + int cache, //para15 + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_cycle_time, //para20 + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, //para25 + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in,//para30 + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, + int wire_inside_mat_type_in,//para35 + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + int PAGE_SIZE_BITS_in,//para40 + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int force_wiretype, + int wiretype, + int force_config,//para45 + int ndwl, + int ndbl, + int nspd, + int ndcm, + int ndsam1,//para50 + int ndsam2, + int ecc, + int is_3d_dram, + int burst_depth, + int IO_width, + int sys_freq, + int debug_detail, + int num_dies, + int tsv_gran_is_subarray, + int tsv_gran_os_bank, + int num_tier_row_sprd, + int num_tier_col_sprd, + int partition_level + ) +{ + g_ip = new InputParameter(); + + uca_org_t fin_res; + fin_res.valid = false; + + g_ip->data_arr_ram_cell_tech_type = 
data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + + g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->wire_is_mat_type = wire_inside_mat_type_in; + g_ip->wire_os_mat_type = wire_outside_mat_type_in; + g_ip->burst_len = BURST_LENGTH_in; + g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; + g_ip->page_sz_bits = PAGE_SIZE_BITS_in; + + g_ip->num_die_3d = num_dies; + g_ip->cache_sz = dram_cap_tot_byte; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; + if (specific_tag == 0) { + g_ip->tag_w = 42; + } + else { + g_ip->tag_w = tag_width; + } + + g_ip->access_mode = access_mode; + g_ip->delay_wt = obj_func_delay; + g_ip->dynamic_power_wt = obj_func_dynamic_power; + g_ip->leakage_power_wt = obj_func_leakage_power; + g_ip->area_wt = obj_func_area; + g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->delay_dev = dev_func_delay; + g_ip->dynamic_power_dev = dev_func_dynamic_power; + g_ip->leakage_power_dev = dev_func_leakage_power; + g_ip->area_dev = dev_func_area; + g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->temp = temp; + g_ip->ed = ed_ed2_none; + + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache ==1) ? true : false; + g_ip->pure_ram = (cache ==0) ? true : false; + g_ip->pure_cam = (cache ==2) ? true : false; + g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; + g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; + g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; + + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; + g_ip->num_se_rd_ports = single_ended_read_ports; + g_ip->num_search_ports = search_ports; + + g_ip->print_detail = 1; + g_ip->nuca = 0; + + if (force_wiretype == 0) + { + g_ip->wt = Global; + g_ip->force_wiretype = false; + } + else + { g_ip->force_wiretype = true; + if (wiretype==10) { + g_ip->wt = Global_10; + } + if (wiretype==20) { + g_ip->wt = Global_20; + } + if (wiretype==30) { + g_ip->wt = Global_30; + } + if (wiretype==5) { + g_ip->wt = Global_5; + } + if (wiretype==0) { + g_ip->wt = Low_swing; + } + } + //g_ip->wt = Global_5; + if (force_config == 0) + { + g_ip->force_cache_config = false; + } + else + { + g_ip->force_cache_config = true; + g_ip->ndbl=ndbl; + g_ip->ndwl=ndwl; + g_ip->nspd=nspd; + g_ip->ndcm=ndcm; + g_ip->ndsam1=ndsam1; + g_ip->ndsam2=ndsam2; + + + } + + if (ecc==0){ + g_ip->add_ecc_b_=false; + } + else + { + g_ip->add_ecc_b_=true; + } + + //CACTI3DD + g_ip->is_3d_mem = is_3d_dram; + g_ip->burst_depth = burst_depth; + g_ip->io_width =IO_width; + g_ip->sys_freq_MHz = sys_freq; + g_ip->print_detail_debug = debug_detail; + + g_ip->tsv_is_subarray_type = tsv_gran_is_subarray; + g_ip->tsv_os_bank_type = tsv_gran_os_bank; + + g_ip->partition_gran = partition_level; + g_ip->num_tier_row_sprd = num_tier_row_sprd; + g_ip->num_tier_col_sprd = num_tier_col_sprd; + if(partition_level == 3) + g_ip->fine_gran_bank_lvl = true; + else + g_ip->fine_gran_bank_lvl = false; + + if(!g_ip->error_checking()) + exit(0); + + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. 
+ + //tsv + //TSV tsv_test(Coarse); + //tsv_test.print_TSV(); + + g_ip->display_ip(); + solve(&fin_res); + output_UCA(&fin_res); + output_data_csv_3dd(fin_res); + delete (g_ip); + + return fin_res; +} + +//cacti6.5's plain interface, please keep !!! +uca_org_t cacti_interface( + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, + int banks, + double tech_node, // in nm + int page_sz, + int burst_length, + int pre_width, + int output_width, + int specific_tag, + int tag_width, + int access_mode, //0 normal, 1 seq, 2 fast + int cache, //scratch ram or cache + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_area, + int obj_func_cycle_time, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in, // 0-4 + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, // 0 - aggressive, 1 - normal + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, + int is_nuca, // 0 - UCA, 1 - NUCA + int core_count, + int cache_level, // 0 - L2, 1 - L3 + int nuca_bank_count, + int nuca_obj_func_delay, + int nuca_obj_func_dynamic_power, + int nuca_obj_func_leakage_power, + int nuca_obj_func_area, + int nuca_obj_func_cycle_time, + int nuca_dev_func_delay, + int nuca_dev_func_dynamic_power, + int nuca_dev_func_leakage_power, + int nuca_dev_func_area, + int nuca_dev_func_cycle_time, + int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported + int p_input) +{ + g_ip = new 
InputParameter(); + g_ip->add_ecc_b_ = true; + + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + + g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->wire_is_mat_type = wire_inside_mat_type_in; + g_ip->wire_os_mat_type = wire_outside_mat_type_in; + g_ip->burst_len = burst_length; + g_ip->int_prefetch_w = pre_width; + g_ip->page_sz_bits = page_sz; + + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; + if (tag_width == 0) { + g_ip->tag_w = 42; + } + else { + g_ip->tag_w = tag_width; + } + + g_ip->access_mode = access_mode; + g_ip->delay_wt = obj_func_delay; + g_ip->dynamic_power_wt = obj_func_dynamic_power; + g_ip->leakage_power_wt = obj_func_leakage_power; + g_ip->area_wt = obj_func_area; + g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->delay_dev = dev_func_delay; + g_ip->dynamic_power_dev = dev_func_dynamic_power; + g_ip->leakage_power_dev = dev_func_leakage_power; + g_ip->area_dev = dev_func_area; + g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->ed = ed_ed2_none; + + switch(wt) { + case (0): + g_ip->force_wiretype = 0; + g_ip->wt = Global; + break; + case (1): + g_ip->force_wiretype = 1; + g_ip->wt = Global; + break; + case (2): + g_ip->force_wiretype = 1; + g_ip->wt = Global_5; + break; + case (3): + g_ip->force_wiretype = 1; + g_ip->wt = Global_10; + break; + case (4): + g_ip->force_wiretype = 1; + g_ip->wt = Global_20; + break; + case (5): + g_ip->force_wiretype = 1; + g_ip->wt = Global_30; + break; + case (6): + g_ip->force_wiretype = 1; + g_ip->wt = Low_swing; + break; + default: + cout << "Unknown wire type!\n"; + exit(0); + } + + g_ip->delay_wt_nuca = 
nuca_obj_func_delay; + g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power; + g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power; + g_ip->area_wt_nuca = nuca_obj_func_area; + g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; + g_ip->delay_dev_nuca = dev_func_delay; + g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power; + g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power; + g_ip->area_dev_nuca = nuca_dev_func_area; + g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; + g_ip->nuca = is_nuca; + g_ip->nuca_bank_count = nuca_bank_count; + if(nuca_bank_count > 0) { + g_ip->force_nuca_bank = 1; + } + g_ip->cores = core_count; + g_ip->cache_level = cache_level; + + g_ip->temp = temp; + + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache != 0) ? true : false; + g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; + + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; + g_ip->num_se_rd_ports = single_ended_read_ports; + g_ip->print_detail = 1; + g_ip->nuca = 0; + + g_ip->wt = Global_5; + g_ip->force_cache_config = false; + g_ip->force_wiretype = false; + g_ip->print_input_args = p_input; + + + uca_org_t fin_res; + fin_res.valid = false; + + if (g_ip->error_checking() == false) exit(0); + if (g_ip->print_input_args) + g_ip->display_ip(); + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. + + if (g_ip->nuca == 1) + { + Nuca n(&g_tp.peri_global); + n.sim_nuca(); + } + solve(&fin_res); + + output_UCA(&fin_res); + + delete (g_ip); + return fin_res; +} + +//McPAT's plain interface, please keep !!! 
+uca_org_t cacti_interface( + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports,// para5 + int excl_write_ports, + int single_ended_read_ports, + int search_ports, + int banks, + double tech_node,//para10 + int output_width, + int specific_tag, + int tag_width, + int access_mode, + int cache, //para15 + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_cycle_time, //para20 + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, //para25 + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in,//para30 + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, + int wire_inside_mat_type_in,//para35 + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + int PAGE_SIZE_BITS_in,//para40 + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int force_wiretype, + int wiretype, + int force_config,//para45 + int ndwl, + int ndbl, + int nspd, + int ndcm, + int ndsam1,//para50 + int ndsam2, + int ecc) +{ + g_ip = new InputParameter(); + + uca_org_t fin_res; + fin_res.valid = false; + + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + + g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->wire_is_mat_type = 
wire_inside_mat_type_in; + g_ip->wire_os_mat_type = wire_outside_mat_type_in; + g_ip->burst_len = BURST_LENGTH_in; + g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; + g_ip->page_sz_bits = PAGE_SIZE_BITS_in; + + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; + if (specific_tag == 0) { + g_ip->tag_w = 42; + } + else { + g_ip->tag_w = tag_width; + } + + g_ip->access_mode = access_mode; + g_ip->delay_wt = obj_func_delay; + g_ip->dynamic_power_wt = obj_func_dynamic_power; + g_ip->leakage_power_wt = obj_func_leakage_power; + g_ip->area_wt = obj_func_area; + g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->delay_dev = dev_func_delay; + g_ip->dynamic_power_dev = dev_func_dynamic_power; + g_ip->leakage_power_dev = dev_func_leakage_power; + g_ip->area_dev = dev_func_area; + g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->temp = temp; + g_ip->ed = ed_ed2_none; + + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache ==1) ? true : false; + g_ip->pure_ram = (cache ==0) ? true : false; + g_ip->pure_cam = (cache ==2) ? true : false; + g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; + g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; + g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; + + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; + g_ip->num_se_rd_ports = single_ended_read_ports; + g_ip->num_search_ports = search_ports; + + g_ip->print_detail = 1; + g_ip->nuca = 0; + + if (force_wiretype == 0) + { + g_ip->wt = Global; + g_ip->force_wiretype = false; + } + else + { g_ip->force_wiretype = true; + if (wiretype==10) { + g_ip->wt = Global_10; + } + if (wiretype==20) { + g_ip->wt = Global_20; + } + if (wiretype==30) { + g_ip->wt = Global_30; + } + if (wiretype==5) { + g_ip->wt = Global_5; + } + if (wiretype==0) { + g_ip->wt = Low_swing; + } + } + //g_ip->wt = Global_5; + if (force_config == 0) + { + g_ip->force_cache_config = false; + } + else + { + g_ip->force_cache_config = true; + g_ip->ndbl=ndbl; + g_ip->ndwl=ndwl; + g_ip->nspd=nspd; + g_ip->ndcm=ndcm; + g_ip->ndsam1=ndsam1; + g_ip->ndsam2=ndsam2; + + + } + + if (ecc==0){ + g_ip->add_ecc_b_=false; + } + else + { + g_ip->add_ecc_b_=true; + } + + + if(!g_ip->error_checking()) + exit(0); + + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. 
+ + g_ip->display_ip(); + solve(&fin_res); + output_UCA(&fin_res); + output_data_csv(fin_res); + delete (g_ip); + + return fin_res; +} + + + +bool InputParameter::error_checking() +{ + int A; + bool seq_access = false; + fast_access = true; + + switch (access_mode) + { + case 0: + seq_access = false; + fast_access = false; + break; + case 1: + seq_access = true; + fast_access = false; + break; + case 2: + seq_access = false; + fast_access = true; + break; + } + + if(is_main_mem) + { + if(ic_proj_type == 0 && !g_ip->is_3d_mem) + { + cerr << "DRAM model supports only conservative interconnect projection!\n\n"; + return false; + } + } + + + uint32_t B = line_sz; + + if (B < 1) + { + cerr << "Block size must >= 1" << endl; + return false; + } + else if (B*8 < out_w) + { + cerr << "Block size must be at least " << out_w/8 << endl; + return false; + } + + if (F_sz_um <= 0) + { + cerr << "Feature size must be > 0" << endl; + return false; + } + else if (F_sz_um > 0.091) + { + cerr << "Feature size must be <= 90 nm" << endl; + return false; + } + + + uint32_t RWP = num_rw_ports; + uint32_t ERP = num_rd_ports; + uint32_t EWP = num_wr_ports; + uint32_t NSER = num_se_rd_ports; + uint32_t SCHP = num_search_ports; + +//TODO: revisit this. This is an important feature. thought this should be used +// // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to +// // the number of banks, we assume that the multiple ports are implemented via the multiple banks. +// // In such a case we assume that each bank has 1 RWP port. 
+// if ((RWP + ERP + EWP) <= nbanks && nbanks>1) +// { +// RWP = 1; +// ERP = 0; +// EWP = 0; +// NSER = 0; +// } +// else if ((RWP < 0) || (EWP < 0) || (ERP < 0)) +// { +// cerr << "Ports must >=0" << endl; +// return false; +// } +// else if (RWP > 2) +// { +// cerr << "Maximum of 2 read/write ports" << endl; +// return false; +// } +// else if ((RWP+ERP+EWP) < 1) + // Changed to new implementation: + // The number of ports specified at input is per bank + if ((RWP+ERP+EWP) < 1) + { + cerr << "Must have at least one port" << endl; + return false; + } + + if (is_pow2(nbanks) == false) + { + cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl; + return false; + } + + int C = cache_sz/nbanks; + if (C < 64 && !g_ip->is_3d_mem) + { + cerr << "Cache size must >=64" << endl; + return false; + } + +//TODO: revisit this +// if (pure_ram==true && assoc!=1) +// { +// cerr << "Pure RAM must have assoc as 1" << endl; +// return false; +// } + + //fully assoc and cam check + if (is_cache && assoc==0) + fully_assoc =true; + else + fully_assoc = false; + + if (pure_cam==true && assoc!=0) + { + cerr << "Pure CAM must have associativity as 0" << endl; + return false; + } + + if (assoc==0 && (pure_cam==false && is_cache ==false)) + { + cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl; + return false; + } + + if ((fully_assoc==true || pure_cam==true) + && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type + || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type )) + { + cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl; + return false; + } + + if ((fully_assoc==true || pure_cam==true) + && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram)) + { + cerr << "DRAM based CAM and fully associative cache are not supported" << endl; + return false; + } + + if ((fully_assoc==true || pure_cam==true) + && 
(is_main_mem==true)) + { + cerr << "CAM and fully associative cache cannot be as main memory" << endl; + return false; + } + + if ((fully_assoc || pure_cam) && SCHP<1) + { + cerr << "CAM and fully associative must have at least 1 search port" << endl; + return false; + } + + if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam))) + { + ERP=SCHP; + } + +// if ((!(fully_assoc || pure_cam)) && SCHP>=1) +// { +// cerr << "None CAM and fully associative cannot have search ports" << endl; +// return false; +// } + + if (assoc == 0) + { + A = C/B; + //fully_assoc = true; + } + else + { + if (assoc == 1) + { + A = 1; + //fully_assoc = false; + } + else + { + //fully_assoc = false; + A = assoc; + if (is_pow2(A) == false) + { + cerr << "Associativity must be a power of 2" << endl; + return false; + } + } + } + + if (C/(B*A) <= 1 && assoc!=0 && !g_ip->is_3d_mem) + { + cerr << "Number of sets is too small: " << endl; + cerr << " Need to either increase cache size, or decrease associativity or block size" << endl; + cerr << " (or use fully associative cache)" << endl; + return false; + } + + block_sz = B; + + /*dt: testing sequential access mode*/ + if(seq_access) + { + tag_assoc = A; + data_assoc = 1; + is_seq_acc = true; + } + else + { + tag_assoc = A; + data_assoc = A; + is_seq_acc = false; + } + + if (assoc==0) + { + data_assoc = 1; + } + num_rw_ports = RWP; + num_rd_ports = ERP; + num_wr_ports = EWP; + num_se_rd_ports = NSER; + if (!(fully_assoc || pure_cam)) + num_search_ports = 0; + nsets = C/(B*A); + + if (temp < 300 || temp > 400 || temp%10 != 0) + { + cerr << temp << " Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl; + return false; + } + + if (nsets < 1 && !g_ip->is_3d_mem) + { + cerr << "Less than one set..." 
<< endl; + return false; + } + + power_gating = (array_power_gated + || bitline_floating + || wl_power_gated + || cl_power_gated + || interconect_power_gated)?true:false; + + return true; +} + +void output_data_csv_3dd(const uca_org_t & fin_res) +{ + //TODO: the csv output should remain + fstream file("out.csv", ios::in); + bool print_index = file.fail(); + file.close(); + + file.open("out.csv", ios::out|ios::app); + if (file.fail() == true) + { + cerr << "File out.csv could not be opened successfully" << endl; + } + else + { + //print_index = false; + if (print_index == true) + { + file << "Tech node (nm), "; + file << "Number of tiers, "; + file << "Capacity (MB) per die, "; + file << "Number of banks, "; + file << "Page size in bits, "; + //file << "Output width (bits), "; + file << "Burst depth, "; + file << "IO width, "; + file << "Ndwl, "; + file << "Ndbl, "; + file << "N rows in subarray, "; + file << "N cols in subarray, "; +// file << "Access time (ns), "; +// file << "Random cycle time (ns), "; +// file << "Multisubbank interleave cycle time (ns), "; + +// file << "Delay request network (ns), "; +// file << "Delay inside mat (ns), "; +// file << "Delay reply network (ns), "; +// file << "Tag array access time (ns), "; +// file << "Data array access time (ns), "; +// file << "Refresh period (microsec), "; +// file << "DRAM array availability (%), "; + + + +// file << "Dynamic search energy (nJ), "; +// file << "Dynamic read energy (nJ), "; +// file << "Dynamic write energy (nJ), "; +// file << "Tag Dynamic read energy (nJ), "; +// file << "Data Dynamic read energy (nJ), "; +// file << "Dynamic read power (mW), "; +// file << "Standby leakage per bank(mW), "; +// file << "Leakage per bank with leak power management (mW), "; +// file << "Leakage per bank with leak power management (mW), "; +// file << "Refresh power as percentage of standby leakage, "; + file << "Area (mm2), "; + +// file << "Nspd, "; +// file << "Ndcm, "; +// file << "Ndsam_level_1, "; +// 
file << "Ndsam_level_2, "; + file << "Data arrary area efficiency %, "; +// file << "Ntwl, "; +// file << "Ntbl, "; +// file << "Ntspd, "; +// file << "Ntcm, "; +// file << "Ntsam_level_1, "; +// file << "Ntsam_level_2, "; +// file << "Tag arrary area efficiency %, "; + +// file << "Resistance per unit micron (ohm-micron), "; +// file << "Capacitance per unit micron (fF per micron), "; +// file << "Unit-length wire delay (ps), "; +// file << "FO4 delay (ps), "; +// file << "delay route to bank (including crossb delay) (ps), "; +// file << "Crossbar delay (ps), "; +// file << "Dyn read energy per access from closed page (nJ), "; +// file << "Dyn read energy per access from open page (nJ), "; +// file << "Leak power of an subbank with page closed (mW), "; +// file << "Leak power of a subbank with page open (mW), "; +// file << "Leak power of request and reply networks (mW), "; +// file << "Number of subbanks, "; + + file << "Number of TSVs in total, "; + file << "Delay of TSVs (ns) worst case, "; + file << "Area of TSVs (mm2) in total, "; + file << "Energy of TSVs (nJ) per access, "; + + file << "t_RCD (ns), "; + file << "t_RAS (ns), "; + file << "t_RC (ns), "; + file << "t_CAS (ns), "; + file << "t_RP (ns), "; + + + file << "Activate energy (nJ), "; + file << "Read energy (nJ), "; + file << "Write energy (nJ), "; + file << "Precharge energy (nJ), "; + //file << "tRCD, "; + //file << "CAS latency, "; + //file << "Precharge delay, "; +// file << "Perc dyn energy bitlines, "; +// file << "perc dyn energy wordlines, "; +// file << "perc dyn energy outside mat, "; +// file << "Area opt (perc), "; +// file << "Delay opt (perc), "; +// file << "Repeater opt (perc), "; + //file << "Aspect ratio"; + file << "t_RRD (ns), "; + file << "Number tiers for a row, "; + file << "Number tiers for a column, "; + file << "delay_row_activate_net, " ; + file << "delay_row_predecode_driver_and_block, " ; + file << "delay_row_decoder, " ; + file << "delay_local_wordline , " ; + file << 
"delay_bitlines, " ; + file << "delay_sense_amp, " ; + + file << "delay_column_access_net, " ; + file << "delay_column_predecoder, " ; + file << "delay_column_decoder, " ; + file << "delay_column_selectline, " ; + file << "delay_datapath_net, " ; + file << "delay_global_data, " ; + file << "delay_local_data_and_drv, " ; + file << "delay_data_buffer, " ; + file << "delay_subarray_output_driver, " ; + + file << "energy_row_activate_net, "; + file << "energy_row_predecode_driver_and_block, "; + file << "energy_row_decoder, "; + file << "energy_local_wordline, "; + file << "energy_bitlines, "; + file << "energy_sense_amp, "; + + file << "energy_column_access_net, "; + file << "energy_column_predecoder, "; + file << "energy_column_decoder, "; + file << "energy_column_selectline, "; + file << "energy_datapath_net, "; + file << "energy_global_data, "; + file << "energy_local_data_and_drv, "; + file << "energy_subarray_output_driver, "; + file << "energy_data_buffer, "; + + file << "area_subarray, "; + file << "area_lwl_drv, "; + file << "area_row_predec_dec, "; + file << "area_col_predec_dec, "; + file << "area_bus, "; + file << "area_address_bus, "; + file << "area_data_bus, "; + file << "area_data_drv, "; + file << "area_IOSA, "; + file << endl; + } + file << g_ip->F_sz_nm << ", "; + file << g_ip->num_die_3d << ", "; + file << g_ip->cache_sz * 1024 / g_ip->num_die_3d << ", "; + file << g_ip->nbanks << ", "; + file << g_ip->page_sz_bits << ", " ; +// file << g_ip->tag_assoc << ", "; + //file << g_ip->out_w << ", "; + file << g_ip->burst_depth << ", "; + file << g_ip->io_width << ", "; + + file << fin_res.data_array2->Ndwl << ", "; + file << fin_res.data_array2->Ndbl << ", "; + file << fin_res.data_array2->num_row_subarray << ", "; + file << fin_res.data_array2->num_col_subarray << ", "; +// file << fin_res.access_time*1e+9 << ", "; +// file << fin_res.cycle_time*1e+9 << ", "; +// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", "; +// file << 
fin_res.data_array2->delay_request_network*1e+9 << ", "; +// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", "; +// file << fin_res.data_array2.delay_reply_network*1e+9 << ", "; + +// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) +// { +// file << fin_res.tag_array2->access_time*1e+9 << ", "; +// } +// else +// { +// file << 0 << ", "; +// } +// file << fin_res.data_array2->access_time*1e+9 << ", "; +// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", "; +// file << fin_res.data_array2->dram_array_availability << ", "; +/* if (g_ip->fully_assoc || g_ip->pure_cam) + { + file << fin_res.power.searchOp.dynamic*1e+9 << ", "; + } + else + { + file << "N/A" << ", "; + } + */ +// file << fin_res.power.readOp.dynamic*1e+9 << ", "; +// file << fin_res.power.writeOp.dynamic*1e+9 << ", "; +// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) +// { +// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", "; +// } +// else +// { +// file << "NA" << ", "; +// } +// file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", "; +// if (g_ip->fully_assoc || g_ip->pure_cam) +// { +// file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", "; +// } +// else +// { +// file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", "; +// } + +// file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", "; +// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", "; +// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", "; + file << fin_res.data_array2->area *1e-6 << ", "; + +// file << fin_res.data_array2->Nspd << ", "; +// file << fin_res.data_array2->deg_bl_muxing << ", "; +// file << fin_res.data_array2->Ndsam_lev_1 << ", "; +// file << fin_res.data_array2->Ndsam_lev_2 << ", "; + file << fin_res.data_array2->area_efficiency << ", "; +/* if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) + { + file << 
fin_res.tag_array2->Ndwl << ", "; + file << fin_res.tag_array2->Ndbl << ", "; + file << fin_res.tag_array2->Nspd << ", "; + file << fin_res.tag_array2->deg_bl_muxing << ", "; + file << fin_res.tag_array2->Ndsam_lev_1 << ", "; + file << fin_res.tag_array2->Ndsam_lev_2 << ", "; + file << fin_res.tag_array2->area_efficiency << ", "; + } + else + { + file << "N/A" << ", "; + file << "N/A"<< ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + } +*/ + file << fin_res.data_array2->num_TSV_tot << ", "; + file << fin_res.data_array2->delay_TSV_tot *1e9 << ", "; + file << fin_res.data_array2->area_TSV_tot *1e-6 << ", "; + file << fin_res.data_array2->dyn_pow_TSV_per_access *1e9 << ", "; + + file << fin_res.data_array2->t_RCD *1e9 << ", "; + file << fin_res.data_array2->t_RAS *1e9 << ", "; + file << fin_res.data_array2->t_RC *1e9 << ", "; + file << fin_res.data_array2->t_CAS *1e9 << ", "; + file << fin_res.data_array2->t_RP *1e9 << ", "; + + + +// file << g_tp.wire_inside_mat.R_per_um << ", "; +// file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", "; +// file << g_tp.unit_len_wire_del / 1e-12 << ", "; +// file << g_tp.FO4 / 1e-12 << ", "; +// file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", "; +// file << fin_res.data_array.delay_crossbar / 1e-9 << ", "; +// file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", "; +// file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", "; +// file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", "; +// file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", "; +// file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", "; +// file << fin_res.data_array.number_subbanks << ", " ; + //file << fin_res.data_array.page_size_in_bits << ", " ; + + file << fin_res.data_array2->activate_energy * 1e9 << ", " ; + file << fin_res.data_array2->read_energy * 1e9 << ", " ; + 
file << fin_res.data_array2->write_energy * 1e9 << ", " ; + file << fin_res.data_array2->precharge_energy * 1e9 << ", " ; + //file << fin_res.data_array.trcd * 1e9 << ", " ; + //file << fin_res.data_array.cas_latency * 1e9 << ", " ; + //file << fin_res.data_array.precharge_delay * 1e9 << ", " ; + //file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width; + + file << fin_res.data_array2->t_RRD * 1e9 << ", " ; + file << g_ip->num_tier_row_sprd << ", " ; + file << g_ip->num_tier_col_sprd << ", " ; + + file << fin_res.data_array2->delay_row_activate_net * 1e9 << ", " ; + file << fin_res.data_array2->delay_row_predecode_driver_and_block * 1e9 << ", " ; + file << fin_res.data_array2->delay_row_decoder * 1e9 << ", " ; + file << fin_res.data_array2->delay_local_wordline * 1e9 << ", " ; + file << fin_res.data_array2->delay_bitlines * 1e9 << ", " ; + file << fin_res.data_array2->delay_sense_amp * 1e9 << ", " ; + file << fin_res.data_array2->delay_column_access_net * 1e9 << ", " ; + file << fin_res.data_array2->delay_column_predecoder * 1e9 << ", " ; + file << fin_res.data_array2->delay_column_decoder * 1e9 << ", " ; + file << fin_res.data_array2->delay_column_selectline * 1e9 << ", " ; + file << fin_res.data_array2->delay_datapath_net * 1e9 << ", " ; + file << fin_res.data_array2->delay_global_data * 1e9 << ", " ; + file << fin_res.data_array2->delay_local_data_and_drv * 1e9 << ", " ; + file << fin_res.data_array2->delay_data_buffer * 1e9 << ", " ; + file << fin_res.data_array2->delay_subarray_output_driver * 1e9 << ", " ; + + file << fin_res.data_array2->energy_row_activate_net * 1e9 << ", " ; + file << fin_res.data_array2->energy_row_predecode_driver_and_block * 1e9 << ", " ; + file << fin_res.data_array2->energy_row_decoder * 1e9 << ", " ; + file << fin_res.data_array2->energy_local_wordline * 1e9 << ", " ; + file << fin_res.data_array2->energy_bitlines * 1e9 << ", " ; + file << fin_res.data_array2->energy_sense_amp * 1e9 << ", " ; + + file << 
fin_res.data_array2->energy_column_access_net * 1e9 << ", " ; + file << fin_res.data_array2->energy_column_predecoder * 1e9 << ", " ; + file << fin_res.data_array2->energy_column_decoder * 1e9 << ", " ; + file << fin_res.data_array2->energy_column_selectline * 1e9 << ", " ; + file << fin_res.data_array2->energy_datapath_net * 1e9 << ", " ; + file << fin_res.data_array2->energy_global_data * 1e9 << ", " ; + file << fin_res.data_array2->energy_local_data_and_drv * 1e9 << ", " ; + file << fin_res.data_array2->energy_subarray_output_driver * 1e9 << ", " ; + file << fin_res.data_array2->energy_data_buffer * 1e9 << ", " ; + + file << fin_res.data_array2->area_subarray / 1e6 << ", " ; + file << fin_res.data_array2->area_lwl_drv / 1e6 << ", " ; + file << fin_res.data_array2->area_row_predec_dec / 1e6 << ", " ; + file << fin_res.data_array2->area_col_predec_dec / 1e6 << ", " ; + file << fin_res.data_array2->area_bus / 1e6 << ", " ; + file << fin_res.data_array2->area_address_bus / 1e6 << ", " ; + file << fin_res.data_array2->area_data_bus / 1e6 << ", " ; + file << fin_res.data_array2->area_data_drv / 1e6 << ", " ; + file << fin_res.data_array2->area_IOSA / 1e6 << ", " ; + file << fin_res.data_array2->area_sense_amp / 1e6 << ", " ; + file<F_sz_nm << ", "; + file << g_ip->cache_sz << ", "; + file << g_ip->nbanks << ", "; + file << g_ip->tag_assoc << ", "; + file << g_ip->out_w << ", "; + file << fin_res.access_time*1e+9 << ", "; + file << fin_res.cycle_time*1e+9 << ", "; +// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", "; +// file << fin_res.data_array2->delay_request_network*1e+9 << ", "; +// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", "; +// file << fin_res.data_array2.delay_reply_network*1e+9 << ", "; + +// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) +// { +// file << fin_res.tag_array2->access_time*1e+9 << ", "; +// } +// else +// { +// file << 0 << ", "; +// } +// file << fin_res.data_array2->access_time*1e+9 
<< ", "; +// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", "; +// file << fin_res.data_array2->dram_array_availability << ", "; + if (g_ip->fully_assoc || g_ip->pure_cam) + { + file << fin_res.power.searchOp.dynamic*1e+9 << ", "; + } + else + { + file << "N/A" << ", "; + } + file << fin_res.power.readOp.dynamic*1e+9 << ", "; + file << fin_res.power.writeOp.dynamic*1e+9 << ", "; +// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) +// { +// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", "; +// } +// else +// { +// file << "NA" << ", "; +// } +// file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", "; +// if (g_ip->fully_assoc || g_ip->pure_cam) +// { +// file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", "; +// } +// else +// { +// file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", "; +// } + + file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", "; +// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", "; +// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", "; + file << fin_res.area*1e-6 << ", "; + + file << fin_res.data_array2->Ndwl << ", "; + file << fin_res.data_array2->Ndbl << ", "; + file << fin_res.data_array2->Nspd << ", "; + file << fin_res.data_array2->deg_bl_muxing << ", "; + file << fin_res.data_array2->Ndsam_lev_1 << ", "; + file << fin_res.data_array2->Ndsam_lev_2 << ", "; + file << fin_res.data_array2->area_efficiency << ", "; + if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) + { + file << fin_res.tag_array2->Ndwl << ", "; + file << fin_res.tag_array2->Ndbl << ", "; + file << fin_res.tag_array2->Nspd << ", "; + file << fin_res.tag_array2->deg_bl_muxing << ", "; + file << fin_res.tag_array2->Ndsam_lev_1 << ", "; + file << fin_res.tag_array2->Ndsam_lev_2 << ", "; + file << fin_res.tag_array2->area_efficiency << ", "; + } + else + { + file << "N/A" << ", "; + file 
<< "N/A"<< ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + file << "N/A" << ", "; + } + +// file << g_tp.wire_inside_mat.R_per_um << ", "; +// file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", "; +// file << g_tp.unit_len_wire_del / 1e-12 << ", "; +// file << g_tp.FO4 / 1e-12 << ", "; +// file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", "; +// file << fin_res.data_array.delay_crossbar / 1e-9 << ", "; +// file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", "; +// file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", "; +// file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", "; +// file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", "; +// file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", "; +// file << fin_res.data_array.number_subbanks << ", " ; +// file << fin_res.data_array.page_size_in_bits << ", " ; +// file << fin_res.data_array.activate_energy * 1e9 << ", " ; +// file << fin_res.data_array.read_energy * 1e9 << ", " ; +// file << fin_res.data_array.write_energy * 1e9 << ", " ; +// file << fin_res.data_array.precharge_energy * 1e9 << ", " ; +// file << fin_res.data_array.trcd * 1e9 << ", " ; +// file << fin_res.data_array.cas_latency * 1e9 << ", " ; +// file << fin_res.data_array.precharge_delay * 1e9 << ", " ; +// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width; + file<is_3d_mem) + { + + cout<<"------- CACTI (version "<< VER_MAJOR_CACTI <<"."<< VER_MINOR_CACTI<<"."VER_COMMENT_CACTI + << " of " << VER_UPDATE_CACTI << ") 3D DRAM Main Memory -------"<cache_sz) << endl; + if(g_ip->num_die_3d>1) + { + cout << " Stacked die count: " << (int) g_ip->num_die_3d << endl; + if(g_ip->TSV_proj_type == 1) + cout << " TSV projection: industrial conservative" << endl; + else + cout << " TSV projection: ITRS aggressive" << endl; + } + cout << " Number of banks: " << (int) 
g_ip->nbanks << endl; + cout << " Technology size (nm): " << g_ip->F_sz_nm << endl; + cout << " Page size (bits): " << g_ip->page_sz_bits << endl; + cout << " Burst depth: " << g_ip->burst_depth << endl; + cout << " Chip IO width: " << g_ip->io_width << endl; + cout << " Best Ndwl: " << fr->data_array2->Ndwl << endl; + cout << " Best Ndbl: " << fr->data_array2->Ndbl << endl; + cout << " # rows in subarray: " << fr->data_array2->num_row_subarray << endl; + cout << " # columns in subarray: " << fr->data_array2->num_col_subarray << endl; + + cout <<"\nResults:\n"; + cout<<"Timing Components:"<data_array2->t_RCD * 1e9 << " ns" <data_array2->t_RAS * 1e9 << " ns" <data_array2->t_RC * 1e9 << " ns" <data_array2->t_CAS * 1e9 << " ns" <data_array2->t_RP* 1e9 << " ns" <data_array2->t_RRD* 1e9 << " ns" <data_array2->t_RRD * 1e9 << " ns"<data_array2->activate_energy * 1e9 << " nJ" <data_array2->read_energy * 1e9 << " nJ" <data_array2->write_energy * 1e9 << " nJ" <data_array2->precharge_energy * 1e9 << " nJ" <data_array2->activate_power * 1e3 << " mW" <data_array2->read_power * 1e3 << " mW" <data_array2->write_power * 1e3 << " mW" <burst_depth)/(g_ip->sys_freq_MHz*1e6)/2) * 1e3 << " mW" <data_array2->area/1e6<<" mm2"<partition_gran>0) ? fr->data_array2->area : (fr->data_array2->area/0.5); + double DRAM_area_per_die = (g_ip->partition_gran>0) ? fr->data_array2->area : (fr->data_array2->area + fr->data_array2->area_ram_cells*0.65); + //double DRAM_area_per_die = (g_ip->partition_gran>0) ? fr->data_array2->area : (fr->data_array2->area + 2.5e9*(double)(g_ip->F_sz_um)*(g_ip->F_sz_um)); + double area_efficiency_per_die = (g_ip->partition_gran>0) ? fr->data_array2->area_efficiency : (fr->data_array2->area_ram_cells / DRAM_area_per_die *100); + double DRAM_width = (g_ip->partition_gran>0) ? 
fr->data_array2->all_banks_width : (fr->data_array2->all_banks_width + (DRAM_area_per_die-fr->data_array2->area)/fr->data_array2->all_banks_height); + cout<<" DRAM core area: "<< fr->data_array2->area/1e6 <<" mm2"<partition_gran == 0) + cout<<" DRAM area per die: "<< DRAM_area_per_die/1e6 <<" mm2"<data_array2->all_banks_height/1e3 <<" mm"<num_die_3d>1) + { + cout<<"TSV Components:"<data_array2->area_TSV_tot /1e6 <<" mm2"<data_array2->delay_TSV_tot * 1e9 <<" ns"<data_array2->dyn_pow_TSV_per_access * 1e9 <<" nJ"<is_3d_mem) + { + // if (NUCA) + if (0) { + cout << "\n\n Detailed Bank Stats:\n"; + cout << " Bank Size (bytes): %d\n" << + (int) (g_ip->cache_sz); + } + else { + if (g_ip->data_arr_ram_cell_tech_type == 3) { + cout << "\n---------- CACTI (version "<< VER_MAJOR_CACTI <<"."<< VER_MINOR_CACTI<<"."VER_COMMENT_CACTI + << " of " << VER_UPDATE_CACTI << "), Uniform Cache Access " << + "Logic Process Based DRAM Model ----------\n"; + } + else if (g_ip->data_arr_ram_cell_tech_type == 4) { + cout << "\n---------- CACTI (version "<< VER_MAJOR_CACTI <<"."<< VER_MINOR_CACTI<<"."VER_COMMENT_CACTI + << " of " << VER_UPDATE_CACTI << "), Uniform" << + "Cache Access Commodity DRAM Model ----------\n"; + } + else { + cout << "\n---------- CACTI (version "<< VER_MAJOR_CACTI <<"."<< VER_MINOR_CACTI<<"."VER_COMMENT_CACTI + << " of " << VER_UPDATE_CACTI << "), Uniform Cache Access " + "SRAM Model ----------\n"; + } + cout << "\nCache Parameters:\n"; + cout << " Total cache size (bytes): " << + (int) (g_ip->cache_sz) << endl; + } + + cout << " Number of banks: " << (int) g_ip->nbanks << endl; + if (g_ip->fully_assoc|| g_ip->pure_cam) + cout << " Associativity: fully associative\n"; + else { + if (g_ip->tag_assoc == 1) + cout << " Associativity: direct mapped\n"; + else + cout << " Associativity: " << + g_ip->tag_assoc << endl; + } + + + cout << " Block size (bytes): " << g_ip->line_sz << endl; + cout << " Read/write Ports: " << + g_ip->num_rw_ports << endl; + cout << " Read ports: " 
<< + g_ip->num_rd_ports << endl; + cout << " Write ports: " << + g_ip->num_wr_ports << endl; + if (g_ip->fully_assoc|| g_ip->pure_cam) + cout << " search ports: " << + g_ip->num_search_ports << endl; + cout << " Technology size (nm): " << + g_ip->F_sz_nm << endl << endl; + + cout << " Access time (ns): " << fr->access_time*1e9 << endl; + cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl; + if (g_ip->data_arr_ram_cell_tech_type >= 4) { + cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl; + cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl; + cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl; + cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl; + cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl; + cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl; + cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl; + cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl; + cout << " Refresh power (mW): " << + fr->data_array2->refresh_power*1e3 << endl; + } + else { + if ((g_ip->fully_assoc|| g_ip->pure_cam)) + { + cout << " Total dynamic associative search energy per access (nJ): " << + fr->power.searchOp.dynamic*1e9 << endl; +// cout << " Total dynamic read energy per access (nJ): " << +// fr->power.readOp.dynamic*1e9 << endl; +// cout << " Total dynamic write energy per access (nJ): " << +// fr->power.writeOp.dynamic*1e9 << endl; + } +// else +// { + cout << " Total dynamic read energy per access (nJ): " << + fr->power.readOp.dynamic*1e9 << endl; + cout << " Total dynamic write energy per access (nJ): " << + fr->power.writeOp.dynamic*1e9 << endl; +// } + cout << " Total leakage power of a bank" + " (mW): " << fr->power.readOp.leakage*1e3 << endl; + cout << " Total 
gate leakage power of a bank" + " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl; + } + + if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4) + { + } + cout << " Cache height x width (mm): " << + fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl; + + + cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl; + cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl; + cout << " Best Nspd : " << fr->data_array2->Nspd << endl; + cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl; + cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl; + cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl; + + if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) + { + cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl; + cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl; + cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl; + cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl; + cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl; + cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl; + } + + switch (fr->data_array2->wt) { + case (0): + cout << " Data array, H-tree wire type: Delay optimized global wires\n"; + break; + case (1): + cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n"; + break; + case (2): + cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n"; + break; + case (3): + cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n"; + break; + case (4): + cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n"; + break; + case (5): + cout << " Data array, wire type: Low swing wires\n"; + break; + default: + cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) { + switch (fr->tag_array2->wt) { + case (0): + cout << " Tag 
array, H-tree wire type: Delay optimized global wires\n"; + break; + case (1): + cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n"; + break; + case (2): + cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n"; + break; + case (3): + cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n"; + break; + case (4): + cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n"; + break; + case (5): + cout << " Tag array, wire type: Low swing wires\n"; + break; + default: + cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <is_3d_mem) + if (g_ip->print_detail) + { + //if(g_ip->fully_assoc) return; + + if (g_ip->is_3d_mem) + { + cout << endl << endl << "3D DRAM Detail Components:" << endl << endl; + cout << endl << "Time Components:" << endl << endl; + cout << "\t row activation bus delay (ns): " << fr->data_array2->delay_row_activate_net*1e9 << endl; + cout << "\t row predecoder delay (ns): " << fr->data_array2->delay_row_predecode_driver_and_block*1e9 << endl; + cout << "\t row decoder delay (ns): " << fr->data_array2->delay_row_decoder*1e9 << endl; + cout << "\t local wordline delay (ns): " << fr->data_array2->delay_local_wordline*1e9 << endl; + cout << "\t bitline delay (ns): " << fr->data_array2->delay_bitlines*1e9 << endl; + cout << "\t sense amp delay (ns): " << fr->data_array2->delay_sense_amp*1e9 << endl; + cout << "\t column access bus delay (ns): " << fr->data_array2->delay_column_access_net*1e9 << endl; + cout << "\t column predecoder delay (ns): " << fr->data_array2->delay_column_predecoder*1e9 << endl; + cout << "\t column decoder delay (ns): " << fr->data_array2->delay_column_decoder*1e9 << endl; + //cout << "\t column selectline delay (ns): " << fr->data_array2->delay_column_selectline*1e9 << endl; + cout << "\t datapath bus delay (ns): " << fr->data_array2->delay_datapath_net*1e9 << endl; + cout << "\t global dataline delay (ns): " << 
fr->data_array2->delay_global_data*1e9 << endl; + cout << "\t local dataline delay (ns): " << fr->data_array2->delay_local_data_and_drv*1e9 << endl; + cout << "\t data buffer delay (ns): " << fr->data_array2->delay_data_buffer*1e9 << endl; + cout << "\t subarray output driver delay (ns): " << fr->data_array2->delay_subarray_output_driver*1e9 << endl; + + cout << endl << "Energy Components:" << endl << endl; + cout << "\t row activation bus energy (nJ): " << fr->data_array2->energy_row_activate_net*1e9 << endl; + cout << "\t row predecoder energy (nJ): " << fr->data_array2->energy_row_predecode_driver_and_block*1e9 << endl; + cout << "\t row decoder energy (nJ): " << fr->data_array2->energy_row_decoder*1e9 << endl; + cout << "\t local wordline energy (nJ): " << fr->data_array2->energy_local_wordline*1e9 << endl; + cout << "\t bitline energy (nJ): " << fr->data_array2->energy_bitlines*1e9 << endl; + cout << "\t sense amp energy (nJ): " << fr->data_array2->energy_sense_amp*1e9 << endl; + cout << "\t column access bus energy (nJ): " << fr->data_array2->energy_column_access_net*1e9 << endl; + cout << "\t column predecoder energy (nJ): " << fr->data_array2->energy_column_predecoder*1e9 << endl; + cout << "\t column decoder energy (nJ): " << fr->data_array2->energy_column_decoder*1e9 << endl; + cout << "\t column selectline energy (nJ): " << fr->data_array2->energy_column_selectline*1e9 << endl; + cout << "\t datapath bus energy (nJ): " << fr->data_array2->energy_datapath_net*1e9 << endl; + cout << "\t global dataline energy (nJ): " << fr->data_array2->energy_global_data*1e9 << endl; + cout << "\t local dataline energy (nJ): " << fr->data_array2->energy_local_data_and_drv*1e9 << endl; + cout << "\t data buffer energy (nJ): " << fr->data_array2->energy_subarray_output_driver*1e9 << endl; + //cout << "\t subarray output driver energy (nJ): " << fr->data_array2->energy_data_buffer*1e9 << endl; + + cout << endl << "Area Components:" << endl << endl; + //cout << "\t subarray 
area (mm2): " << fr->data_array2->area_subarray/1e6 << endl; + cout << "\t DRAM cell area (mm2): " << fr->data_array2->area_ram_cells/1e6 << endl; + cout << "\t local WL driver area (mm2): " << fr->data_array2->area_lwl_drv/1e6 << endl; + cout << "\t subarray sense amp area (mm2): " << fr->data_array2->area_sense_amp/1e6 << endl; + cout << "\t row predecoder/decoder area (mm2): " << fr->data_array2->area_row_predec_dec/1e6 << endl; + cout << "\t column predecoder/decoder area (mm2): " << fr->data_array2->area_col_predec_dec/1e6 << endl; + cout << "\t center stripe bus area (mm2): " << fr->data_array2->area_bus/1e6 << endl; + cout << "\t address bus area (mm2): " << fr->data_array2->area_address_bus/1e6 << endl; + cout << "\t data bus area (mm2): " << fr->data_array2->area_data_bus/1e6 << endl; + cout << "\t data driver area (mm2): " << fr->data_array2->area_data_drv/1e6 << endl; + cout << "\t IO secondary sense amp area (mm2): " << fr->data_array2->area_IOSA/1e6 << endl; + cout << "\t TSV area (mm2): "<< fr->data_array2->area_TSV_tot /1e6 << endl; + + } + else //if (!g_ip->is_3d_mem) + { + if (g_ip->power_gating) + { + /* Energy/Power stats */ + cout << endl << endl << "Power-gating Components:" << endl << endl; + /* Data array power-gating stats */ + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + cout << " Data array: " << endl; + else if (g_ip->pure_cam) + cout << " CAM array: " << endl; + else + cout << " Fully associative cache array: " << endl; + + cout << "\t Sub-array Sleep Tx size (um) - " << + fr->data_array2->sram_sleep_tx_width << endl; + + // cout << "\t Sub-array Sleep Tx total size (um) - " << + // fr->data_array2->sram_sleep_tx_width << endl; + + cout << "\t Sub-array Sleep Tx total area (mm^2) - " << + fr->data_array2->sram_sleep_tx_area*1e-6 << endl; + + cout << "\t Sub-array wakeup time (ns) - " << + fr->data_array2->sram_sleep_wakeup_latency*1e9 << endl; + + cout << "\t Sub-array Tx energy (nJ) - " << + 
fr->data_array2->sram_sleep_wakeup_energy*1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t WL Sleep Tx size (um) - " << + fr->data_array2->wl_sleep_tx_width << endl; + + // cout << "\t WL Sleep total Tx size (um) - " << + // fr->data_array2->wl_sleep_tx_width << endl; + + cout << "\t WL Sleep Tx total area (mm^2) - " << + fr->data_array2->wl_sleep_tx_area*1e-6 << endl; + + cout << "\t WL wakeup time (ns) - " << + fr->data_array2->wl_sleep_wakeup_latency*1e9 << endl; + + cout << "\t WL Tx energy (nJ) - " << + fr->data_array2->wl_sleep_wakeup_energy*1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t BL floating wakeup time (ns) - " << + fr->data_array2->bl_floating_wakeup_latency*1e9 << endl; + + cout << "\t BL floating Tx energy (nJ) - " << + fr->data_array2->bl_floating_wakeup_energy*1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + cout << endl; + + cout << "\t Active mats per access - " << fr->data_array2->num_active_mats<data_array2->num_submarray_mats<pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) + { + cout << " Tag array: " << endl; + cout << "\t Sub-array Sleep Tx size (um) - " << + fr->tag_array2->sram_sleep_tx_width << endl; + + // cout << "\t Sub-array Sleep Tx total size (um) - " << + // fr->tag_array2->sram_sleep_tx_width << endl; + + cout << "\t Sub-array Sleep Tx total area (mm^2) - " << + fr->tag_array2->sram_sleep_tx_area*1e-6 << endl; + + cout << "\t Sub-array wakeup time (ns) - " << + fr->tag_array2->sram_sleep_wakeup_latency*1e9 << endl; + + cout << "\t Sub-array Tx energy (nJ) - " << + fr->tag_array2->sram_sleep_wakeup_energy*1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t WL Sleep Tx size (um) - " << + fr->tag_array2->wl_sleep_tx_width << endl; + + // cout << "\t WL Sleep total Tx size (um) - " << + // 
fr->tag_array2->wl_sleep_tx_width << endl; + + cout << "\t WL Sleep Tx total area (mm^2) - " << + fr->tag_array2->wl_sleep_tx_area*1e-6 << endl; + + cout << "\t WL wakeup time (ns) - " << + fr->tag_array2->wl_sleep_wakeup_latency*1e9 << endl; + + cout << "\t WL Tx energy (nJ) - " << + fr->tag_array2->wl_sleep_wakeup_energy*1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + cout << "\t BL floating wakeup time (ns) - " << + fr->tag_array2->bl_floating_wakeup_latency*1e9 << endl; + + cout << "\t BL floating Tx energy (nJ) - " << + fr->tag_array2->bl_floating_wakeup_energy*1e9 << endl; + //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + cout << endl; + + cout << "\t Active mats per access - " << fr->tag_array2->num_active_mats<tag_array2->num_submarray_mats<data_array2->access_time/1e-9 << endl; + + cout << "\tH-tree input delay (ns): " << + fr->data_array2->delay_route_to_bank * 1e9 + + fr->data_array2->delay_input_htree * 1e9 << endl; + + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + { + cout << "\tDecoder + wordline delay (ns): " << + fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->data_array2->delay_row_decoder * 1e9 << endl; + } + else + { + cout << "\tCAM search delay (ns): " << + fr->data_array2->delay_matchlines * 1e9 << endl; + } + + cout << "\tBitline delay (ns): " << + fr->data_array2->delay_bitlines/1e-9 << endl; + + cout << "\tSense Amplifier delay (ns): " << + fr->data_array2->delay_sense_amp * 1e9 << endl; + + + cout << "\tH-tree output delay (ns): " << + fr->data_array2->delay_subarray_output_driver * 1e9 + + fr->data_array2->delay_dout_htree * 1e9 << endl; + + if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) + { + /* tag array stats */ + cout << endl << " Tag side (with Output driver) (ns): " << + fr->tag_array2->access_time/1e-9 << endl; + + cout << "\tH-tree input delay (ns): " << + fr->tag_array2->delay_route_to_bank * 1e9 + + 
fr->tag_array2->delay_input_htree * 1e9 << endl; + + cout << "\tDecoder + wordline delay (ns): " << + fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->tag_array2->delay_row_decoder * 1e9 << endl; + + cout << "\tBitline delay (ns): " << + fr->tag_array2->delay_bitlines/1e-9 << endl; + + cout << "\tSense Amplifier delay (ns): " << + fr->tag_array2->delay_sense_amp * 1e9 << endl; + + cout << "\tComparator delay (ns): " << + fr->tag_array2->delay_comparator * 1e9 << endl; + + cout << "\tH-tree output delay (ns): " << + fr->tag_array2->delay_subarray_output_driver * 1e9 + + fr->tag_array2->delay_dout_htree * 1e9 << endl; + } + + + + /* Energy/Power stats */ + cout << endl << endl << "Power Components:" << endl << endl; + + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + { + cout << " Data array: Total dynamic read energy/access (nJ): " << + fr->data_array2->power.readOp.dynamic * 1e9 << endl; + + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + 
fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + + cout << "\tBitlines precharge and equalization circuit (nJ): " << + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + + cout << "\tTotal leakage power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; + cout << "\tTotal leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << + (fr->data_array2->power_addr_input_htree.readOp.leakage + + fr->data_array2->power_data_output_htree.readOp.leakage + + fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; + + cout << "\tTotal leakage power in cells (mW): " << + (fr->data_array2->array_leakage) * 1e3 << endl; + cout << "\tTotal leakage power in row logic(mW): " << + (fr->data_array2->wl_leakage) * 1e3 << endl; + cout << "\tTotal leakage power in column logic(mW): " << + (fr->data_array2->cl_leakage) * 1e3 << endl; + + cout << "\tTotal gate leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << + (fr->data_array2->power_addr_input_htree.readOp.gate_leakage + + fr->data_array2->power_data_output_htree.readOp.gate_leakage + + 
fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; + } + + else if (g_ip->pure_cam) + { + + cout << " CAM array:"<data_array2->power.searchOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "match key and data transfer) (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic + + fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; + cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; + cout << "\tSearchlines (nJ): " << + fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + + fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tMatchlines (nJ): " << + fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; + + + cout <data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " 
<< + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + + cout << endl <<" Total leakage power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; + } + else + { + cout << " Fully associative array:"<data_array2->power.searchOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "match key and data transfer) (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic + + fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; + cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; + cout << "\tSearchlines (nJ): " << + 
fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + + fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tMatchlines (nJ): " << + fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tData portion wordline (nJ): " << + fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl; + cout << "\tData Bitlines (nJ): " << + fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; + + + cout <data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + 
fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + + cout << endl <<" Total leakage power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; + } + + + if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) + { + cout << endl << " Tag array: Total dynamic read energy/access (nJ): " << + fr->tag_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal leakage read/write power of a bank (mW): " << + fr->tag_array2->power.readOp.leakage * 1e3 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->tag_array2->power_addr_input_htree.readOp.dynamic + + fr->tag_array2->power_data_output_htree.readOp.dynamic + + fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + + cout << "\tOutput Htree inside a bank Energy (nJ): " << + fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << 
+ fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines precharge and equalization circuit (nJ): " << + fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + + cout << "\tTotal leakage power of a bank (mW): " << + fr->tag_array2->power.readOp.leakage * 1e3 << endl; + cout << "\tTotal leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << + (fr->tag_array2->power_addr_input_htree.readOp.leakage + + fr->tag_array2->power_data_output_htree.readOp.leakage + + fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; + + cout << "\tTotal leakage power in cells (mW): " << + (fr->tag_array2->array_leakage) * 1e3 << endl; + cout << "\tTotal leakage power in row logic(mW): " << + (fr->tag_array2->wl_leakage) * 1e3 << endl; + cout << 
"\tTotal leakage power in column logic(mW): " << + (fr->tag_array2->cl_leakage) * 1e3 << endl; + cout << "\tTotal gate leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << + (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage + + fr->tag_array2->power_data_output_htree.readOp.gate_leakage + + fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; + } + + cout << endl << endl << "Area Components:" << endl << endl; + /* Data array area stats */ + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + else if (g_ip->pure_cam) + cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + else + cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + cout << "\tHeight (mm): " << + fr->data_array2->all_banks_height*1e-3 << endl; + cout << "\tWidth (mm): " << + fr->data_array2->all_banks_width*1e-3 << endl; + if (g_ip->print_detail) { + cout << "\tArea efficiency (Memory cell area/Total area) - " << + fr->data_array2->area_efficiency << " %" << endl; + cout << "\t\tMAT Height (mm): " << + fr->data_array2->mat_height*1e-3 << endl; + cout << "\t\tMAT Length (mm): " << + fr->data_array2->mat_length*1e-3 << endl; + cout << "\t\tSubarray Height (mm): " << + fr->data_array2->subarray_height*1e-3 << endl; + cout << "\t\tSubarray Length (mm): " << + fr->data_array2->subarray_length*1e-3 << endl; + } + + /* Tag array area stats */ + if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) + { + cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl; + cout << "\tHeight (mm): " << + fr->tag_array2->all_banks_height*1e-3 << endl; + cout << "\tWidth (mm): " << + fr->tag_array2->all_banks_width*1e-3 << endl; + if (g_ip->print_detail) + { + cout << "\tArea efficiency (Memory cell area/Total area) - " << + fr->tag_array2->area_efficiency << " 
%" << endl; + cout << "\t\tMAT Height (mm): " << + fr->tag_array2->mat_height*1e-3 << endl; + cout << "\t\tMAT Length (mm): " << + fr->tag_array2->mat_length*1e-3 << endl; + cout << "\t\tSubarray Height (mm): " << + fr->tag_array2->subarray_height*1e-3 << endl; + cout << "\t\tSubarray Length (mm): " << + fr->tag_array2->subarray_length*1e-3 << endl; + } + } + + }//if (!g_ip->is_3d_mem) + + + + Wire wpr; + wpr.print_wire(); + + //cout << "FO4 = " << g_tp.FO4 << endl; + } +} + +//McPAT's plain interface, please keep !!! +uca_org_t cacti_interface(InputParameter * const local_interface) +{ +// g_ip = new InputParameter(); + //g_ip->add_ecc_b_ = true; + + uca_org_t fin_res; + fin_res.valid = false; + + g_ip = local_interface; + +// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; +// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; +// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; +// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; +// +// g_ip->ic_proj_type = interconnect_projection_type_in; +// g_ip->wire_is_mat_type = wire_inside_mat_type_in; +// g_ip->wire_os_mat_type = wire_outside_mat_type_in; +// g_ip->burst_len = BURST_LENGTH_in; +// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; +// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; +// +// g_ip->cache_sz = cache_size; +// g_ip->line_sz = line_size; +// g_ip->assoc = associativity; +// g_ip->nbanks = banks; +// g_ip->out_w = output_width; +// g_ip->specific_tag = specific_tag; +// if (tag_width == 0) { +// g_ip->tag_w = 42; +// } +// else { +// g_ip->tag_w = tag_width; +// } +// +// g_ip->access_mode = access_mode; +// g_ip->delay_wt = obj_func_delay; +// g_ip->dynamic_power_wt = obj_func_dynamic_power; +// g_ip->leakage_power_wt = obj_func_leakage_power; +// g_ip->area_wt = obj_func_area; +// g_ip->cycle_time_wt = obj_func_cycle_time; +// g_ip->delay_dev = dev_func_delay; +// g_ip->dynamic_power_dev = 
dev_func_dynamic_power; +// g_ip->leakage_power_dev = dev_func_leakage_power; +// g_ip->area_dev = dev_func_area; +// g_ip->cycle_time_dev = dev_func_cycle_time; +// g_ip->temp = temp; +// +// g_ip->F_sz_nm = tech_node; +// g_ip->F_sz_um = tech_node / 1000; +// g_ip->is_main_mem = (main_mem != 0) ? true : false; +// g_ip->is_cache = (cache ==1) ? true : false; +// g_ip->pure_ram = (cache ==0) ? true : false; +// g_ip->pure_cam = (cache ==2) ? true : false; +// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; +// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; +// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; +// +// g_ip->num_rw_ports = rw_ports; +// g_ip->num_rd_ports = excl_read_ports; +// g_ip->num_wr_ports = excl_write_ports; +// g_ip->num_se_rd_ports = single_ended_read_ports; +// g_ip->num_search_ports = search_ports; +// +// g_ip->print_detail = 1; +// g_ip->nuca = 0; +// g_ip->is_cache=true; +// +// if (force_wiretype == 0) +// { +// g_ip->wt = Global; +// g_ip->force_wiretype = false; +// } +// else +// { g_ip->force_wiretype = true; +// if (wiretype==10) { +// g_ip->wt = Global_10; +// } +// if (wiretype==20) { +// g_ip->wt = Global_20; +// } +// if (wiretype==30) { +// g_ip->wt = Global_30; +// } +// if (wiretype==5) { +// g_ip->wt = Global_5; +// } +// if (wiretype==0) { +// g_ip->wt = Low_swing; +// } +// } +// //g_ip->wt = Global_5; +// if (force_config == 0) +// { +// g_ip->force_cache_config = false; +// } +// else +// { +// g_ip->force_cache_config = true; +// g_ip->ndbl=ndbl; +// g_ip->ndwl=ndwl; +// g_ip->nspd=nspd; +// g_ip->ndcm=ndcm; +// g_ip->ndsam1=ndsam1; +// g_ip->ndsam2=ndsam2; +// +// +// } +// +// if (ecc==0){ +// g_ip->add_ecc_b_=false; +// } +// else +// { +// g_ip->add_ecc_b_=true; +// } + + + g_ip->error_checking(); + + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. 
+ + solve(&fin_res); + +// g_ip->display_ip(); +// output_UCA(&fin_res); +// output_data_csv(fin_res); + + // delete (g_ip); + + return fin_res; +} + +//McPAT's plain interface, please keep !!! +uca_org_t init_interface(InputParameter* const local_interface) +{ + // g_ip = new InputParameter(); + //g_ip->add_ecc_b_ = true; + + uca_org_t fin_res; + fin_res.valid = false; + + g_ip = local_interface; + + +// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; +// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; +// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; +// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; +// +// g_ip->ic_proj_type = interconnect_projection_type_in; +// g_ip->wire_is_mat_type = wire_inside_mat_type_in; +// g_ip->wire_os_mat_type = wire_outside_mat_type_in; +// g_ip->burst_len = BURST_LENGTH_in; +// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; +// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; +// +// g_ip->cache_sz = cache_size; +// g_ip->line_sz = line_size; +// g_ip->assoc = associativity; +// g_ip->nbanks = banks; +// g_ip->out_w = output_width; +// g_ip->specific_tag = specific_tag; +// if (tag_width == 0) { +// g_ip->tag_w = 42; +// } +// else { +// g_ip->tag_w = tag_width; +// } +// +// g_ip->access_mode = access_mode; +// g_ip->delay_wt = obj_func_delay; +// g_ip->dynamic_power_wt = obj_func_dynamic_power; +// g_ip->leakage_power_wt = obj_func_leakage_power; +// g_ip->area_wt = obj_func_area; +// g_ip->cycle_time_wt = obj_func_cycle_time; +// g_ip->delay_dev = dev_func_delay; +// g_ip->dynamic_power_dev = dev_func_dynamic_power; +// g_ip->leakage_power_dev = dev_func_leakage_power; +// g_ip->area_dev = dev_func_area; +// g_ip->cycle_time_dev = dev_func_cycle_time; +// g_ip->temp = temp; +// +// g_ip->F_sz_nm = tech_node; +// g_ip->F_sz_um = tech_node / 1000; +// g_ip->is_main_mem = (main_mem != 0) ? true : false; +// g_ip->is_cache = (cache ==1) ? 
true : false; +// g_ip->pure_ram = (cache ==0) ? true : false; +// g_ip->pure_cam = (cache ==2) ? true : false; +// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; +// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; +// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; +// +// g_ip->num_rw_ports = rw_ports; +// g_ip->num_rd_ports = excl_read_ports; +// g_ip->num_wr_ports = excl_write_ports; +// g_ip->num_se_rd_ports = single_ended_read_ports; +// g_ip->num_search_ports = search_ports; +// +// g_ip->print_detail = 1; +// g_ip->nuca = 0; +// +// if (force_wiretype == 0) +// { +// g_ip->wt = Global; +// g_ip->force_wiretype = false; +// } +// else +// { g_ip->force_wiretype = true; +// if (wiretype==10) { +// g_ip->wt = Global_10; +// } +// if (wiretype==20) { +// g_ip->wt = Global_20; +// } +// if (wiretype==30) { +// g_ip->wt = Global_30; +// } +// if (wiretype==5) { +// g_ip->wt = Global_5; +// } +// if (wiretype==0) { +// g_ip->wt = Low_swing; +// } +// } +// //g_ip->wt = Global_5; +// if (force_config == 0) +// { +// g_ip->force_cache_config = false; +// } +// else +// { +// g_ip->force_cache_config = true; +// g_ip->ndbl=ndbl; +// g_ip->ndwl=ndwl; +// g_ip->nspd=nspd; +// g_ip->ndcm=ndcm; +// g_ip->ndsam1=ndsam1; +// g_ip->ndsam2=ndsam2; +// +// +// } +// +// if (ecc==0){ +// g_ip->add_ecc_b_=false; +// } +// else +// { +// g_ip->add_ecc_b_=true; +// } + + + g_ip->error_checking(); + + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. + //solve(&fin_res); + //g_ip->display_ip(); + + //solve(&fin_res); + //output_UCA(&fin_res); + //output_data_csv(fin_res); + // delete (g_ip); + + return fin_res; +} + +void reconfigure(InputParameter *local_interface, uca_org_t *fin_res) +{ + // Copy the InputParameter to global interface (g_ip) and do error checking. 
+ g_ip = local_interface; + g_ip->error_checking(); + + // Initialize technology parameters + init_tech_params(g_ip->F_sz_um,false); + + Wire winit; // Do not delete this line. It initializes wires. + + // This corresponds to solve() in the initialization process. + update(fin_res); +} + diff --git a/T1/TP1/cacti-master/io.h b/T1/TP1/cacti-master/io.h new file mode 100644 index 0000000..7c82fee --- /dev/null +++ b/T1/TP1/cacti-master/io.h @@ -0,0 +1,45 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + +#ifndef __IO_H__ +#define __IO_H__ + + +#include "const.h" +#include "cacti_interface.h" + + +void output_data_csv(const uca_org_t & fin_res, string fn="out.csv"); +void output_UCA(uca_org_t * fin_res); +void output_data_csv_3dd(const uca_org_t & fin_res); + +#endif diff --git a/T1/TP1/cacti-master/lpddr.cfg b/T1/TP1/cacti-master/lpddr.cfg new file mode 100644 index 0000000..80ecc23 --- /dev/null +++ b/T1/TP1/cacti-master/lpddr.cfg @@ -0,0 +1,254 @@ +# Cache size +//-size (bytes) 2048 +//-size (bytes) 4096 +//-size (bytes) 32768 +//-size (bytes) 131072 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 67108864 +//-size (bytes) 1073741824 + +# power gating +-Array Power Gating - "false" +-WL Power Gating - "false" +-CL Power Gating - "false" +-Bitline floating - "false" +-Interconnect Power Gating - "false" +-Power Gating Performance Loss 0.01 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 64 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +//-associativity 8 +-associativity 8 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks 
connected using a bus +-UCA bank count 1 +-technology (u) 0.022 +//-technology (u) 0.040 +//-technology (u) 0.032 +//-technology (u) 0.090 + +# following three parameters are meaningful only for main memories + +-page size (bits) 8192 +-burst length 8 +-internal prefetch width 8 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Data array peripheral type - "itrs-hp" +//-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" +//-Tag array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" +//-Tag array peripheral type - "itrs-lop + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +-output/input bus width 512 + +// 300-400 in steps of 10 +-operating temperature (K) 360 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +-cache type "cache" +//-cache type "ram" +//-cache type "main memory" + +# to model special structure like branch target buffers, directory, etc. 
+# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 22 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +//-access mode (normal, sequential, fast) - "fast" +-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +//-Wire outside mat - "global" +-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +//-Force cache config - "true" +-Force cache config 
- "false" +-Ndwl 1 +-Ndbl 1 +-Nspd 0 +-Ndcm 1 +-Ndsam1 0 +-Ndsam2 0 + + + +#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report (), especially Chapters 2 and 3. + +# Memory Type (D=DDR3, L=LPDDR2, W=WideIO). Additional memory types can be defined by the user in extio_technology.cc, along with their technology and configuration parameters. + +//-dram_type "D" +-dram_type "L" +//-dram_type "W" +//-dram_type "S" + +# Memory State (R=Read, W=Write, I=Idle or S=Sleep) + +//-iostate "R" +-iostate "W" +//-iostate "I" +//-iostate "S" + +#Address bus timing. To alleviate the timing on the command and address bus due to high loading (shared across all memories on the channel), the interface allows for multi-cycle timing options. + +-addr_timing 0.5 //DDR +//-addr_timing 1.0 //SDR (half of DQ rate) +//-addr_timing 2.0 //2T timing (One fourth of DQ rate) +//-addr_timing 3.0 // 3T timing (One sixth of DQ rate) + +# Memory Density (Gbit per memory/DRAM die) + +-mem_density 8 Gb //Valid values 2^n Gb + +# IO frequency (MHz) (frequency of the external memory interface). + +-bus_freq 533 MHz //As of current memory standards (2013), valid range 0 to 1.5 GHz for DDR3, 0 to 533 MHz for LPDDR2, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential. However this can change, and the user is free to define valid ranges based on new memory types or extending beyond existing standards for existing dram types. + +# Duty Cycle (fraction of time in the Memory State defined above) + +-duty_cycle 1.0 //Valid range 0 to 1.0 + +# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. 
activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR +#-activity_dq .50 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_ca 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T +#-activity_ca 0.25 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T + +# Number of DQ pins + +-num_dq 72 //Number of DQ pins. Includes ECC pins. + +# Number of DQS pins. DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typucally differential, just like the CLK pin. + +-num_dqs 36 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins. + +# Number of CA pins + +-num_ca 35 //Valid range 0 to 35 pins. +#-num_ca 25 //Valid range 0 to 35 pins. + +# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin. + +-num_clk 2 //2 x differential pair. Valid values: 0/2/4. + +# Number of Physical Ranks + +-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register. + +# Width of the Memory Data Bus + +-mem_data_width 32 //x4 or x8 or x16 or x32 memories. For WideIO upto x128. 
diff --git a/T1/TP1/cacti-master/main.cc b/T1/TP1/cacti-master/main.cc new file mode 100644 index 0000000..04899f1 --- /dev/null +++ b/T1/TP1/cacti-master/main.cc @@ -0,0 +1,270 @@ +/*------------------------------------------------------------ + * CACTI 6.5 + * Copyright 2008 Hewlett-Packard Development Corporation + * All Rights Reserved + * + * Permission to use, copy, and modify this software and its documentation is + * hereby granted only under the following terms and conditions. Both the + * above copyright notice and this permission notice must appear in all copies + * of the software, derivative works or modified versions, and any portions + * thereof, and both notices must appear in supporting documentation. + * + * Users of this software agree to the terms and conditions set forth herein, and + * hereby grant back to Hewlett-Packard Company and its affiliated companies ("HP") + * a non-exclusive, unrestricted, royalty-free right and license under any changes, + * enhancements or extensions made to the core functions of the software, including + * but not limited to those affording compatibility with other hardware or software + * environments, but excluding applications which incorporate this software. + * Users further agree to use their best efforts to return to HP any such changes, + * enhancements or extensions that they make and inform HP of noteworthy uses of + * this software. Correspondence should be provided to HP at: + * + * Director of Intellectual Property Licensing + * Office of Strategy and Technology + * Hewlett-Packard Company + * 1501 Page Mill Road + * Palo Alto, California 94304 + * + * This software may be distributed (but not offered for sale or transferred + * for compensation) to third parties, provided such third parties agree to + * abide by the terms and conditions of this notice. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND HP DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL HP + * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + *------------------------------------------------------------*/ + +#include "io.h" +#include + +#include "Ucache.h" + +using namespace std; + + +int main(int argc,char *argv[]) +{ + + uca_org_t result; + if (argc != 53 && argc != 55 && argc !=64) + { + bool infile_specified = false; + string infile_name(""); + + for (int32_t i = 0; i < argc; i++) + { + if (argv[i] == string("-infile")) + { + infile_specified = true; + i++; + infile_name = argv[i]; + } + } + if (infile_specified == false) + { + cerr << " Invalid arguments -- how to use CACTI:" << endl; + cerr << " 1) cacti -infile " << endl; + cerr << " 2) cacti arg1 ... arg52 -- please refer to the README file" << endl; + cerr << " No. 
of arguments input - " << argc << endl; + exit(1); + } + else + { + result = cacti_interface(infile_name); + } + } + else if (argc == 53) + { + result = cacti_interface(atoi(argv[ 1]), + atoi(argv[ 2]), + atoi(argv[ 3]), + atoi(argv[ 4]), + atoi(argv[ 5]), + atoi(argv[ 6]), + atoi(argv[ 7]), + atoi(argv[ 8]), + atoi(argv[ 9]), + atof(argv[10]), + atoi(argv[11]), + atoi(argv[12]), + atoi(argv[13]), + atoi(argv[14]), + atoi(argv[15]), + atoi(argv[16]), + atoi(argv[17]), + atoi(argv[18]), + atoi(argv[19]), + atoi(argv[20]), + atoi(argv[21]), + atoi(argv[22]), + atoi(argv[23]), + atoi(argv[24]), + atoi(argv[25]), + atoi(argv[26]), + atoi(argv[27]), + atoi(argv[28]), + atoi(argv[29]), + atoi(argv[30]), + atoi(argv[31]), + atoi(argv[32]), + atoi(argv[33]), + atoi(argv[34]), + atoi(argv[35]), + atoi(argv[36]), + atoi(argv[37]), + atoi(argv[38]), + atoi(argv[39]), + atoi(argv[40]), + atoi(argv[41]), + atoi(argv[42]), + atoi(argv[43]), + atoi(argv[44]), + atoi(argv[45]), + atoi(argv[46]), + atoi(argv[47]), + atoi(argv[48]), + atoi(argv[49]), + atoi(argv[50]), + atoi(argv[51]), + atoi(argv[52])); + } + else if (argc == 55) + { + result = cacti_interface(atoi(argv[ 1]), + atoi(argv[ 2]), + atoi(argv[ 3]), + atoi(argv[ 4]), + atoi(argv[ 5]), + atoi(argv[ 6]), + atoi(argv[ 7]), + atoi(argv[ 8]), + atof(argv[ 9]), + atoi(argv[10]), + atoi(argv[11]), + atoi(argv[12]), + atoi(argv[13]), + atoi(argv[14]), + atoi(argv[15]), + atoi(argv[16]), + atoi(argv[17]), + atoi(argv[18]), + atoi(argv[19]), + atoi(argv[20]), + atoi(argv[21]), + atoi(argv[22]), + atoi(argv[23]), + atoi(argv[24]), + atoi(argv[25]), + atoi(argv[26]), + atoi(argv[27]), + atoi(argv[28]), + atoi(argv[29]), + atoi(argv[30]), + atoi(argv[31]), + atoi(argv[32]), + atoi(argv[33]), + atoi(argv[34]), + atoi(argv[35]), + atoi(argv[36]), + atoi(argv[37]), + atoi(argv[38]), + atoi(argv[39]), + atoi(argv[40]), + atoi(argv[41]), + atoi(argv[42]), + atoi(argv[43]), + atoi(argv[44]), + atoi(argv[45]), + atoi(argv[46]), + 
atoi(argv[47]), + atoi(argv[48]), + atoi(argv[49]), + atoi(argv[50]), + atoi(argv[51]), + atoi(argv[52]), + atoi(argv[53]), + atoi(argv[54])); + } + else if (argc == 64) + { + result = cacti_interface(atoi(argv[ 1]), + atoi(argv[ 2]), + atoi(argv[ 3]), + atoi(argv[ 4]), + atoi(argv[ 5]), + atoi(argv[ 6]), + atoi(argv[ 7]), + atoi(argv[ 8]), + atof(argv[ 9]), + atoi(argv[10]), + atoi(argv[11]), + atoi(argv[12]), + atoi(argv[13]), + atoi(argv[14]), + atoi(argv[15]), + atoi(argv[16]), + atoi(argv[17]), + atoi(argv[18]), + atoi(argv[19]), + atoi(argv[20]), + atoi(argv[21]), + atoi(argv[22]), + atoi(argv[23]), + atoi(argv[24]), + atoi(argv[25]), + atoi(argv[26]), + atoi(argv[27]), + atoi(argv[28]), + atoi(argv[29]), + atoi(argv[30]), + atoi(argv[31]), + atoi(argv[32]), + atoi(argv[33]), + atoi(argv[34]), + atoi(argv[35]), + atoi(argv[36]), + atoi(argv[37]), + atoi(argv[38]), + atoi(argv[39]), + atoi(argv[40]), + atoi(argv[41]), + atoi(argv[42]), + atoi(argv[43]), + atoi(argv[44]), + atoi(argv[45]), + atoi(argv[46]), + atoi(argv[47]), + atoi(argv[48]), + atoi(argv[49]), + atoi(argv[50]), + atoi(argv[51]), + atoi(argv[52]), + atoi(argv[53]), + atoi(argv[54]), + atoi(argv[55]), + atoi(argv[56]), + atoi(argv[57]), + atoi(argv[58]), + atoi(argv[59]), + atoi(argv[60]), + atoi(argv[61]), + atoi(argv[62]), + atoi(argv[63])); + } + + cout << "=============================================\n\n"; + // print_g_tp(); //function to test technology paramters. +// g_tp.display(); + result.cleanup(); +// delete result.data_array2; +// if (result.tag_array2!=NULL) +// delete result.tag_array2; + + return 0; +} + diff --git a/T1/TP1/cacti-master/makefile b/T1/TP1/cacti-master/makefile new file mode 100644 index 0000000..394019f --- /dev/null +++ b/T1/TP1/cacti-master/makefile @@ -0,0 +1,28 @@ +TAR = cacti + +.PHONY: dbg opt depend clean clean_dbg clean_opt + +all: dbg + +dbg: $(TAR).mk obj_dbg + @$(MAKE) TAG=dbg -C . -f $(TAR).mk + +opt: $(TAR).mk obj_opt + @$(MAKE) TAG=opt -C . 
-f $(TAR).mk + +obj_dbg: + mkdir $@ + +obj_opt: + mkdir $@ + +clean: clean_dbg clean_opt + +clean_dbg: obj_dbg + @$(MAKE) TAG=dbg -C . -f $(TAR).mk clean + rm -rf $< + +clean_opt: obj_opt + @$(MAKE) TAG=opt -C . -f $(TAR).mk clean + rm -rf $< + diff --git a/T1/TP1/cacti-master/mat.cc b/T1/TP1/cacti-master/mat.cc new file mode 100644 index 0000000..f290daf --- /dev/null +++ b/T1/TP1/cacti-master/mat.cc @@ -0,0 +1,1940 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include "mat.h" +#include + + +Mat::Mat(const DynamicParameter & dyn_p) + :dp(dyn_p), + power_subarray_out_drv(), + delay_fa_tag(0), delay_cam(0), + delay_before_decoder(0), delay_bitline(0), + delay_wl_reset(0), delay_bl_restore(0), + delay_searchline(0), delay_matchchline(0), + delay_cam_sl_restore(0), delay_cam_ml_reset(0), + delay_fa_ram_wl(0),delay_hit_miss_reset(0), + delay_hit_miss(0), + subarray(dp, dp.fully_assoc), + power_bitline(), per_bitline_read_energy(0), + deg_bl_muxing(dp.deg_bl_muxing), + num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), + delay_writeback(0), + cell(subarray.cell), cam_cell(subarray.cam_cell), + is_dram(dyn_p.is_dram), + pure_cam(dyn_p.pure_cam), + num_mats(dp.num_mats), + power_sa(), delay_sa(0), + leak_power_sense_amps_closed_page_state(0), + leak_power_sense_amps_open_page_state(0), + delay_subarray_out_drv(0), + delay_comparator(0), power_comparator(), + num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), + num_subarrays_per_mat(dp.num_subarrays/dp.num_mats), + num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir), + array_leakage(0), + wl_leakage(0), + cl_leakage(0) + { + assert(num_subarrays_per_mat <= 4); + assert(num_subarrays_per_row <= 2); + is_fa = (dp.fully_assoc) ? true : false; + camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. 
+ + if (is_fa || pure_cam) + num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat; + + if (dp.use_inp_params == 1) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } + else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + + } + + double number_sa_subarray; + + if (!is_fa && !pure_cam) + { + number_sa_subarray = subarray.num_cols / deg_bl_muxing; + } + else if (is_fa && !pure_cam) + { + number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; + } + + else + { + number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; + } + + int num_dec_signals = subarray.num_rows; + double C_ld_bit_mux_dec_out = 0; + double C_ld_sa_mux_lev_1_dec_out = 0; + double C_ld_sa_mux_lev_2_dec_out = 0; + double R_wire_wl_drv_out; + + if (!is_fa && !pure_cam) + { + R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + } + else if (is_fa && !pure_cam) + { + R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; + } + else + { + R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um; + } + + double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA + double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; + + if (deg_bl_muxing > 1) + { + C_ld_bit_mux_dec_out = + (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell + num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); + } + + if (dp.Ndsam_lev_1 > 1) + { + C_ld_sa_mux_lev_1_dec_out = + (num_subarrays_per_mat * number_sa_subarray / 
dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); + } + if (dp.Ndsam_lev_2 > 1) + { + C_ld_sa_mux_lev_2_dec_out = + (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); + } + + if (num_subarrays_per_row >= 2) + { + // wire heads for both right and left side of a mat, so half the resistance + R_wire_bit_mux_dec_out /= 2.0; + R_wire_sa_mux_dec_out /= 2.0; + } + + + row_dec = new Decoder( + num_dec_signals, + false, + subarray.C_wl, + R_wire_wl_drv_out, + false/*is_fa*/, + is_dram, + true, + camFlag? cam_cell:cell); + + row_dec->nodes_DSTN = subarray.num_rows;//TODO: this is not a good way for OOO programming +// if (is_fa && (!dp.is_tag)) +// { +// row_dec->exist = true; +// } + bit_mux_dec = new Decoder( + deg_bl_muxing,// This number is 1 for FA or CAM + false, + C_ld_bit_mux_dec_out, + R_wire_bit_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag? cam_cell:cell); + sa_mux_lev_1_dec = new Decoder( + dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM + dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal + C_ld_sa_mux_lev_1_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag? cam_cell:cell); + sa_mux_lev_2_dec = new Decoder( + dp.Ndsam_lev_2, // This number is 1 for FA or CAM + false, + C_ld_sa_mux_lev_2_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag? 
cam_cell:cell); + + double C_wire_predec_blk_out; + double R_wire_predec_blk_out; + + if (!is_fa && !pure_cam) + { + + C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; + R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; + + } + else //for pre-decode block's load is same for both FA and CAM + { + C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; + R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; + } + + + if (is_fa||pure_cam) + num_dec_signals += _log2(num_subarrays_per_mat); + + PredecBlk * r_predec_blk1 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + true); + PredecBlk * r_predec_blk2 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + false); + PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); + PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); + dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); + dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); + + PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, 
is_dram); + PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); + way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); + dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); + + r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); + b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); + sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); + sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); + + subarray_out_wire = new Wire(dp.wtype, g_ip->cl_vertical?subarray.area.w:subarray.area.h);//Bug should be subarray.area.w Owen and + //subarray_out_wire = new Wire(g_ip->wt, g_ip->cl_vertical?subarray.area.w:subarray.area.h);//Bug should be subarray.area.w Owen and + + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; + + if (is_fa || pure_cam) + + { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same + driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * 
cam_cell.w * g_tp.wire_outside_mat.R_per_um; + cam_bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + if (!pure_cam) + { + //This is only used for fully asso not pure CAM + driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + } + + else + { + driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); + double w_row_decoder = area_row_decoder / subarray.area.get_h(); + + double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = + compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); + + /* This means the subarray drivers are along the vertical direction since / subarray.area.get_w() is used; + * so the subarray_out_wire (actually the drivers) under the subarray and along the x direction + * So as mentioned above @ line 271 + * subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and + * change the out_wire (driver to along y direction need carefully rethinking + * rather than just simply switch w with h ) + * */ + double h_subarray_out_drv = subarray_out_wire->area.get_area() * + (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * 
dp.Ndsam_lev_2)) / subarray.area.get_w(); + + + h_subarray_out_drv *= (RWP + ERP + SCHP); + + double h_comparators = 0.0; + double w_row_predecode_output_wires = 0.0; + double h_bit_mux_dec_out_wires = 0.0; + double h_senseamp_mux_dec_out_wires = 0.0; + + if ((!is_fa)&&(dp.is_tag)) + { + //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; + h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); + h_comparators *= (RWP + ERP); + } + + //power-gating circuit + bool is_footer = false; + double Isat_subarray = 2* simplified_nmos_Isat(g_tp.sram.cell_nmos_w, is_dram, true);//only one wordline active in a subarray 2 means two inverters in an SRAM cell + double detalV_array;//, deltaV_wl, deltaV_floatingBL; + double c_wakeup_array; + + if (!(is_fa || pure_cam) && g_ip->power_gating) + {//for SRAM only at this moment + c_wakeup_array = drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true);//1 inv + c_wakeup_array += 2*drain_C_(g_tp.sram.cell_pmos_w, PCH, 1, 1, cell.h, is_dram, true) + + drain_C_(g_tp.sram.cell_nmos_w, NCH, 1, 1, cell.h, is_dram, true);//1 inv + c_wakeup_array *= subarray.num_rows; + detalV_array = g_tp.sram_cell.Vdd-g_tp.sram_cell.Vcc_min; + + sram_sleep_tx = new Sleep_tx (g_ip->perfloss, + Isat_subarray, + is_footer, + c_wakeup_array, + detalV_array, + 1, + cell); + + subarray.area.set_h(subarray.area.h+ sram_sleep_tx->area.h); + + //TODO: add the sleep tx in the wl driver and + } + + + int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits); + int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits); + w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + + + double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * + (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + + 
h_subarray_out_drv + h_comparators); + + double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); + + if (deg_bl_muxing > 1) + { + h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_1 > 1) + { + h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_2 > 1) + { + h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + + double h_addr_datain_wires; + if (!g_ip->ver_htree_wires_over_array) + { + h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + + (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + + if (is_fa || pure_cam) + { + h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit + (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + + (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP; + } + //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + + //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); + h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + + h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + + h_addr_datain_wires + + h_bit_mux_dec_out_wires + + h_senseamp_mux_dec_out_wires; + + } + + // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; + double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + + b_mux_predec_blk_drv1->area.get_area() + + sa_mux_lev_1_predec_blk_drv1->area.get_area() + + sa_mux_lev_2_predec_blk_drv1->area.get_area() + + way_sel_drv1->area.get_area() + + r_predec_blk_drv2->area.get_area() + + b_mux_predec_blk_drv2->area.get_area() + + sa_mux_lev_1_predec_blk_drv2->area.get_area() + + 
sa_mux_lev_2_predec_blk_drv2->area.get_area() + + r_predec_blk1->area.get_area() + + b_mux_predec_blk1->area.get_area() + + sa_mux_lev_1_predec_blk1->area.get_area() + + sa_mux_lev_2_predec_blk1->area.get_area() + + r_predec_blk2->area.get_area() + + b_mux_predec_blk2->area.get_area() + + sa_mux_lev_1_predec_blk2->area.get_area() + + sa_mux_lev_2_predec_blk2->area.get_area() + + bit_mux_dec->area.get_area() + + sa_mux_lev_1_dec->area.get_area() + + sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); + + /// double area_efficiency_mat; + + +// if (!is_fa) +// { + assert(num_subarrays_per_mat/num_subarrays_per_row>0); + area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area; + area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; + area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; + /// = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area(); + +// cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<is_3d_mem) + { + h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + + h_subarray_out_drv); + area.h = subarray.area.h + h_non_cell_area; + area.w = subarray.area.w; + if (g_ip->print_detail_debug) + cout << "actual subarray width: " << cell.w * subarray.num_cols /1e3 << " mm" << endl; + } + + if (g_ip->print_detail_debug) + { + cout<<"h_non_cell_area"<0); + assert(area.w>0); +// } +// else +// { +// area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area; +// area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; +// area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; +// area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area(); +// } + } + + + +Mat::~Mat() +{ + delete row_dec; + delete bit_mux_dec; + delete sa_mux_lev_1_dec; + delete sa_mux_lev_2_dec; + + delete r_predec->blk1; + delete r_predec->blk2; + delete 
b_mux_predec->blk1; + delete b_mux_predec->blk2; + delete sa_mux_lev_1_predec->blk1; + delete sa_mux_lev_1_predec->blk2; + delete sa_mux_lev_2_predec->blk1; + delete sa_mux_lev_2_predec->blk2; + delete dummy_way_sel_predec_blk1; + delete dummy_way_sel_predec_blk2; + + delete r_predec->drv1; + delete r_predec->drv2; + delete b_mux_predec->drv1; + delete b_mux_predec->drv2; + delete sa_mux_lev_1_predec->drv1; + delete sa_mux_lev_1_predec->drv2; + delete sa_mux_lev_2_predec->drv1; + delete sa_mux_lev_2_predec->drv2; + delete way_sel_drv1; + delete dummy_way_sel_predec_blk_drv2; + + delete r_predec; + delete b_mux_predec; + delete sa_mux_lev_1_predec; + delete sa_mux_lev_2_predec; + + delete subarray_out_wire; + if (!pure_cam) + delete bl_precharge_eq_drv; + + if (is_fa || pure_cam) + { + delete sl_precharge_eq_drv ; + delete sl_data_drv ; + delete cam_bl_precharge_eq_drv; + delete ml_precharge_drv; + delete ml_to_ram_wl_drv; + } + if (!sram_sleep_tx) + { + delete sram_sleep_tx; + } +} + + + +double Mat::compute_delays(double inrisetime) +{ + int k; + double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl; + double outrisetime_search, outrisetime, row_dec_outrisetime; + // delay calculation for tags of fully associative cache + if (is_fa || pure_cam) + { + //Compute search access time + outrisetime_search = compute_cam_delay(inrisetime); + if (is_fa) + { + bl_precharge_eq_drv->compute_delay(0); + k = ml_to_ram_wl_drv->number_gates - 1; + rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); + C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + + drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); + C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load; + tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + + R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, 
is_dram, false, false); + r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in + R_bl = subarray.num_rows * r_b_metal; + C_bl = subarray.C_bl; + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + + + outrisetime_search = compute_bitline_delay(outrisetime_search); + outrisetime_search = compute_sa_delay(outrisetime_search); + } + outrisetime_search = compute_subarray_out_drv(outrisetime_search); + subarray_out_wire->set_in_rise_time(outrisetime_search); + outrisetime_search = subarray_out_wire->signal_rise_time(); + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + + + //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); + + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + if (pure_cam) + { + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + } + return outrisetime_search; + } + else + { + bl_precharge_eq_drv->compute_delay(0); + if (row_dec->exist == true) + { + int k = row_dec->num_gates - 1; + double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); + // TODO: this 4*cell.h number must be revisited + double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + + drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); + double C_ld = row_dec->C_ld_dec_out; + double tf = rd * 
(C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + } + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + double r_b_metal = cell.h * g_tp.wire_local.R_per_um; + double R_bl = subarray.num_rows * r_b_metal; + double C_bl = subarray.C_bl; + + if (is_dram) + { + delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } + else + { + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } + } + + + + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); + + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + //CACTI3DD + if(g_ip->is_3d_mem) + { + row_dec_outrisetime = inrisetime; + } + + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + outrisetime = compute_subarray_out_drv(outrisetime); + subarray_out_wire->set_in_rise_time(outrisetime); + outrisetime = subarray_out_wire->signal_rise_time(); + + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + + if (dp.is_tag == true && dp.fully_assoc == false) + { + compute_comparator_delay(0); + } + + if (row_dec->exist == false) + { + delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); + } + return outrisetime; +} + + + +double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() +{ + + double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? 
cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) + + compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry + + if (deg_bl_muxing > 1) + { + height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height + // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height + } + + height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height + + if (dp.Ndsam_lev_1 > 1) + { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + } + + if (dp.Ndsam_lev_2 > 1) + { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + + // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux + height += 2 * compute_tr_width_after_folding( + pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + } + + // TODO: this should be uncommented... 
+ /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) + { + //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); + double width_write_driver_write_mux = width_write_driver_or_write_mux(); + double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, + cell.w * + // deg_bl_muxing * + dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); + height += height_write_driver_write_mux; + }*/ + + if (g_ip->is_3d_mem) + { + //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); + double width_write_driver_write_mux = width_write_driver_or_write_mux(); + double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, cell.w); + height += height_write_driver_write_mux; + } + + return height; +} + + + +double Mat::compute_cam_delay(double inrisetime) +{ + + double out_time_ramp, this_delay; + double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; + + + double Wfaprechp, Wdummyn, Wdummyinvn, Wdummyinvp, Waddrnandn, Waddrnandp, + Wfanorn, Wfanorp, W_hit_miss_n, W_hit_miss_p; + + /** + double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, + Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp, + Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp, + Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p; + **/ + + double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; + int Htagbits; + + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; + //double searchline_precharge_time; + + double leak_power_cc_inverters_sram_cell = 0; + double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; + double leak_power_RD_port_sram_cell = 0; + double leak_power_SCHP_port_sram_cell = 0; + double 
leak_comparator_cam_cell =0; + + double gate_leak_comparator_cam_cell = 0; + double gate_leak_power_cc_inverters_sram_cell = 0; + double gate_leak_power_RD_port_sram_cell = 0; + double gate_leak_power_SCHP_port_sram_cell = 0; + + c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; + c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; + r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; + r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; + + dynSearchEng = 0.0; + delay_matchchline = 0.0; + double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); + bool linear_scaling = false; + + if (linear_scaling) + { + /// Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + /// Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + /// Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + /// Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + /// Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + /// Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + /// Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + /// Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + /// Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + /// Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + /// Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + /// Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + /// Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + /// Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + /// Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + /// Wfanandn 
= 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + /// Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + /// Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + /// Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort + } + else + { + /// Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + /// Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + /// Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + /// Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + /// Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + /// Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + /// Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + /// Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + /// Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process 
+ /// Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + /// Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + /// Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + /// Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + /// Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + /// Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + /// Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + /// Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + /// Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + /// Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process + Wdummyn = g_tp.cam.cell_nmos_w; + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + } + + Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); + + /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. + search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. 
+ From the driver(am and an) to the comparators in all the rows including the dummy row, + Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ + + //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports + //Searchline precharge routes horizontally + driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; + + sl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + //searchline data driver ; subarray.num_rows + 1 is because of the dummy row + //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + sl_data_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + sl_precharge_eq_drv->compute_delay(0); + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr + double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; + double R_bl = (subarray.num_rows + 1) * r_b_metal; + double C_bl = subarray.C_bl_cam; + delay_cam_sl_restore = sl_precharge_eq_drv->delay + + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2); + + out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) + + //matchline ops delay + delay_matchchline += 
sl_data_drv->delay; + + /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ + //matchline delay, matchline power, matchline_reset for cycle time computation, + + ////matchline precharge circuitry routes vertically + //There are two matchline precharge driver chains per subarray. + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + + ml_precharge_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + ml_precharge_drv->compute_delay(0); + + + rd = tr_R_on(Wdummyn, NCH, 2, is_dram); + c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit + + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline + + Cwire = c_matchline_metal * Htagbits; + Rwire = r_matchline_metal * Htagbits; + c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); + + double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_ml = Rwire; + double C_ml = Cwire + c_intrinsic; + delay_cam_ml_reset = ml_precharge_drv->delay + + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too + + //matchline ops delay + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); + delay_matchchline += this_delay; + out_time_ramp = this_delay / VTHFA3; + + dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise + * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd 
*2;//* Ntbl;//each subarry has two halves + + /* third stage, from the NAND2 gates to the drivers in the dummy row */ + rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); + c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + + drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2; + c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); + out_time_ramp = this_delay / (1 - VTHFA4); + delay_matchchline += this_delay; + + //only the dummy row has the extra inverter between NAND and NOR gates + dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; + + /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ + rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); + c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2; + Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2; + c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); + tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); + out_time_ramp = this_delay / VTHFA5; + delay_matchchline += this_delay; + + dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + + /*final statge from the NOR gate to drive the wordline of the data portion */ + + //searchline data driver There are two matchline precharge driver chains per subarray. 
+ driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic + driver_c_wire_load = subarray.C_wl_ram; + driver_r_wire_load = subarray.R_wl_ram; + + ml_to_ram_wl_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + + + rd = tr_R_on(Wfanorn, NCH, 1, is_dram); + c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); + out_time_ramp = this_delay / (1-0.5); + delay_matchchline += this_delay; + + out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); + + //c_gate_load energy is computed in ml_to_ram_wl_drv + dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + + + /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ + /*Precharge the hitting logic */ + c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; + + rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_hit_miss = Rwire; + double C_hit_miss = Cwire + c_intrinsic; + delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + + /*hitting logic evaluation */ + c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, 
g_tp.cell_h_def, is_dram)* subarray.num_rows; + + rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + + delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); + + if (is_fa) + delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); + + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + + /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ + + power_matchline.searchOp.dynamic = dynSearchEng; + + //leakage in one subarray + double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? + double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); + double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; + double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv + + leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; + leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; + leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; + leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; + leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports + + power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + + leak_comparator_cam_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP + + leak_power_SCHP_port_sram_cell*SCHP; +// power_matchline.searchOp.leakage += leak_comparator_cam_cell; + power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise + power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, 
pmos) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd; + //In idle states, the hit/miss txs are closed (on) therefore no Isub + power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ + // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; + + //in idle state, Ig_on only possibly exist in access transistors of read only ports + double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); + double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; + double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2; + + gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd; + gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd; + gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; + gate_leak_power_SCHP_port_sram_cell = 0; + + //cout<<"power_matchline.searchOp.leakage"<array_power_gated? g_tp.sram_cell.Vcc_min : g_tp.sram_cell.Vdd); + leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * (g_ip->bitline_floating? g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd); + leak_power_RD_port_sram_cell = Iport_erp * (g_ip->bitline_floating? 
g_tp.sram.Vbitfloating : g_tp.sram_cell.Vdd); +// +// leak_power_cc_inverters_sram_cell_gated = leak_power_cc_inverters_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram_cell.Vcc_min; +// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating = leak_power_acc_tr_RW_or_WR_port_sram_cell/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating; +// leak_power_RD_port_sram_cell_floating = leak_power_RD_port_sram_cell_floating/g_tp.sram_cell.Vdd*g_tp.sram.Vbitfloating; +// + + + //in idle state, Ig_on only possibly exist in access transistors of read only ports + double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true); + double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true); + + gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd; + gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; + } + + + double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram); + double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); + double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); + double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) + + drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + + drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? 
cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + + if (is_dram) + { + double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl)); + //tstep = 2.3 * fraction * r_dev * + tstep = fraction * r_dev * (g_ip->is_3d_mem==1?1:2.3) * + (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) / + (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)); + delay_writeback = tstep; + dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) * + (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100; + per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; + } + else + { + double tau; + + if (deg_bl_muxing > 1) + { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* + subarray.num_cols * num_subarrays_per_mat*/; + blfloating_c += (C_bl + 2 * C_drain_bit_mux) * 2; + dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing); + blfloating_c += (2 * 
C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *2; + dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; + //Write Ops are differential for SRAM + + } + else + { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + + blfloating_c += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * 2; + dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; + + } + tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); + + +// if (g_ip->array_power_gated) +// power_bitline.readOp.leakage = +// leak_power_cc_inverters_sram_cell_gated + +// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating + +// leak_power_acc_tr_RW_or_WR_port_sram_cell_floating * (RWP + EWP - 1) + +// leak_power_RD_port_sram_cell_floating * ERP; +// else + power_bitline.readOp.leakage = + leak_power_cc_inverters_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP; + + power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + + gate_leak_power_RD_port_sram_cell * ERP; + + } + +// cout<<"leak_power_cc_inverters_sram_cell"<repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); + gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * 
g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay/(1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 + power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; + + + return inrisetime; +} + + + +double Mat::compute_comparator_delay(double inrisetime) +{ + int A = g_ip->tag_assoc; + + int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already + // a multiple of 4. + + /* First Inverter */ + double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); + double tf = Req*Ceq; + double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL); + double nextinputtime = st1del/VTHCOMPINV; + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + + //For each degree of associativity + //there are 4 such quarter comparators + double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; + double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; + /* Second Inverter */ + Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); + tf = Req*Ceq; + double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE); + nextinputtime = 
st2del/(1.0-VTHCOMPINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; + + /* Third Inverter */ + Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); + tf = Req*Ceq; + double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL); + nextinputtime = st3del/(VTHEVALINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; + + /* Final Inverter (virtual ground driver) discharging compare part */ + double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram); + double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */ + double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram); + double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + + gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram); + power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); + lkgCurrent 
+= cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2 + + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter + + /* time to go to threshold of mux driver */ + double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND); + /* take into account non-zero input rise time */ + double m = g_tp.peri_global.Vdd/nextinputtime; + double Tcomparatorni; + + if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m)) + { + double a = m; + double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); + double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); + Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a); + } + else + { + Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m; + } + delay_comparator = Tcomparatorni+st1del+st2del+st3del; + power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; + power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; + + return Tcomparatorni / (1.0 - VTHMUXNAND);; +} + + + +void Mat::compute_power_energy() +{ + //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power + //when search all subarrays and all mats are fully active + //when plain read/write only one subarray in a single mat is active. + + // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat. 
+ // FIXME + //CACTI3DD + if (g_ip->is_3d_mem) + { + if (g_ip->print_detail_debug) + cout << "mat.cc: subarray.num_cols = " << subarray.num_cols << endl; + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv = num_subarrays_per_mat; + + power_sa.readOp.dynamic *= subarray.num_cols; + + power_bitline.readOp.dynamic *= subarray.num_cols; + + power_subarray_out_drv.readOp.dynamic = power_subarray_out_drv.readOp.dynamic * g_ip->io_width * g_ip->burst_depth;//* subarray.num_cols; + + if (g_ip->print_detail_debug) + { + //cout<<"mat.cc: g_ip->burst_len = "<< g_ip->burst_len << endl; + cout<<"mat.cc: power_bl_precharge_eq_drv.readOp.dynamic = "<< power_bl_precharge_eq_drv.readOp.dynamic * 1e9 << " nJ" <power.readOp.dynamic + + b_mux_predec->power.readOp.dynamic + + sa_mux_lev_1_predec->power.readOp.dynamic + + sa_mux_lev_2_predec->power.readOp.dynamic; + + // add energy consumed in decoders + power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; + if (!(is_fa||pure_cam)) + power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; + + // add energy consumed in bitline prechagers, SAs, and bitlines + if (!(is_fa||pure_cam)) + { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + + //Add sense amps energy + num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ; + + // add energy consumed in bitlines + //cout<<"bitline power"<power.readOp.dynamic) * num_do_b_mat; + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + 
sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + } + + else if (is_fa) + { + //for plain read/write only one subarray in a mat is active + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic + + cam_bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + + //Add sense amps energy + num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing; + num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing; + power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search; + power_sa.readOp.dynamic *= num_sa_subarray; + + + // add energy consumed in bitlines + power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; + power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); + power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); + power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; + + //Add subarray output energy + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + //add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + 
power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; + power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; + + } + else + { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; + + //Add sense amps energy + num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; + power_sa.searchOp.dynamic = 0; + + power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; + power_bitline.searchOp.dynamic = 0; + power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; + + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + 
(power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + + ////add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; + power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; + + } + + }//CACTI3DD + + int number_output_drivers_subarray; + + +// // calculate leakage power + if (!(is_fa || pure_cam)) + { + number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + + power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * 
num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); + + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + + power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP); + power.readOp.leakage += power_comparator.readOp.leakage; + + array_leakage = power_bitline.readOp.leakage; + + cl_leakage = + power_bl_precharge_eq_drv.readOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage + + power_comparator.readOp.leakage; + + + + //Decoder blocks + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; + + if (!g_ip->wl_power_gated) + power.readOp.leakage += r_predec->power.readOp.leakage + + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage; + else + power.readOp.leakage += (r_predec->power.readOp.leakage + + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + 
sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage)/g_tp.peri_global.Vdd*g_tp.peri_global.Vcc_min; + + wl_leakage = r_predec->power.readOp.leakage + + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage; + + //++++Below is gate leakage + power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); + + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + + power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + + power_bl_precharge_eq_drv.readOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + //cout<<"leakage"<power_gating) + { + + //cout<<"leakage1"<area.get_area()*subarray.num_cols * num_subarrays_per_mat*dp.num_mats; + array_wakeup_e.readOp.dynamic = sram_sleep_tx->wakeup_power.readOp.dynamic * num_subarrays_per_mat*subarray.num_cols*dp.num_act_mats_hor_dir; + array_wakeup_t = sram_sleep_tx->wakeup_delay; + + wl_sleep_tx_area = row_dec->sleeptx->area.get_area()*subarray.num_rows * num_subarrays_per_mat*dp.num_mats; + wl_wakeup_e.readOp.dynamic = row_dec->sleeptx->wakeup_power.readOp.dynamic * 
num_subarrays_per_mat*subarray.num_rows*dp.num_act_mats_hor_dir; + wl_wakeup_t = row_dec->sleeptx->wakeup_delay; + + } + + // gate_leakage power + power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; + + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + b_mux_predec->power.readOp.gate_leakage + + sa_mux_lev_1_predec->power.readOp.gate_leakage + + sa_mux_lev_2_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage + + power_bit_mux_decoders.readOp.gate_leakage + + power_sa_mux_lev_1_decoders.readOp.gate_leakage + + power_sa_mux_lev_2_decoders.readOp.gate_leakage; + } + else if (is_fa) + { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + + power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + + //cout<<"leakage3"<power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + + //cout<<"leakage4"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + 
power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; + + //cout<<"leakage5"<power.readOp.leakage; + power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + + power.readOp.leakage += power_cam_all_active.searchOp.leakage; + +// cout<<"leakage6"<power.readOp.gate_leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + + //cout<<"leakage3"<power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + + power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + + //cout<<"leakage4"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; + + //cout<<"leakage5"<power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + + } + else + { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + + //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + //power_bl_precharge_eq_drv.readOp.leakage = 
bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + + + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.leakage += //power_bitline.readOp.leakage + + //power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + + // leakage power + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); + power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; + + //inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + + power.readOp.leakage += power_cam_all_active.searchOp.leakage; + + //+++Below is gate leakage + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + + + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + + 
//power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + + // gate_leakage power + power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; + + //inside cam + power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + } +} + diff --git a/T1/TP1/cacti-master/mat.h b/T1/TP1/cacti-master/mat.h new file mode 100644 index 0000000..c265e50 --- /dev/null +++ b/T1/TP1/cacti-master/mat.h @@ -0,0 +1,176 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#ifndef __MAT_H__ +#define __MAT_H__ + +#include "component.h" +#include "decoder.h" +#include "wire.h" +#include "subarray.h" +#include "powergating.h" + +class Mat : public Component +{ + public: + Mat(const DynamicParameter & dyn_p); + ~Mat(); + double compute_delays(double inrisetime); // return outrisetime + void compute_power_energy(); + + const DynamicParameter & dp; + 
+ // TODO: clean up pointers and powerDefs below + Decoder * row_dec; + Decoder * bit_mux_dec; + Decoder * sa_mux_lev_1_dec; + Decoder * sa_mux_lev_2_dec; + PredecBlk * dummy_way_sel_predec_blk1; + PredecBlk * dummy_way_sel_predec_blk2; + PredecBlkDrv * way_sel_drv1; + PredecBlkDrv * dummy_way_sel_predec_blk_drv2; + + Predec * r_predec; + Predec * b_mux_predec; + Predec * sa_mux_lev_1_predec; + Predec * sa_mux_lev_2_predec; + + Wire * subarray_out_wire; + Driver * bl_precharge_eq_drv; + Driver * cam_bl_precharge_eq_drv;//bitline pre-charge circuit is separated for CAM and RAM arrays. + Driver * ml_precharge_drv;//matchline prechange driver + Driver * sl_precharge_eq_drv;//searchline prechage driver + Driver * sl_data_drv;//search line data driver + Driver * ml_to_ram_wl_drv;//search line data driver + + + powerDef power_row_decoders; + powerDef power_bit_mux_decoders; + powerDef power_sa_mux_lev_1_decoders; + powerDef power_sa_mux_lev_2_decoders; + powerDef power_fa_cam; // TODO: leakage power is not computed yet + powerDef power_bl_precharge_eq_drv; + powerDef power_subarray_out_drv; + powerDef power_cam_all_active; + powerDef power_searchline_precharge; + powerDef power_matchline_precharge; + powerDef power_ml_to_ram_wl_drv; + + double delay_fa_tag, delay_cam; + double delay_before_decoder; + double delay_bitline; + double delay_wl_reset; + double delay_bl_restore; + + double delay_searchline; + double delay_matchchline; + double delay_cam_sl_restore; + double delay_cam_ml_reset; + double delay_fa_ram_wl; + + double delay_hit_miss_reset; + double delay_hit_miss; + + Subarray subarray; + powerDef power_bitline, power_searchline, power_matchline, power_bitline_gated; + double per_bitline_read_energy; + int deg_bl_muxing; + int num_act_mats_hor_dir; + double delay_writeback; + Area cell,cam_cell; + bool is_dram,is_fa, pure_cam, camFlag; + int num_mats; + powerDef power_sa; + double delay_sa; + double leak_power_sense_amps_closed_page_state; + double 
leak_power_sense_amps_open_page_state; + double delay_subarray_out_drv; + double delay_subarray_out_drv_htree; + double delay_comparator; + powerDef power_comparator; + int num_do_b_mat; + int num_so_b_mat; + int num_sa_subarray; + int num_sa_subarray_search; + double C_bl; + + uint32_t num_subarrays_per_mat; // the number of subarrays in a mat + uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat + + double array_leakage; + double wl_leakage; + double cl_leakage; + + Sleep_tx * sram_sleep_tx; + Sleep_tx * wl_sleep_tx; + Sleep_tx * cl_sleep_tx; + + powerDef array_wakeup_e; + double array_wakeup_t; + double array_sleep_tx_area; + + powerDef blfloating_wakeup_e; + double blfloating_wakeup_t; + double blfloating_sleep_tx_area; + + powerDef wl_wakeup_e; + double wl_wakeup_t; + double wl_sleep_tx_area; + + powerDef cl_wakeup_e; + double cl_wakeup_t; + double cl_sleep_tx_area; + + double compute_bitline_delay(double inrisetime); + double compute_sa_delay(double inrisetime); + double compute_subarray_out_drv(double inrisetime); + + private: + double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); + double width_write_driver_or_write_mux(); + double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w); + double compute_cam_delay(double inrisetime); + //double compute_bitline_delay(double inrisetime); + //double compute_sa_delay(double inrisetime); + //double compute_subarray_out_drv(double inrisetime); + double compute_comparator_delay(double inrisetime); + + int RWP; + int ERP; + int EWP; + int SCHP; +}; + + + +#endif diff --git a/T1/TP1/cacti-master/memcad.cc b/T1/TP1/cacti-master/memcad.cc new file mode 100644 index 0000000..64bf32a --- /dev/null +++ b/T1/TP1/cacti-master/memcad.cc @@ -0,0 +1,599 @@ +#include "memcad.h" +#include +#include +#include +#include +#include +#include + +using namespace std; + + +vector *memcad_all_channels; + +vector *memcad_all_bobs; + +vector *memcad_all_memories; 
+ +vector *memcad_best_results; + +bool compare_channels(channel_conf* first, channel_conf* second) +{ + if(first->capacity != second->capacity) + return (first->capacity < second->capacity); + + MemCad_metrics first_metric = first->memcad_params->first_metric; + MemCad_metrics second_metric = first->memcad_params->second_metric; + MemCad_metrics third_metric = first->memcad_params->third_metric; + + switch(first_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + + switch(second_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + + switch(third_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + return true; +} + + +void prune_channels() +{ + vector * temp = new vector(); + int last_added = -1; + for(unsigned int i=0;i< memcad_all_channels->size();i++) + { + if(last_added != (*memcad_all_channels)[i]->capacity) + { + temp->push_back(clone((*memcad_all_channels)[i])); + 
last_added = (*memcad_all_channels)[i]->capacity; + } + } + + for(unsigned int i=0;i< memcad_all_channels->size();i++) + { + delete (*memcad_all_channels)[i]; + } + memcad_all_channels->clear(); + delete memcad_all_channels; + memcad_all_channels = temp; +} + +void find_all_channels(MemCadParameters * memcad_params) +{ + + int DIMM_size[]={0,4,8,16,32,64}; + Mem_IO_type current_io_type = memcad_params->io_type; + DIMM_Model current_dimm_model = memcad_params->dimm_model; + + + memcad_all_channels= new vector(); + + // channels can have up to 3 DIMMs per channel + // di is the capacity if i-th dimm in the channel + for(int d1=0; d1<6;d1++) + { + for(int d2=d1;d2<6;d2++) + { + for(int d3=d2;d3<6;d3++) + { + // channel capacity should not exceed the entire memory capacity. + if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])>memcad_params->capacity) + continue; + + if( ((current_dimm_model== JUST_LRDIMM) || (current_dimm_model== ALL)) + && ((d1==0) || (MemoryParameters::cost[current_io_type][2][d1-1] dimm_cap; + dimm_cap.push_back(DIMM_size[d1]); if(d1>0) num_dimm_per_channel++; + dimm_cap.push_back(DIMM_size[d2]); if(d2>0) num_dimm_per_channel++; + dimm_cap.push_back(DIMM_size[d3]); if(d3>0) num_dimm_per_channel++; + + int max_index = bw_index(current_io_type, MemoryParameters::bandwidth_load[current_io_type][4-num_dimm_per_channel]); + for(int bw_id=0;bw_id<=max_index; ++bw_id) + { + int bandwidth = MemoryParameters::bandwidth_load[current_io_type][bw_id]; + channel_conf * new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, LRDIMM, false); + if(new_channel->cost push_back(new_channel); + } + + if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])==0) + continue; + + if(memcad_params->low_power_permitted) + { + new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, LRDIMM, true); + if(new_channel->cost push_back(new_channel); + } + } + + } + } + + if( (current_dimm_model== JUST_RDIMM) || (current_dimm_model== ALL) + && ((d1==0) || 
(MemoryParameters::cost[current_io_type][1][d1-1] dimm_cap; + dimm_cap.push_back(DIMM_size[d1]); if(d1>0) num_dimm_per_channel++; + dimm_cap.push_back(DIMM_size[d2]); if(d2>0) num_dimm_per_channel++; + dimm_cap.push_back(DIMM_size[d3]); if(d3>0) num_dimm_per_channel++; + + if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])==0) + continue; + + int max_index = bw_index(current_io_type, MemoryParameters::bandwidth_load[current_io_type][4-num_dimm_per_channel]); + + for(int bw_id=0;bw_id<=max_index; ++bw_id) + { + int bandwidth = MemoryParameters::bandwidth_load[current_io_type][bw_id]; + channel_conf * new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, RDIMM, false); + if(new_channel->cost push_back(new_channel); + } + + if(memcad_params->low_power_permitted) + { + new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, RDIMM, true); + if(new_channel->cost push_back(new_channel); + } + } + } + } + + if( (current_dimm_model== JUST_UDIMM) || (current_dimm_model== ALL) + && ((d1==0) || (MemoryParameters::cost[current_io_type][0][d1-1] dimm_cap; + dimm_cap.push_back(DIMM_size[d1]); if(d1>0) num_dimm_per_channel++; + dimm_cap.push_back(DIMM_size[d2]); if(d2>0) num_dimm_per_channel++; + dimm_cap.push_back(DIMM_size[d3]); if(d3>0) num_dimm_per_channel++; + + if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])==0) + continue; + int max_index = bw_index(current_io_type, MemoryParameters::bandwidth_load[current_io_type][4-num_dimm_per_channel]); + for(int bw_id=0;bw_id<=max_index; ++bw_id) + { + int bandwidth = MemoryParameters::bandwidth_load[current_io_type][bw_id]; + channel_conf * new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, UDIMM, false); + if(new_channel->cost push_back(new_channel); + } + + if(memcad_params->low_power_permitted) + { + new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, UDIMM, true); + if(new_channel->cost push_back(new_channel); + } + } + } + } + + } + } + } + + sort(memcad_all_channels->begin(), 
memcad_all_channels->end(), compare_channels); + + + prune_channels(); + + if(memcad_params->verbose) + { + for(unsigned int i=0;isize();i++) + { + cout << *(*memcad_all_channels)[i] << endl; + } + } + +} + +bool compare_channels_bw(channel_conf* first, channel_conf* second) +{ + return (first->bandwidth < second->bandwidth); +} + +bool compare_bobs(bob_conf* first, bob_conf* second) +{ + if(first->capacity != second->capacity) + return (first->capacity < second->capacity); + + MemCad_metrics first_metric = first->memcad_params->first_metric; + MemCad_metrics second_metric = first->memcad_params->second_metric; + MemCad_metrics third_metric = first->memcad_params->third_metric; + + switch(first_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + + switch(second_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + + switch(third_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + 
} + return true; +} + +void prune_bobs() +{ + vector * temp = new vector(); + int last_added = -1; + for(unsigned int i=0;i< memcad_all_bobs->size();i++) + { + if(last_added != (*memcad_all_bobs)[i]->capacity) + { + temp->push_back(clone((*memcad_all_bobs)[i])); + last_added = (*memcad_all_bobs)[i]->capacity; + } + } + + for(unsigned int i=0;i< memcad_all_bobs->size();i++) + { + delete (*memcad_all_bobs)[i]; + } + memcad_all_bobs->clear(); + delete memcad_all_bobs; + memcad_all_bobs = temp; +} + +void find_bobs_recursive(MemCadParameters * memcad_params,int start,int end,int nb, list *channel_index) +{ + if(nb==1) + { + for(int i=start; i<=end;++i) + { + channel_index->push_back(i); + + vector temp; + for(list::iterator it= channel_index->begin(); it!= channel_index->end(); it++) + { + int idx = *it; + temp.push_back((*memcad_all_channels)[idx]); + } + memcad_all_bobs->push_back(new bob_conf(memcad_params, &temp)); + temp.clear(); + + channel_index->pop_back(); + } + return; + } + for(int i=start;i<=end;++i) + { + channel_index->push_back(i); + find_bobs_recursive(memcad_params,i,end,nb-1,channel_index); + channel_index->pop_back(); + } +} + +void find_all_bobs(MemCadParameters * memcad_params) +{ + memcad_all_bobs = new vector(); + if(memcad_params->mirror_in_bob) + { + for(unsigned int i=0;isize();++i) + { + vector channels; + for(int j=0;jnum_channels_per_bob;j++) + channels.push_back((*memcad_all_channels)[i]); + memcad_all_bobs->push_back(new bob_conf(memcad_params, &channels)); + channels.clear(); + } + } + else if(memcad_params->same_bw_in_bob) + { + sort(memcad_all_channels->begin(), memcad_all_channels->end(), compare_channels_bw); + vector start_index; start_index.push_back(0); + vector end_index; + int last_bw =(*memcad_all_channels)[0]->bandwidth; + for(unsigned int i=0;i< memcad_all_channels->size();i++) + { + if(last_bw!=(*memcad_all_channels)[i]->bandwidth) + { + end_index.push_back(i-1); + start_index.push_back(i); + last_bw = 
(*memcad_all_channels)[i]->bandwidth; + } + } + end_index.push_back(memcad_all_channels->size()-1); + + list channel_index; + + for(unsigned int i=0;i< start_index.size();++i) + { + find_bobs_recursive(memcad_params,start_index[i],end_index[i],memcad_params->num_channels_per_bob, &channel_index); + } + + } + else + { + cout << "We do not support different frequencies per in a BoB!" << endl; + assert(false); + } + + + sort(memcad_all_bobs->begin(), memcad_all_bobs->end(), compare_bobs); + prune_bobs(); + if(memcad_params->verbose) + { + for(unsigned int i=0;isize();i++) + { + cout << *(*memcad_all_bobs)[i] << endl; + } + } +} + +void find_mems_recursive(MemCadParameters * memcad_params, int remaining_capacity, int start, int nb, list* bobs_index) +{ + + if(nb==1) + { + for(unsigned int i=start; i< memcad_all_bobs->size();++i) + { + if((*memcad_all_bobs)[i]->capacity != remaining_capacity) + continue; + + bobs_index->push_back(i); + vector temp; + for(list::iterator it= bobs_index->begin(); it!= bobs_index->end(); it++) + { + int index = *it; + temp.push_back((*memcad_all_bobs)[index]); + } + memcad_all_memories->push_back(new memory_conf(memcad_params, &temp)); + temp.clear(); + bobs_index->pop_back(); + } + return; + } + + for(unsigned int i=start; isize();i++) + { + if((*memcad_all_bobs)[i]->capacity > remaining_capacity) + continue; + + int new_remaining_capacity = remaining_capacity-(*memcad_all_bobs)[i]->capacity; + bobs_index->push_back(i); + find_mems_recursive(memcad_params, new_remaining_capacity, i, nb-1, bobs_index); + bobs_index->pop_back(); + } +} + +//void find_mems_recursive(MemCadParameters * memcad_params, int start, int + +bool compare_memories(memory_conf* first, memory_conf* second) +{ + if(first->capacity != second->capacity) + return (first->capacity < second->capacity); + + MemCad_metrics first_metric = first->memcad_params->first_metric; + MemCad_metrics second_metric = first->memcad_params->second_metric; + MemCad_metrics third_metric = 
first->memcad_params->third_metric; + + switch(first_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + + switch(second_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + + switch(third_metric) + { + case(Cost): + if(first->cost != second->cost) + return (first->cost < second->cost); + break; + case(Bandwidth): + if(first->bandwidth != second->bandwidth) + return (first->bandwidth > second->bandwidth); + break; + case(Energy): + if( fabs(first->energy_per_access - second->energy_per_access)>EPS) + return (first->energy_per_access < second->energy_per_access); + break; + default: + assert(false); + } + return true; +} + +bool find_all_memories(MemCadParameters * memcad_params) +{ + memcad_all_memories = new vector(); + + list bobs_index; + find_mems_recursive(memcad_params, memcad_params->capacity, 0,memcad_params->num_bobs, &bobs_index); + + sort(memcad_all_memories->begin(), memcad_all_memories->end(), compare_memories); + + if(memcad_params->verbose) + { + cout << "all possible results:" << endl; + for(unsigned int i=0;isize();i++) + { + cout << *(*memcad_all_memories)[i] << endl; + } + } + if(memcad_all_memories->size()==0) + { + cout << "No result found " << endl; + return false; + } + cout << "top 3 best memory configurations 
are:" << endl; + int min_num_results = (memcad_all_memories->size()>3?3:memcad_all_memories->size()); + for(int i=0;isize();++i) + { + delete (*memcad_all_channels)[i]; + } + delete memcad_all_channels; + + for(unsigned int i=0;isize();++i) + { + delete (*memcad_all_bobs)[i]; + } + delete memcad_all_bobs; + + for(unsigned int i=0;isize();++i) + { + delete (*memcad_all_memories)[i]; + } + delete memcad_all_memories; +} + + +void solve_memcad(MemCadParameters * memcad_params) +{ + + find_all_channels(memcad_params); + find_all_bobs(memcad_params); + find_all_memories(memcad_params); + clean_results(); +} + diff --git a/T1/TP1/cacti-master/memcad.h b/T1/TP1/cacti-master/memcad.h new file mode 100644 index 0000000..fa534e3 --- /dev/null +++ b/T1/TP1/cacti-master/memcad.h @@ -0,0 +1,30 @@ +#ifndef __MEMCAD_H__ +#define __MEMCAD_H__ + +#include "memcad_parameters.h" +#include + + +extern vector *memcad_all_channels; + +extern vector *memcad_all_bobs; + +extern vector *memcad_all_memories; + +extern vector *memcad_best_results; + + + +void find_all_channels(MemCadParameters * memcad_params); + +void find_all_bobs(MemCadParameters * memcad_params); + +bool find_all_memories(MemCadParameters * memcad_params); + +void clean_results(); + +void solve_memcad(MemCadParameters * memcad_params); + +#endif + + diff --git a/T1/TP1/cacti-master/memcad_parameters.cc b/T1/TP1/cacti-master/memcad_parameters.cc new file mode 100644 index 0000000..295e431 --- /dev/null +++ b/T1/TP1/cacti-master/memcad_parameters.cc @@ -0,0 +1,466 @@ +#include "memcad_parameters.h" +#include +#include + +MemCadParameters::MemCadParameters(InputParameter * g_ip) +{ + // default value + io_type=DDR4; // DDR3 vs. DDR4 + capacity=400; // in GB + num_bobs=4; // default=4me + num_channels_per_bob=2; // 1 means no bob + capacity_wise=true; // true means the load on each channel is proportional to its capacity. 
+ first_metric=Cost; + second_metric=Bandwidth; + third_metric=Energy; + dimm_model=ALL; + low_power_permitted=false; + load=0.9; // between 0 to 1 + row_buffer_hit_rate=1; + rd_2_wr_ratio=2; + same_bw_in_bob=true; // true if all the channels in the bob have the same bandwidth + mirror_in_bob=true;// true if all the channels in the bob have the same configs + total_power=false; // false means just considering I/O Power. + verbose=false; + // values for input + io_type=g_ip->io_type; + capacity=g_ip->capacity; + num_bobs=g_ip->num_bobs; + num_channels_per_bob=g_ip->num_channels_per_bob; + first_metric=g_ip->first_metric; + second_metric=g_ip->second_metric; + third_metric=g_ip->third_metric; + dimm_model=g_ip->dimm_model; + ///low_power_permitted=g_ip->low_power_permitted; + ///load=g_ip->load; + ///row_buffer_hit_rate=g_ip->row_buffer_hit_rate; + ///rd_2_wr_ratio=g_ip->rd_2_wr_ratio; + ///same_bw_in_bob=g_ip->same_bw_in_bob; + mirror_in_bob=g_ip->mirror_in_bob; + ///total_power=g_ip->total_power; + verbose=g_ip->verbose; + +} + +void MemCadParameters::print_inputs() +{ + +} + +bool MemCadParameters::sanity_check() +{ + + return true; +} + + +double MemoryParameters::VDD[2][2][4]= //[lp:hp][ddr3:ddr4][frequency index] +{ + { + {1.5,1.5,1.5,1.5}, + {1.2,1.2,1.2,1.2} + }, + { + {1.35,1.35,1.35,1.35}, + {1.0,1.0,1.0,1.0} + } +}; + +double MemoryParameters::IDD0[2][4]= +{ + {55,60,65,75}, + {58,58,60,64} +}; + +double MemoryParameters::IDD2P0[2][4]= +{ + {20,20,20,20}, + {20,20,20,20} +}; + +double MemoryParameters::IDD2P1[2][4]= +{ + {30,30,32,37}, + {30,30,30,32} +}; + +double MemoryParameters::IDD2N[2][4]= +{ + {40,42,45,50}, + {44,44,46,50} +}; + +double MemoryParameters::IDD3P[2][4]= +{ + {45,50,55,60}, + {44,44,44,44} +}; + +double MemoryParameters::IDD3N[2][4]= +{ + {42,47,52,57}, + {44,44,44,44} +}; + +double MemoryParameters::IDD4R[2][4]= +{ + {120,135,155,175}, + {140,140,150,160} +}; + +double MemoryParameters::IDD4W[2][4]= +{ + {100,125,145,165}, + 
{156,156,176,196} +}; + +double MemoryParameters::IDD5[2][4]= +{ + {150,205,210,220}, + {190,190,190,192} +}; + +double MemoryParameters::io_energy_read[2][3][3][4] =// [ddr3:ddr4][udimm:rdimm:lrdimm][load 1:2:3][frequency 0:1:2:3] +{ + { //ddr3 + {//udimm + {2592.33, 2593.33, 3288.784, 4348.612}, + {2638.23, 2640.23, 3941.584, 5415.492}, + {2978.659, 2981.659, 4816.644, 6964.162} + + }, + {//rdimm + {2592.33, 3087.071, 3865.044, 4844.982}, + {2932.759, 3733.318, 4237.634, 5415.492}, + {3572.509, 4603.109, 5300.004, 6964.162} + }, + {//lrdimm + {4628.966, 6357.625, 7079.348, 9680.454}, + {5368.26, 6418.788, 7428.058, 10057.164}, + {5708.689, 7065.038, 7808.678, 10627.674} + + } + + }, + { //ddr + {//udimm + {2135.906, 2633.317, 2750.919, 2869.406}, + {2458.714, 2695.791, 2822.298, 3211.111}, + {2622.85, 3030.048, 3160.265, 3534.448} + + }, + {//rdimm + {2135.906, 2633.317, 2750.919, 2869.406}, + {2458.714, 2695.791, 3088.886, 3211.111}, + {2622.85, 3030.048, 3312.468, 3758.445} + + }, + {//lrdimm + {4226.903, 5015.342, 5490.61, 5979.864}, + {4280.471, 5319.132, 5668.945, 6060.216}, + {4603.279, 5381.605, 5740.325, 6401.926} + + } + + } +}; + +double MemoryParameters::io_energy_write[2][3][3][4] = +{ + { //ddr3 + {//udimm + {2758.951, 2984.854, 3571.804, 4838.902}, + {2804.851, 3768.524, 4352.214, 5580.362}, + {3213.897, 3829.684, 5425.854, 6933.512} + + }, + {//rdimm + {2758.951, 3346.104, 3931.154, 4838.902}, + {3167.997, 4114.754, 4696.724, 5580.362}, + {3561.831, 3829.684, 6039.994, 8075.542} + + }, + {//lrdimm + {4872.238, 5374.314, 7013.868, 9267.574}, + {5701.502, 6214.348, 7449.758, 10045.004}, + {5747.402, 6998.018, 8230.168, 10786.464} + + } + + }, + { //ddr4 + {//udimm + {2525.129, 2840.853, 2979.037, 3293.608}, + {2933.756, 3080.126, 3226.497, 3979.698}, + {3293.964, 3753.37, 3906.137, 4312.448} + + }, + {//rdimm + {2525.129, 2840.853, 3155.117, 3293.608}, + {2933.756, 3080.126, 3834.757, 3979.698}, + {3293.964, 3753.37, 4413.037, 5358.078} + + }, + 
{//lrdimm + {4816.453, 5692.314, 5996.134, 6652.936}, + {4870.021, 5754.788, 6067.514, 6908.636}, + {5298.373, 5994.07, 6491.054, 7594.726} + + } + + } +}; + +double MemoryParameters::T_RAS[2] = {35,35}; + +double MemoryParameters::T_RC[2] = {47.5,47.5}; + +double MemoryParameters::T_RP[2] = {13,13}; + +double MemoryParameters::T_RFC[2] = {340,260}; + +double MemoryParameters::T_REFI[2] = {7800,7800}; + +int MemoryParameters::bandwidth_load[2][4]={{400,533,667,800},{800,933,1066,1200}}; + +double MemoryParameters::cost[2][3][5] = +{ + { + {40.38,76.13,INF,INF,INF}, + {42.24,64.17,122.6,304.3,INF}, + {INF,INF,211.3,287.5,1079.5} + }, + { + {25.99,45.99,INF,INF,INF}, + {32.99,60.45,126,296.3,INF}, + {INF,INF,278.99,333,1474} + } +}; + + + +/////////////////////////////////////////////////////////////////////////////////// + +double calculate_power(double load, double row_buffer_hr, double rd_wr_ratio, int chips_per_rank, int frequency_index, int lp) +{ + return 0; +} + +int bw_index(Mem_IO_type type, int bandwidth) +{ + if(type==DDR3) + { + if(bandwidth<=400) + return 0; + else if(bandwidth <= 533) + return 1; + else if(bandwidth <= 667) + return 2; + else + return 3; + } + else + { + if(bandwidth<=800) + return 0; + else if(bandwidth <= 933) + return 1; + else if(bandwidth <= 1066) + return 2; + else + return 3; + } + return 0; +} + +channel_conf::channel_conf(MemCadParameters * memcad_params, const vector& dimm_cap, int bandwidth, Mem_DIMM type, bool low_power) +:memcad_params(memcad_params),type(type),low_power(low_power),bandwidth(bandwidth),latency(0),valid(true) +{ + //assert(memcad_params); + assert(dimm_cap.size() <=DIMM_PER_CHANNEL); + assert(memcad_params->io_type<2); // So far, we just support DDR3 and DDR4. 
+ // upading capacity + num_dimm_per_channel=0; + capacity =0; + for(int i=0;i<5;i++) histogram_capacity[i]=0; + for(unsigned int i=0;i0) + bandwidth =0; + + //bandwidth = MemoryParameters::bandwidth_load[memcad_params->io_type][4-num_dimm_per_channel]; + // updating channel cost + cost =0; + for(int i=0;i<5;++i) + cost += histogram_capacity[i] * MemoryParameters::cost[memcad_params->io_type][type][i]; + + // update energy + calc_power(); + +} + +void channel_conf::calc_power() +{ + + double read_ratio = memcad_params->rd_2_wr_ratio/(1.0+memcad_params->rd_2_wr_ratio); + double write_ratio = 1.0/(1.0+memcad_params->rd_2_wr_ratio); + Mem_IO_type current_io_type = memcad_params->io_type; + double capacity_ratio = (capacity/(double) memcad_params->capacity ); + + double T_BURST = 4; // memory cycles + + energy_per_read = MemoryParameters::io_energy_read[current_io_type][type][num_dimm_per_channel-1][bw_index(current_io_type,bandwidth)]; + energy_per_read /= (bandwidth/T_BURST); + + energy_per_write = MemoryParameters::io_energy_write[current_io_type][type][num_dimm_per_channel-1][bw_index(current_io_type,bandwidth)]; + energy_per_write /= (bandwidth/T_BURST); + if(memcad_params->capacity_wise) + { + energy_per_read *= capacity_ratio; + energy_per_write *= capacity_ratio; + } + + energy_per_access = read_ratio* energy_per_read + write_ratio*energy_per_write; + +} + +channel_conf* clone(channel_conf* origin) +{ + vector temp; + int size =4; + for(int i=0;i<5;++i) + { + for(int j=0;jhistogram_capacity[i];++j) + { + temp.push_back(size); + } + size *=2; + } + channel_conf * new_channel = new channel_conf(origin->memcad_params,temp,origin->bandwidth, origin->type,origin->low_power); + return new_channel; +} + +ostream& operator<<(ostream &os, const channel_conf& ch_cnf) +{ + os << "cap: " << ch_cnf.capacity << " GB "; + os << "bw: " << ch_cnf.bandwidth << " (MHz) "; + os << "cost: $" << ch_cnf.cost << " "; + os << "dpc: " << ch_cnf.num_dimm_per_channel << " "; + os << 
"energy: " << ch_cnf.energy_per_access << " (nJ) "; + os << " DIMM: " << ((ch_cnf.type==UDIMM)?" UDIMM ":((ch_cnf.type==RDIMM)?" RDIMM ":"LRDIMM ")); + os << " low power: " << ((ch_cnf.low_power)? "T ":"F "); + os << "[ "; + for(int i=0;i<5;i++) + os << ch_cnf.histogram_capacity[i] << "(" << (1<<(i+2)) << "GB) "; + os << "]"; + return os; +} + + +bob_conf::bob_conf(MemCadParameters * memcad_params, vector * in_channels) +:memcad_params(memcad_params),num_channels(0),capacity(0),bandwidth(0) +,energy_per_read(0),energy_per_write(0),energy_per_access(0),cost(0),latency(0),valid(true) +{ + + assert(in_channels->size() <= MAX_NUM_CHANNELS_PER_BOB); + for(int i=0;isize();++i) + { + channels[i] = (*in_channels)[i]; + num_channels++; + capacity += (*in_channels)[i]->capacity; + cost += (*in_channels)[i]->cost; + bandwidth += (*in_channels)[i]->bandwidth; + energy_per_read += (*in_channels)[i]->energy_per_read; + energy_per_write += (*in_channels)[i]->energy_per_write; + energy_per_access += (*in_channels)[i]->energy_per_access; + } +} + +bob_conf* clone(bob_conf* origin) +{ + vector temp; + for(int i=0;ichannels)[i]==0 ) + break; + temp.push_back( (origin->channels)[i] ); + } + + bob_conf * new_bob = new bob_conf(origin->memcad_params,&temp); + return new_bob; +} + +ostream & operator <<(ostream &os, const bob_conf& bob_cnf) +{ + os << " " << "BoB " ; + os << "cap: " << bob_cnf.capacity << " GB "; + os << "num_channels: " << bob_cnf.num_channels << " "; + os << "bw: " << bob_cnf.bandwidth << " (MHz) "; + os << "cost: $" << bob_cnf.cost << " "; + os << "energy: " << bob_cnf.energy_per_access << " (nJ) "; + os << endl; + os << " " << " ==============" << endl; + for(int i=0;i * in_bobs) +:memcad_params(memcad_params),num_bobs(0),capacity(0),bandwidth(0) +,energy_per_read(0),energy_per_write(0),energy_per_access(0),cost(0),latency(0),valid(true) +{ + assert(in_bobs->size() <= MAX_NUM_BOBS); + for(int i=0;isize();++i) + { + bobs[i] = (*in_bobs)[i]; + num_bobs++; + capacity += 
(*in_bobs)[i]->capacity; + cost += (*in_bobs)[i]->cost; + bandwidth += (*in_bobs)[i]->bandwidth; + energy_per_read += (*in_bobs)[i]->energy_per_read; + energy_per_write += (*in_bobs)[i]->energy_per_write; + energy_per_access += (*in_bobs)[i]->energy_per_access; + } +} + +ostream & operator <<(ostream &os, const memory_conf& mem_cnf) +{ + os << "Memory " ; + os << "cap: " << mem_cnf.capacity << " GB "; + os << "num_bobs: " << mem_cnf.num_bobs << " "; + os << "bw: " << mem_cnf.bandwidth << " (MHz) "; + os << "cost: $" << mem_cnf.cost << " "; + os << "energy: " << mem_cnf.energy_per_access << " (nJ) "; + os << endl; + os << " {" << endl; + for(int i=0;i +#include +#include "cacti_interface.h" +#include "const.h" +#include "parameter.h" + +using namespace std; + +///#define INF 1000000 +#define EPS 0.0000001 + +#define MAX_DIMM_PER_CHANNEL 3 +#define MAX_CAP_PER_DIMM 64 +#define MAX_RANKS_PER_DIMM 4 +#define MIN_BW_PER_CHANNEL 400 +#define MAX_DDR3_CHANNEL_BW 800 +#define MAX_DDR4_CHANNEL_BW 1600 +#define MAX_NUM_CHANNELS_PER_BOB 2 +#define MAX_NUM_BOBS 6 +#define DIMM_PER_CHANNEL 3 + +/* +enum Mem_IO_type +{ + DDR3, + DDR4, + LPDDR2, + WideIO, + Low_Swing_Diff, + Serial +}; + +enum Mem_DIMM +{ + UDIMM, + RDIMM, + LRDIMM +}; +*/ + + + +class MemCadParameters +{ + public: + + Mem_IO_type io_type; // DDR3 vs. DDR4 + + int capacity; // in GB + + int num_bobs; // default=4me + + ///int bw_per_channel; // defaul=1600 MHz; + + ///bool with_bob; + + int num_channels_per_bob; // 1 means no bob + + bool capacity_wise; // true means the load on each channel is proportional to its capacity. + + ///int min_bandwith; + + MemCad_metrics first_metric; + + MemCad_metrics second_metric; + + MemCad_metrics third_metric; + + DIMM_Model dimm_model; + + bool low_power_permitted; // Not yet implemented. It determines acceptable VDDs. 
+ + double load; // between 0 to 1 + + double row_buffer_hit_rate; + + double rd_2_wr_ratio; + + bool same_bw_in_bob; // true if all the channels in the bob have the same bandwidth. + + + bool mirror_in_bob;// true if all the channels in the bob have the same configs + + bool total_power; // false means just considering I/O Power + + bool verbose; + + // Functions + MemCadParameters(InputParameter * g_ip); + void print_inputs(); + bool sanity_check(); + +}; + + +////////////////////////////////////////////////////////////////////////////////// + +class MemoryParameters +{ + public: + // Power Parameteres + static double VDD[2][2][4]; + + static double IDD0[2][4]; + + static double IDD1[2][4]; + + static double IDD2P0[2][4]; + + static double IDD2P1[2][4]; + + static double IDD2N[2][4]; + + static double IDD3P[2][4]; + + static double IDD3N[2][4]; + + static double IDD4R[2][4]; + + static double IDD4W[2][4]; + + static double IDD5[2][4]; + + static double io_energy_read[2][3][3][4]; + + static double io_energy_write[2][3][3][4]; + + // Timing Parameters + static double T_RAS[2]; + + static double T_RC[2]; + + static double T_RP[2]; + + static double T_RFC[2]; + + static double T_REFI[2]; + + // Bandwidth Parameters + static int bandwidth_load[2][4]; + + // Cost Parameters + static double cost[2][3][5]; + + + // Functions + MemoryParameters(); + + int bw_index(Mem_IO_type type, int bandwidth); +}; + +/////////////////////////////////////////////////////////////////////////// + +int bw_index(Mem_IO_type type, int bandwidth); + + +/////////////////////////////////////////////////////////////////////////// + +class channel_conf +{ + public: + MemCadParameters *memcad_params; + + Mem_DIMM type; + int num_dimm_per_channel; + int histogram_capacity[5]; // 0->4GB, 1->8GB, 2->16GB, 3->32GB, 4->64GB + bool low_power; + + int capacity; + int bandwidth; + double energy_per_read; + double energy_per_write; + double energy_per_access; + + double cost; + double latency; + + bool 
valid; + // Functions + channel_conf(MemCadParameters * memcad_params, const vector& dimm_cap, int bandwidth, Mem_DIMM type, bool low_power); + + void calc_power(); + + friend channel_conf* clone(channel_conf*); + friend ostream & operator<<(ostream &os, const channel_conf& ch_cnf); + +}; + + +/////////////////////////////////////////////////////////////////////////// + +class bob_conf +{ + public: + MemCadParameters *memcad_params; + int num_channels; + channel_conf *channels[MAX_NUM_CHANNELS_PER_BOB]; + + int capacity; + int bandwidth; + double energy_per_read; + double energy_per_write; + double energy_per_access; + + double cost; + double latency; + + bool valid; + + bob_conf(MemCadParameters * memcad_params, vector * channels); + + friend bob_conf* clone(bob_conf*); + friend ostream & operator <<(ostream &os, const bob_conf& bob_cnf); +}; + +/////////////////////////////////////////////////////////////////////////// + + +class memory_conf +{ + public: + MemCadParameters *memcad_params; + int num_bobs; + bob_conf* bobs[MAX_NUM_BOBS]; + + int capacity; + int bandwidth; + double energy_per_read; + double energy_per_write; + double energy_per_access; + + double cost; + double latency; + + bool valid; + + memory_conf(MemCadParameters * memcad_params, vector * bobs); + friend ostream & operator <<(ostream &os, const memory_conf& bob_cnf); +}; + + + + + + +#endif + + diff --git a/T1/TP1/cacti-master/memorybus.cc b/T1/TP1/cacti-master/memorybus.cc new file mode 100644 index 0000000..c626c92 --- /dev/null +++ b/T1/TP1/cacti-master/memorybus.cc @@ -0,0 +1,741 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "memorybus.h" +#include "wire.h" +#include +#include +#include + +Memorybus::Memorybus( + enum Wire_type wire_model, double mat_w, double mat_h, double subarray_w_, double subarray_h_, + int _row_add_bits, int _col_add_bits, int _data_bits, int _ndbl, int _ndwl, /*enum Htree_type htree_type,*/ + enum Memorybus_type membus_type_, const DynamicParameter & dp_, + 
/*TechnologyParameter::*/DeviceType *dt): + dp(dp_), + in_rise_time(0), out_rise_time(0), + is_dram(dp.is_dram), + membus_type(membus_type_), + mat_width(mat_w), mat_height(mat_h), subarray_width(subarray_w_), subarray_height(subarray_h_), + data_bits(_data_bits), ndbl(_ndbl), ndwl(_ndwl), + wt(wire_model), deviceType(dt) +{ + if (g_ip->print_detail_debug) + cout << "memorybus.cc: membus_type = " << membus_type << endl; + power.readOp.dynamic = 0; + power.readOp.leakage = 0; + power.readOp.gate_leakage = 0; + power.searchOp.dynamic =0; + delay = 0; + + cell.h = g_tp.dram.b_h; + cell.w = g_tp.dram.b_w; + + if (!g_ip->is_3d_mem) + assert(ndbl >= 2 && ndwl >= 2); + + if (g_ip->print_detail_debug) + { + cout << "burst length: " << g_ip->burst_depth <io_width <io_width; //g_ip->out_w; //x4, x8, x16 chip + burst_length = g_ip->burst_depth; //g_ip->burst_len; //DDR2 4, DDR3 8 + data_bits = chip_IO_width * burst_length; + + row_add_bits = _row_add_bits; + col_add_bits = _col_add_bits; + + + max_unpipelined_link_delay = 0; //TODO + min_w_nmos = g_tp.min_w_nmos_; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; + + + semi_repeated_global_line = 0; // 1: semi-repeated global line, repeaters in decoder stripes; 0: Non-repeated global line, slower + ndwl = _ndwl/ g_ip->num_tier_row_sprd; + ndbl = _ndbl/ g_ip->num_tier_col_sprd; + num_subarray_global_IO = ndbl>16?16:ndbl; + + switch (membus_type) + { + case Data_path: + data_bits = chip_IO_width * burst_length; + Network(); + break; + case Row_add_path: + add_bits = _row_add_bits; + num_dec_signals = dp.num_r_subarray * ndbl; + Network(); + break; + case Col_add_path: + add_bits = _col_add_bits; + num_dec_signals = dp.num_c_subarray * ndwl / data_bits; + Network(); + break; + default: + assert(0); + break; + } + + assert(power.readOp.dynamic >= 0); + assert(power.readOp.leakage >= 0); +} + +Memorybus::~Memorybus() +{ + delete center_stripe; + delete bank_bus; + switch (membus_type) + { + case Data_path: + 
delete local_data; + delete global_data; + delete local_data_drv; + if(semi_repeated_global_line) + delete global_data_drv; + delete out_seg; + break; + case Row_add_path: + delete global_WL; + delete add_predec; + delete add_dec; + delete lwl_drv; + break; + case Col_add_path: + delete column_sel; + delete add_predec; + delete add_dec; + break; + default: + assert(0); + break; + } +} + +// ---For 3D DRAM, the bank height and length is reduced to 1/num_tier_row_sprd and 1/num_tier_col_sprd. +// ---As a result, ndwl and ndbl are also reduced to the same ratio, but he number of banks increase to the product of these two parameters +void Memorybus::Network() +{ + //double POLY_RESISTIVITY = 0.148; //ohm-micron + double R_wire_dec_out = 0; + double C_ld_dec_out = 0; + double bank_bus_length = 0; + double area_bank_vertical_peripheral_circuitry = 0, area_bank_horizontal_peripheral_circuitry = 0; + + area_sense_amp = (mat_height - subarray_height) * mat_width * ndbl * ndwl; + area_subarray = subarray_height * subarray_width * ndbl * ndwl; + + // ---Because in 3D DRAM mat only has one subarray, but contains the subarray peripheral circuits such as SA. Detail see mat.cc is_3d_mem part. 
+ subarray_height = mat_height; + subarray_width = mat_width; + + if(g_ip->partition_gran == 0)// Coarse_rank_level: add/data bus around + { + height_bank = subarray_height * ndbl + (col_add_bits + row_add_bits)*g_tp.wire_outside_mat.pitch/2 + data_bits*g_tp.wire_outside_mat.pitch; + length_bank = subarray_width * ndwl + (col_add_bits + row_add_bits)*g_tp.wire_outside_mat.pitch/2 + data_bits*g_tp.wire_outside_mat.pitch; + area_address_bus = (row_add_bits + col_add_bits) *g_tp.wire_outside_mat.pitch * sqrt(length_bank * height_bank); + area_data_bus = data_bits *g_tp.wire_outside_mat.pitch * sqrt(length_bank * height_bank); + } + else if(g_ip->partition_gran == 1)//Fine_rank_level: add bus replaced by TSVs + { + height_bank = subarray_height * ndbl; + length_bank = subarray_width * ndwl; + area_address_bus = 0; + area_data_bus = data_bits *g_tp.wire_outside_mat.pitch * sqrt(length_bank * height_bank); + } + else if(g_ip->partition_gran == 2)//Coarse_bank_level: add/data bus replaced by TSVs + { + height_bank = subarray_height * ndbl; + length_bank = subarray_width * ndwl; + area_address_bus = 0; + area_data_bus = 0; + } + + + + + if (g_ip->print_detail_debug) + { + cout << "memorybus.cc: N subarrays per mat = " << dp.num_subarrays / dp.num_mats << endl; + cout << "memorybus.cc: g_tp.wire_local.pitch = " << g_tp.wire_local.pitch /1e3 << " mm" << endl; + cout << "memorybus.cc: subarray_width = " << subarray_width /1e3 << " mm" << endl; + cout << "memorybus.cc: subarray_height = " << subarray_height /1e3 << " mm" << endl; + cout << "memorybus.cc: mat_height = " << mat_height /1e3 << " mm" << endl; + cout << "memorybus.cc: mat_width = " << mat_width /1e3 << " mm" << endl; + cout << "memorybus.cc: height_bank = " << height_bank /1e3 << " mm" << endl; + cout << "memorybus.cc: length_bank = " << length_bank /1e3 << " mm" << endl; + } + + int num_banks_hor_dir = 1 << (int)ceil((double)_log2( g_ip->nbanks * g_ip->num_tier_row_sprd )/2 ) ; + int num_banks_ver_dir = 1 << 
(int)ceil((double)_log2( g_ip->nbanks * g_ip->num_tier_col_sprd * g_ip->num_tier_row_sprd /num_banks_hor_dir ) ); + + if (g_ip->print_detail_debug) + { + cout<<"horz bank #: "<nbanks = " << g_ip->nbanks << endl; + cout << "memorybus.cc: num_banks_hor_dir = " << num_banks_hor_dir << endl; + } + + // ************************************* Wire Interconnections ***************************************** + double center_stripe_length = 0.5 * double(num_banks_hor_dir) * height_bank; + if(g_ip->print_detail_debug) + { + cout << "memorybus.cc: center_stripe wire length = " << center_stripe_length << " um"<< endl; + } + center_stripe = new Wire(wt, center_stripe_length); + area_bus = 2.0 * center_stripe_length * (row_add_bits + col_add_bits + data_bits) *g_tp.wire_outside_mat.pitch / g_ip->nbanks; + + //if (g_ip->partition_gran == 0) + //area_bus = (row_add_bits + col_add_bits) *g_tp.wire_outside_mat.pitch * center_stripe_length; + if (membus_type == Row_add_path) + { + int num_lwl_per_gwl = 4; + global_WL = new Wire(wt, length_bank, 1, 1, 1, inside_mat, CU_RESISTIVITY, &(g_tp.peri_global)); + //local_WL = new Wire(wt, length_bank/num_lwl_drv, local_wires, POLY_RESISTIVITY, &(g_tp.dram_wl)); + num_lwl_drv = ndwl; + //C_GWL = num_lwl_drv * gate_C(g_tp.min_w_nmos_+min_w_pmos,0) + c_w_metal * dp.num_c_subarray * ndwl; + if(semi_repeated_global_line) + { + C_GWL = (double)num_lwl_per_gwl * gate_C(g_tp.min_w_nmos_+min_w_pmos,0) + g_tp.wire_inside_mat.C_per_um * (subarray_width + g_tp.wire_local.pitch); + R_GWL = g_tp.wire_inside_mat.R_per_um * (subarray_width + g_tp.wire_local.pitch); + } + else + { + C_GWL = (double)num_lwl_drv * num_lwl_per_gwl * gate_C(g_tp.min_w_nmos_+min_w_pmos,0) + g_tp.wire_inside_mat.C_per_um * length_bank; + R_GWL = length_bank * g_tp.wire_inside_mat.R_per_um; + } + + lwl_driver_c_gate_load = dp.num_c_subarray * gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true); + //lwl_driver_c_wire_load = subarray_width * g_tp.wire_local.C_per_um; + 
//lwl_driver_r_wire_load = subarray_width * g_tp.wire_local.R_per_um; + + if (g_ip->print_detail_debug) + { + cout<<"C_GWL: "<repeater_size = " << column_sel->repeater_size << endl; + + bank_bus_length = double(num_banks_ver_dir) * 0.5 * MAX(length_bank, height_bank); + bank_bus = new Wire(wt, bank_bus_length); + } + else if (membus_type == Data_path) + { + local_data = new Wire(wt, subarray_width, 1, 1, 1, inside_mat, CU_RESISTIVITY, &(g_tp.peri_global)); + global_data = new Wire(wt, sqrt(length_bank * height_bank), 1, 1, 1, outside_mat, CU_RESISTIVITY, &(g_tp.peri_global)); + + if(semi_repeated_global_line) + { + C_global_data = g_tp.wire_inside_mat.C_per_um * (subarray_height + g_tp.wire_local.pitch); + R_global_data = g_tp.wire_inside_mat.R_per_um * (subarray_height + g_tp.wire_local.pitch) ; + + } + else + { + C_global_data = g_tp.wire_inside_mat.C_per_um * height_bank /2; + R_global_data = g_tp.wire_inside_mat.R_per_um * height_bank /2; + } + + global_data_drv = new Driver( + 0, + C_global_data, + R_global_data, + is_dram); + global_data_drv->compute_delay(0); + global_data_drv->compute_area(); + //---Unrepeated local dataline + double local_data_c_gate_load = dp.num_c_subarray * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, cell.w, is_dram); + //double local_data_c_gate_load = 0; + double local_data_c_wire_load = dp.num_c_subarray * g_tp.dram.b_w * g_tp.wire_inside_mat.C_per_um; + double local_data_r_wire_load = dp.num_c_subarray * g_tp.dram.b_w * g_tp.wire_inside_mat.R_per_um; + //double local_data_r_gate_load = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); + double local_data_r_gate_load = 0; + + double tf = (local_data_c_gate_load + local_data_c_wire_load) * (local_data_r_wire_load + local_data_r_gate_load); + double this_delay = horowitz(0, tf, 0.5, 0.5, RISE); + //double local_data_outrisetime = this_delay/(1.0-0.5); + + //---Unrepeated and undriven local dataline, not significant growth + //local_data->delay = this_delay; + //local_data->power.readOp.dynamic 
= (local_data_c_gate_load + local_data_c_wire_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + + + double data_drv_c_gate_load = local_data_c_gate_load; + double data_drv_c_wire_load = local_data_c_wire_load; + double data_drv_r_wire_load = local_data_r_gate_load + local_data_r_wire_load; + + //---Assume unrepeated global data path, too high RC + //double data_drv_c_wire_load = height_bank * g_tp.wire_outside_mat.C_per_um; + //double data_drv_r_wire_load = height_bank * g_tp.wire_inside_mat.R_per_um; + + + local_data_drv = new Driver( + data_drv_c_gate_load, + data_drv_c_wire_load, + data_drv_r_wire_load, + is_dram); + local_data_drv->compute_delay(0); + local_data_drv->compute_area(); + + if (g_ip->print_detail_debug) + { + cout<<"C: "<delay * 1e9 <<" ns"<repeater_size * gate_C(g_tp.min_w_nmos_+min_w_pmos,0), + global_data->repeater_spacing * g_tp.wire_inside_mat.C_per_um, + global_data->repeater_spacing * g_tp.wire_inside_mat.R_per_um, + is_dram);*/ + + //bank_bus_length = double(num_banks_ver_dir) * 0.5 * (height_bank + 0.5*double(row_add_bits+col_add_bits+data_bits)*g_tp.wire_outside_mat.pitch) - height_bank + length_bank; + bank_bus_length = double(num_banks_ver_dir) * 0.5 * MAX(length_bank, height_bank); + bank_bus = new Wire(wt, bank_bus_length); + if (g_ip->print_detail_debug) + cout << "memorybus.cc: bank_bus_length = " << bank_bus_length << endl; + + out_seg = new Wire(wt, 0.25 * num_banks_hor_dir * (length_bank + (row_add_bits+col_add_bits+data_bits)*g_tp.wire_outside_mat.pitch) ); + area_IOSA = (875+500)*g_ip->F_sz_um*g_ip->F_sz_um * data_bits;//Reference: + area_data_drv = local_data_drv->area.get_area() * data_bits; + if(ndbl>16) + { + area_IOSA *= (double)ndbl/16.0; + area_data_drv *= (double)ndbl/16.0; + } + area_local_dataline = data_bits*subarray_width *g_tp.wire_local.pitch*ndbl; + + } + + + // Row decoder + if (membus_type == Row_add_path || membus_type == Col_add_path ) + { + + if (g_ip->print_detail_debug) + { + cout << "memorybus.cc: 
num_dec_signals = " << num_dec_signals << endl; + cout << "memorybus.cc: C_ld_dec_out = " << C_ld_dec_out << endl; + cout << "memorybus.cc: R_wire_dec_out = " << R_wire_dec_out << endl; + cout << "memorybus.cc: is_dram = " << is_dram << endl; + cout << "memorybus.cc: cell.h = " << cell.h << endl; + } + + add_dec = new Decoder( + (num_dec_signals>16)?num_dec_signals:16, + false, + C_ld_dec_out, + R_wire_dec_out, + false, + is_dram, + membus_type == Row_add_path?true:false, + cell); + + + + // Predecoder and decoder for GWL + double C_wire_predec_blk_out; + double R_wire_predec_blk_out; + C_wire_predec_blk_out = 0; // num_subarrays_per_row * dp.num_r_subarray * g_tp.wire_inside_mat.C_per_um * cell.h; + R_wire_predec_blk_out = 0; // num_subarrays_per_row * dp.num_r_subarray * g_tp.wire_inside_mat.R_per_um * cell.h; + + + //int num_subarrays_per_mat = dp.num_subarrays/dp.num_mats; + int num_dec_per_predec = 1; + PredecBlk * add_predec_blk1 = new PredecBlk( + num_dec_signals, + add_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_dec_per_predec, + is_dram, + true); + + + + PredecBlk * add_predec_blk2 = new PredecBlk( + num_dec_signals, + add_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_dec_per_predec, + is_dram, + false); + + + + PredecBlkDrv * add_predec_blk_drv1 = new PredecBlkDrv(0, add_predec_blk1, is_dram); + PredecBlkDrv * add_predec_blk_drv2 = new PredecBlkDrv(0, add_predec_blk2, is_dram); + + add_predec = new Predec(add_predec_blk_drv1, add_predec_blk_drv2); + + + + if (membus_type == Row_add_path) + { + area_row_predec_dec = add_predec_blk_drv1->area.get_area() + add_predec_blk_drv2->area.get_area() + + add_predec_blk1->area.get_area() + add_predec_blk2->area.get_area() + num_dec_signals * add_dec->area.get_area(); + + + area_lwl_drv = num_lwl_drv/2.0 * dp.num_r_subarray * ndbl * lwl_drv->area.get_area(); //num_lwl_drv is ndwl/the lwl driver count one gwl connects. two adjacent lwls share one driver. 
+ + if (g_ip->print_detail_debug) + { + cout<<"memorybus.cc: area_bank_vertical_peripheral_circuitry = " << area_bank_vertical_peripheral_circuitry /1e6<<" mm2"<area.get_area() + add_predec_blk_drv2->area.get_area() + + add_predec_blk1->area.get_area() + add_predec_blk2->area.get_area() + num_dec_signals * add_dec->area.get_area(); + if(ndbl>16) + { + area_col_predec_dec *= (double)ndbl/16.0; + } + } + + area_bank_vertical_peripheral_circuitry = area_row_predec_dec + area_lwl_drv + area_address_bus + area_data_bus ; + area_bank_horizontal_peripheral_circuitry = area_col_predec_dec + area_data_drv + (area_bus + area_IOSA)/g_ip->nbanks; + + if (g_ip->print_detail_debug) + { + cout<<"memorybus.cc: add_predec_blk_drv1->area = " << add_predec_blk_drv1->area.get_area() /1e6<<" mm2"<area = " << add_predec_blk_drv2->area.get_area() /1e6<<" mm2"<area = " << add_predec_blk1->area.get_area() /1e6<<" mm2"<area = " << add_predec_blk2->area.get_area() /1e6<<" mm2"<area = " << num_dec_signals * add_dec->area.get_area() /1e6<<" mm2"<delay + bank_bus->delay; + delay += delay_bus; + //outrisetime = local_data_drv->compute_delay(inrisetime); + //local_data_drv_outrisetime = local_data_drv->delay; + delay_global_data = (semi_repeated_global_line >0) ? 
(global_data_drv->delay*num_subarray_global_IO) : (global_data_drv->delay + global_data->delay); + if(g_ip->partition_gran==0 || g_ip->partition_gran==1) + delay += delay_global_data; + //delay += local_data->delay; + delay_local_data = local_data_drv->delay; + delay += delay_local_data; + delay_data_buffer = 2 * 1e-6/(double)g_ip->sys_freq_MHz; + //delay += bank.mat.delay_subarray_out_drv_htree; + delay += delay_data_buffer; + //cout << 1e3/(double)g_ip->sys_freq_MHz<< endl; + //delay += out_seg->delay * burst_length; + if (g_ip->print_detail_debug) + cout << "memorybus.cc: data path delay = " << delay << endl; + out_rise_time = 0; + } + else + { + delay = 0; + delay_bus = center_stripe->delay + bank_bus->delay; + delay += delay_bus; + predec_outrisetime = add_predec->compute_delays(inrisetime); + add_dec_outrisetime = add_dec->compute_delays(predec_outrisetime); + delay_add_predecoder = add_predec->delay; + delay += delay_add_predecoder; + + if (membus_type == Row_add_path) + { + if(semi_repeated_global_line) + { + delay_add_decoder = add_dec->delay * ndwl; + if(g_ip->page_sz_bits > 8192) + delay_add_decoder /= (double)(g_ip->page_sz_bits / 8192); + } + else + { + delay_add_decoder = add_dec->delay; + } + delay += delay_add_decoder; + // There is no function to compute_delay in wire.cc, need to double check if center_stripe->delay and bank_bus->delay is correct. 
+ lwl_drv_outrisetime = lwl_drv->compute_delay(add_dec_outrisetime); + ///tf = (lwl_driver_c_gate_load + lwl_driver_c_wire_load) * lwl_driver_r_wire_load; + // ### no need for global_WL->delay + // delay_WL = global_WL->delay + lwl_drv->delay + horowitz(lwl_drv_outrisetime, tf, 0.5, 0.5, RISE); + delay_lwl_drv = lwl_drv->delay; + if(!g_ip->fine_gran_bank_lvl) + delay += delay_lwl_drv; + if (g_ip->print_detail_debug) + cout << "memorybus.cc: row add path delay = " << delay << endl; + + out_rise_time = lwl_drv_outrisetime; + } + + else if (membus_type == Col_add_path) + { + if(semi_repeated_global_line) + { + delay_add_decoder = add_dec->delay * num_subarray_global_IO; + } + else + { + delay += column_sel->delay; + delay_add_decoder = add_dec->delay; + } + delay += delay_add_decoder; + + out_rise_time = 0; + if (g_ip->print_detail_debug) + { + //cout << "memorybus.cc, compute_delays col: center_stripe->delay = " << center_stripe->delay << endl; + //cout << "memorybus.cc, compute_delays col: bank_bus->delay = " << bank_bus->delay << endl; + //cout << "memorybus.cc, compute_delays col: add_predec->delay = " << add_predec->delay << endl; + //cout << "memorybus.cc, compute_delays col: add_dec->delay = " << add_dec->delay << endl; + + cout << "memorybus.cc: column add path delay = " << delay << endl; + } + + } + else + { + assert(0); + } + } + + + // Double check! + out_rise_time = delay / (1.0-0.5); + // Is delay_wl_reset necessary here? Is the 'false' condition appropriate? 
See the same code as in mat.cc + /*if (add_dec->exist == false) + { + int delay_wl_reset = MAX(add_predec->blk1->delay, add_predec->blk2->delay); + //delay += delay_wl_reset; + }*/ + + return out_rise_time; +} + + + + +void Memorybus::compute_power_energy() +{ + double coeff1[4] = {(double)add_bits, (double)add_bits, (double)add_bits, (double)add_bits}; + double coeff2[4] = {(double)data_bits, (double)data_bits, (double)data_bits, (double)data_bits}; + double coeff3[4] = {(double)num_lwl_drv, (double)num_lwl_drv, (double)num_lwl_drv, (double)num_lwl_drv}; + double coeff4[4] = {(double)burst_length*chip_IO_width, (double)burst_length*chip_IO_width, + (double)burst_length*chip_IO_width, (double)burst_length*chip_IO_width}; + double coeff5[4] = {(double)ndwl, (double)ndwl, (double)ndwl, (double)ndwl}; + double coeff6[4] = {(double)num_subarray_global_IO, (double)num_subarray_global_IO, (double)num_subarray_global_IO, (double)num_subarray_global_IO}; + + //double coeff4[4] = {(double)num_dec_signals, (double)num_dec_signals, (double)num_dec_signals, (double)num_dec_signals}; + switch (membus_type) + { + case Data_path: + power_bus = (center_stripe->power + bank_bus->power) * coeff2; + power_local_data = local_data_drv->power * coeff2; + power_global_data = semi_repeated_global_line >0 ? 
(global_data_drv->power*coeff2) : (global_data_drv->power+global_data->power); + + power_global_data.readOp.dynamic = power_global_data.readOp.dynamic + 1.8/1e3*deviceType->Vdd*10.0/1e9/64*data_bits; + power = power_bus + power_local_data; + if(!g_ip->fine_gran_bank_lvl) + power = power + power_global_data; + //power += local_data->power; + + power_burst = out_seg->power * coeff4;//Account for burst read, approxmate the wire length by the center stripe + //power = power + power_burst; + if(g_ip->print_detail_debug) + { + cout << "memorybus.cc: data path center stripe energy = " << center_stripe->power.readOp.dynamic*1e9 << " nJ" << endl; + cout << "memorybus.cc: data path bank bus energy = " << bank_bus->power.readOp.dynamic*1e9 << " nJ" << endl; + cout << "memorybus.cc: data path data driver energy = " << local_data_drv->power.readOp.dynamic*1e9 << " nJ" << endl; + } + break; + case Row_add_path: + power_bus = (center_stripe->power + bank_bus->power) * coeff1; + power_add_predecoder = add_predec->power; + if(semi_repeated_global_line) + { + power_add_decoders = add_dec->power * coeff5; + //power_add_decoders.readOp.dynamic /= (g_ip->page_sz_bits > 8192)?((double)g_ip->page_sz_bits/8192):1; + if(g_ip->page_sz_bits > 8192) + power_add_decoders.readOp.dynamic /= (double)(g_ip->page_sz_bits / 8192); + } + else + power_add_decoders = add_dec->power;// * (1<< add_predec->blk1->number_input_addr_bits); + power_lwl_drv = lwl_drv->power * coeff3; + //power_local_WL.readOp.dynamic = num_lwl_drv * C_LWL * deviceType->Vdd * deviceType->Vdd; + power = power_bus + power_add_predecoder + power_add_decoders + power_lwl_drv; + break; + case Col_add_path: + power_bus = (center_stripe->power + bank_bus->power) * coeff1;// + column_sel->power * double(chip_IO_width * burst_length); + power_add_predecoder = add_predec->power; + if(semi_repeated_global_line) + { + power_add_decoders = add_dec->power * coeff6; + power_add_decoders.readOp.dynamic = power_add_decoders.readOp.dynamic * 
g_ip->page_sz_bits / data_bits; + power_col_sel.readOp.dynamic = 0; + } + else + { + power_add_decoders = add_dec->power;// * (1<< add_predec->blk1->number_input_addr_bits); + power_col_sel.readOp.dynamic = column_sel->power.readOp.dynamic * g_ip->page_sz_bits / data_bits; + } + power = power_bus + power_add_predecoder + power_add_decoders; + if(!g_ip->fine_gran_bank_lvl) + power = power + power_col_sel; + break; + default: + assert(0); + break; + } + + return; + +} + + + diff --git a/T1/TP1/cacti-master/memorybus.h b/T1/TP1/cacti-master/memorybus.h new file mode 100644 index 0000000..b4eb280 --- /dev/null +++ b/T1/TP1/cacti-master/memorybus.h @@ -0,0 +1,150 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + +#ifndef __MEMORYBUS_H__ +#define __MEMORYBUS_H__ + +#include "basic_circuit.h" +#include "component.h" +#include "parameter.h" +//#include "assert.h" +#include "cacti_interface.h" +//#include "wire.h" +class Wire; +//#include "area.h" +#include "decoder.h" + +class Memorybus : public Component +{ + public: + Memorybus(enum Wire_type wire_model, double mat_w, double mat_h, double subarray_w, double subarray_h, + int _row_add_bits, int _col_add_bits, int _data_bits, int _ndbl, int _ndwl, /*enum Htree_type htree_type,*/ + enum Memorybus_type membus_type, const DynamicParameter & dp_, + /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global) + ); + ~Memorybus(); + + //void in_membus(); + //void out_membus(); + void Network(); + + // repeaters only at h-tree nodes + void limited_in_membus(); + void limited_out_membus(); + void input_nand(double s1, double s2, double l); + //void output_buffer(double s1, double s2, double l); + + const DynamicParameter & dp; + + double in_rise_time, out_rise_time; + + void set_in_rise_time(double rt) + { + in_rise_time = rt; + } + + double max_unpipelined_link_delay; + powerDef power_bit; + void memory_bus(); + + double height_bank, length_bank; // The actual height and length of a single bank including all wires between subarrays. 
+ Wire * center_stripe; + Wire * bank_bus; + Wire * global_WL; //3 hierarchical connection wires. + Wire * column_sel; + Wire * local_data; + Wire * global_data; + Wire * out_seg; + // Driver for LWL connecting GWL, same as in mat.cc + double lwl_driver_c_gate_load, lwl_driver_c_wire_load, lwl_driver_r_wire_load; + + powerDef power_bus; + powerDef power_lwl_drv; + powerDef power_add_decoders; + powerDef power_global_WL; + powerDef power_local_WL; + powerDef power_add_predecoder; + powerDef power_burst; + powerDef power_col_sel; + powerDef power_local_data; + powerDef power_global_data; + double delay_bus, delay_add_predecoder, delay_add_decoder, delay_lwl_drv, delay_global_data, delay_local_data, delay_data_buffer; + double area_lwl_drv, area_row_predec_dec, area_col_predec_dec, area_subarray, area_bus, area_address_bus, area_data_bus, area_data_drv, area_IOSA, area_local_dataline, area_sense_amp; + + + Area cell; + bool is_dram; + + Driver * lwl_drv, * local_data_drv, * global_data_drv ; + Predec * add_predec; + Decoder * add_dec; + + double compute_delays(double inrisetime); // return outrisetime + void compute_power_energy(); // + + + + + private: + double wire_bw; + double init_wire_bw; // bus width at root + enum Memorybus_type membus_type; +// double htree_hnodes; +// double htree_vnodes; + double mat_width; + double mat_height; + double subarray_width, subarray_height; + //int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits; + int row_add_bits, col_add_bits; + int add_bits, data_bits, num_dec_signals; + int semi_repeated_global_line; + + int ndbl, ndwl; +// bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously +// bool search_tree; + + enum Wire_type wt; + double min_w_nmos; + double min_w_pmos; + + int num_lwl_drv; //Ratio between GWL and LWL, how many local WL drives each GWL drives. 
+ int chip_IO_width; + int burst_length; + int num_subarray_global_IO; + + double C_GWL, C_LWL, R_GWL, R_LWL, C_colsel, R_colsel, C_global_data, R_global_data; // Capacitance of global/local WLs. + + /*TechnologyParameter::*/DeviceType *deviceType; +}; + +#endif + diff --git a/T1/TP1/cacti-master/nuca.cc b/T1/TP1/cacti-master/nuca.cc new file mode 100644 index 0000000..02e44a1 --- /dev/null +++ b/T1/TP1/cacti-master/nuca.cc @@ -0,0 +1,611 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include "nuca.h" +#include "Ucache.h" +#include + +unsigned int MIN_BANKSIZE=65536; +#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */ +#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */ +#define CONTR_2_BANK_LAT 0 + +int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */]; + + Nuca::Nuca( + /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global) + ):deviceType(dt) +{ + init_cont(); +} + +void +Nuca::init_cont() +{ + FILE *cont; + char line[5000]; + char jk[5000]; + cont = fopen("contention.dat", "r"); + if (!cont) { + cout << "contention.dat file is missing!\n"; + exit(0); + } + + for(int i=0; i<2; i++) { + for(int j=2; j<5; j++) { + for(int k=0; k nuca_list; + Router *router_s[ROUTER_TYPES]; + router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global)); + router_s[0]->print_router(); + router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global)); + router_s[1]->print_router(); + router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global)); + router_s[2]->print_router(); + + int core_in; // to store no. 
of cores + + /* to search diff grid organizations */ + double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat, + curr_acclat; + double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power, + avg_leakage_power; + + double opt_acclat = INF;//, opt_avg_lat = INF, opt_tot_lat = INF; + int opt_rows = 0; + int opt_columns = 0; +// double opt_totno_hops = 0; + double opt_avg_hop = 0; + double opt_dyn_power = 0, opt_leakage_power = 0; + min_values_t minval; + + int bank_start = 0; + + int flit_width = 0; + + /* vertical and horizontal hop latency values */ + int ver_hop_lat, hor_hop_lat; /* in cycles */ + + + /* no. of different bank sizes to consider */ + int iterations; + + + g_ip->nuca_cache_sz = g_ip->cache_sz; + nuca_list.push_back(new nuca_org_t()); + + if (g_ip->cache_level == 0) l2_c = 1; + else l2_c = 0; + + if (g_ip->cores <= 4) core_in = 2; + else if (g_ip->cores <= 8) core_in = 3; + else if (g_ip->cores <= 16) core_in = 4; + else {cout << "Number of cores should be <= 16!\n"; exit(0);} + + + // set the lower bound to an appropriate value. this depends on cache associativity + if (g_ip->assoc > 2) { + i = 2; + while (i != g_ip->assoc) { + MIN_BANKSIZE *= 2; + i *= 2; + } + } + + iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE); + + if (g_ip->force_wiretype) + { + if (g_ip->wt == Low_swing) { + wt_min = Low_swing; + wt_max = Low_swing; + } + else { + wt_min = Global; + wt_max = Low_swing-1; + } + } + else { + wt_min = Global; + wt_max = Low_swing; + } + if (g_ip->nuca_bank_count != 0) { // simulate just one bank + if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 && + g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 && + g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) { + fprintf(stderr,"Incorrect bank count value! 
Please fix the value in cache.cfg\n"); + } + bank_start = (int)logtwo((double)g_ip->nuca_bank_count); + iterations = bank_start+1; + g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count; + } + cout << "Simulating various NUCA configurations\n"; + for (it=bank_start; itnuca_cache_sz/g_ip->cache_sz; + cout << "====" << g_ip->cache_sz << "\n"; + + for (wr=wt_min; wr<=wt_max; wr++) { + + for (ro=0; roflit_size; //initialize router + nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time; + + /* calculate router and wire parameters */ + + double vlength = ures.cache_ht; /* length of the wire (u)*/ + double hlength = ures.cache_len; // u + + /* find delay, area, and power for wires */ + wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength); + wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength); + + + hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay, + 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); + ver_hop_lat = calc_cycles(wire_vertical[wr]->delay, + 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); + + /* + * assume a grid like topology and explore for optimal network + * configuration using different row and column count values. + */ + for (c=1; c<=(unsigned int)bank_count; c++) { + while (bank_count%c != 0) c++; + r = bank_count/c; + + /* + * to find the avg access latency of a NUCA cache, uncontended + * access time to each bank from the + * cache controller is calculated. + * avg latency = + * sum of the access latencies to individual banks)/bank + * count value. 
+ */ + totno_hops = totno_hhops = totno_vhops = tot_lat = 0; +/// k = 1; + for (i=0; idelay*avg_hop) + + calc_cycles(ures.access_time, + 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); + + /* avg access lat of nuca */ + avg_dyn_power = + avg_hop * + (router_s[ro]->power.readOp.dynamic) + avg_hhop * + (wire_horizontal[wr]->power.readOp.dynamic) * + (g_ip->block_sz*8 + 64) + avg_vhop * + (wire_vertical[wr]->power.readOp.dynamic) * + (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic; + + avg_leakage_power = + bank_count * router_s[ro]->power.readOp.leakage + + avg_hhop * (wire_horizontal[wr]->power.readOp.leakage* + wire_horizontal[wr]->delay) * flit_width + + avg_vhop * (wire_vertical[wr]->power.readOp.leakage * + wire_horizontal[wr]->delay); + + if (curr_acclat < opt_acclat) { + opt_acclat = curr_acclat; +/// opt_tot_lat = tot_lat; +/// opt_avg_lat = avg_lat; +/// opt_totno_hops = totno_hops; + opt_avg_hop = avg_hop; + opt_rows = r; + opt_columns = c; + opt_dyn_power = avg_dyn_power; + opt_leakage_power = avg_leakage_power; + } + totno_hops = 0; + tot_lat = 0; + totno_hhops = 0; + totno_vhops = 0; + } + nuca_list.back()->wire_pda.power.readOp.dynamic = + opt_avg_hop * flit_width * + (wire_horizontal[wr]->power.readOp.dynamic + + wire_vertical[wr]->power.readOp.dynamic); + nuca_list.back()->avg_hops = opt_avg_hop; + /* network delay/power */ + nuca_list.back()->h_wire = wire_horizontal[wr]; + nuca_list.back()->v_wire = wire_vertical[wr]; + nuca_list.back()->router = router_s[ro]; + /* bank delay/power */ + + nuca_list.back()->bank_pda.delay = ures.access_time; + nuca_list.back()->bank_pda.power = ures.power; + nuca_list.back()->bank_pda.area.h = ures.cache_ht; + nuca_list.back()->bank_pda.area.w = ures.cache_len; + nuca_list.back()->bank_pda.cycle_time = ures.cycle_time; + + num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/, + 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/)); + if(num_cyc%2 != 0) num_cyc++; + if (num_cyc > 16) num_cyc = 16; // we 
have data only up to 16 cycles + + if (it < 7) { + nuca_list.back()->nuca_pda.delay = opt_acclat + + cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + nuca_list.back()->contention = + cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + } + else { + nuca_list.back()->nuca_pda.delay = opt_acclat + + cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + nuca_list.back()->contention = + cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + } + nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power; + nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power; + + /* array organization */ + nuca_list.back()->bank_count = bank_count; + nuca_list.back()->rows = opt_rows; + nuca_list.back()->columns = opt_columns; + calculate_nuca_area (nuca_list.back()); + + minval.update_min_values(nuca_list.back()); + nuca_list.push_back(new nuca_org_t()); + opt_acclat = BIGNUM; + + } + } + g_ip->cache_sz /= 2; + } + + delete(nuca_list.back()); + nuca_list.pop_back(); + opt_n = find_optimal_nuca(&nuca_list, &minval); + print_nuca(opt_n); + g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count; + + list::iterator niter; + for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) + { + delete *niter; + } + nuca_list.clear(); + + for(int i=0; i < ROUTER_TYPES; i++) + { + delete router_s[i]; + } + g_ip->display_ip(); + // g_ip->force_cache_config = true; + // g_ip->ndwl = 8; + // g_ip->ndbl = 16; + // g_ip->nspd = 4; + // g_ip->ndcm = 1; + // g_ip->ndsam1 = 8; + // g_ip->ndsam2 = 32; + +} + + + void +Nuca::print_nuca (nuca_org_t *fr) +{ + printf("\n---------- CACTI version 6.5, Non-uniform Cache Access " + "----------\n\n"); + printf("Optimal number of banks - %d\n", fr->bank_count); + printf("Grid organization rows x columns - %d x %d\n", + fr->rows, fr->columns); + printf("Network frequency - %g GHz\n", + (1/fr->nuca_pda.cycle_time)*1e3); + printf("Cache dimension (mm x mm) - %g x %g\n", + fr->nuca_pda.area.h*1e-3, + fr->nuca_pda.area.w*1e-3); + + 
fr->router->print_router(); + + printf("\n\nWire stats:\n"); + if (fr->h_wire->wt == Global) { + printf("\tWire type - Full swing global wires with least " + "possible delay\n"); + } + else if (fr->h_wire->wt == Global_5) { + printf("\tWire type - Full swing global wires with " + "5%% delay penalty\n"); + } + else if (fr->h_wire->wt == Global_10) { + printf("\tWire type - Full swing global wires with " + "10%% delay penalty\n"); + } + else if (fr->h_wire->wt == Global_20) { + printf("\tWire type - Full swing global wires with " + "20%% delay penalty\n"); + } + else if (fr->h_wire->wt == Global_30) { + printf("\tWire type - Full swing global wires with " + "30%% delay penalty\n"); + } + else if(fr->h_wire->wt == Low_swing) { + printf("\tWire type - Low swing wires\n"); + } + + printf("\tHorizontal link delay - %g (ns)\n", + fr->h_wire->delay*1e9); + printf("\tVertical link delay - %g (ns)\n", + fr->v_wire->delay*1e9); + printf("\tDelay/length - %g (ns/mm)\n", + fr->h_wire->delay*1e9/fr->bank_pda.area.w); + printf("\tHorizontal link energy -dynamic/access %g (nJ)\n" + "\t -leakage %g (nW)\n\n", + fr->h_wire->power.readOp.dynamic*1e9, + fr->h_wire->power.readOp.leakage*1e9); + printf("\tVertical link energy -dynamic/access %g (nJ)\n" + "\t -leakage %g (nW)\n\n", + fr->v_wire->power.readOp.dynamic*1e9, + fr->v_wire->power.readOp.leakage*1e9); + printf("\n\n"); + fr->v_wire->print_wire(); + printf("\n\nBank stats:\n"); +} + + + nuca_org_t * +Nuca::find_optimal_nuca (list *n, min_values_t *minval) +{ + double cost = 0; + double min_cost = BIGNUM; + nuca_org_t *res = NULL; + float d, a, dp, lp, c; + int v; + dp = g_ip->dynamic_power_wt_nuca; + lp = g_ip->leakage_power_wt_nuca; + a = g_ip->area_wt_nuca; + d = g_ip->delay_wt_nuca; + c = g_ip->cycle_time_wt_nuca; + + list::iterator niter; + + + for (niter = n->begin(); niter != n->end(); niter++) { + fprintf(stderr, "\n-----------------------------" + "---------------\n"); + + + printf("NUCA___stats %d \tbankcount: lat = %g 
\tdynP = %g \twt = %d\t " + "bank_dpower = %g \tleak = %g \tcycle = %g\n", + (*niter)->bank_count, + (*niter)->nuca_pda.delay, + (*niter)->nuca_pda.power.readOp.dynamic, + (*niter)->h_wire->wt, + (*niter)->bank_pda.power.readOp.dynamic, + (*niter)->nuca_pda.power.readOp.leakage, + (*niter)->nuca_pda.cycle_time); + + + if (g_ip->ed == 1) { + cost = ((*niter)->nuca_pda.delay/minval->min_delay)* + ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } + else if (g_ip->ed == 2) { + cost = ((*niter)->nuca_pda.delay/minval->min_delay)* + ((*niter)->nuca_pda.delay/minval->min_delay)* + ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } + else { + /* + * check whether the current organization + * meets the input deviation constraints + */ + v = check_nuca_org((*niter), minval); + if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling + + if (v) { + cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) + + c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) + + dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) + + lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) + + a * ((*niter)->nuca_pda.area.get_area()/minval->min_area)); + fprintf(stderr, "cost = %g\n", cost); + + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } + else { + niter = n->erase(niter); + if (niter !=n->begin()) + niter --; + } + } + } + return res; +} + + int +Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) +{ + if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) { + return 0; + } + if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > + g_ip->dynamic_power_dev_nuca) { + return 0; + } + if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > + 
g_ip->leakage_power_dev_nuca) { + return 0; + } + if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > + g_ip->cycle_time_dev_nuca) { + return 0; + } + if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 > + g_ip->area_dev_nuca) { + return 0; + } + return 1; +} + + void +Nuca::calculate_nuca_area (nuca_org_t *nuca) +{ + nuca->nuca_pda.area.h= + nuca->rows * ((nuca->h_wire->wire_width + + nuca->h_wire->wire_spacing) + * nuca->router->flit_size + + nuca->bank_pda.area.h); + + nuca->nuca_pda.area.w = + nuca->columns * ((nuca->v_wire->wire_width + + nuca->v_wire->wire_spacing) + * nuca->router->flit_size + + nuca->bank_pda.area.w); +} + diff --git a/T1/TP1/cacti-master/nuca.h b/T1/TP1/cacti-master/nuca.h new file mode 100644 index 0000000..4996968 --- /dev/null +++ b/T1/TP1/cacti-master/nuca.h @@ -0,0 +1,101 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + +#ifndef __NUCA_H__ +#define __NUCA_H__ + +#include "basic_circuit.h" +#include "component.h" +#include "parameter.h" +#include "assert.h" +#include "cacti_interface.h" +#include "wire.h" +#include "mat.h" +#include "io.h" +#include "router.h" +#include + + + +class nuca_org_t { + public: + ~nuca_org_t(); +// int size; + /* area, power, access time, and cycle time stats */ + Component nuca_pda; + Component bank_pda; + Component wire_pda; + Wire *h_wire; + Wire *v_wire; + Router *router; + /* for particular network configuration + * calculated based on a cycle accurate + * simulation Ref: CACTI 6 - Tech report + */ + double contention; + + /* grid network stats */ + double avg_hops; + int rows; + int columns; + int bank_count; +}; + + + +class Nuca : public Component +{ + public: + Nuca( + /*TechnologyParameter::*/DeviceType *dt); + void print_router(); + ~Nuca(); + void sim_nuca(); + void init_cont(); + int calc_cycles(double lat, double oper_freq); + void calculate_nuca_area (nuca_org_t *nuca); + int check_nuca_org (nuca_org_t *n, min_values_t *minval); + nuca_org_t * find_optimal_nuca (list 
*n, min_values_t *minval); + void print_nuca(nuca_org_t *n); + void print_cont_stats(); + + private: + + /*TechnologyParameter::*/DeviceType *deviceType; + int wt_min, wt_max; + Wire *wire_vertical[WIRE_TYPES], + *wire_horizontal[WIRE_TYPES]; + +}; + + +#endif diff --git a/T1/TP1/cacti-master/parameter.cc b/T1/TP1/cacti-master/parameter.cc new file mode 100644 index 0000000..3300b95 --- /dev/null +++ b/T1/TP1/cacti-master/parameter.cc @@ -0,0 +1,2837 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include +#include +#include + +#include "parameter.h" +#include "area.h" + +#include "basic_circuit.h" +#include + +using namespace std; + + +InputParameter * g_ip; +TechnologyParameter g_tp; + +// ali +bool is_equal(double first, double second) +{ + + if((first == 0) && (second ==0)) + { + return true; + } + + if((second==0) || (second!=second)) + return true; + + if((first!=first) || (second!=second)) // both are NaNs + { + return true; + } + if(first==0) + { + if(fabs(first-second)<(second*0.000001)) + return true; + } + else + { + if(fabs(first-second)<(first*0.000001)) + return true; + } + + return false; +} + +/** +void DeviceType::display(uint32_t indent) const +{ + string indent_str(indent, ' '); + + cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; + cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; + cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; + cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; + cout << indent_str << "C_junc_sw = " << setw(12) << C_junc_sidewall << " F/um^2" << endl; + cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; + cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; + cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << 
endl; + cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl; + cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; + cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; + cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; + cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; + cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; + cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; + cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; + cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; + cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; +} +**/ +bool DeviceType::isEqual(const DeviceType & dev) +{ + if( !is_equal(C_g_ideal,dev.C_g_ideal)) {display(0); cout << "\n\n\n"; dev.display(0); assert(false);} + if( !is_equal(C_fringe,dev.C_fringe)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + + if( !is_equal(C_overlap , dev.C_overlap)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(C_junc , dev.C_junc)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(C_junc_sidewall , dev.C_junc_sidewall)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(l_phy , dev.l_phy)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(l_elec , dev.l_elec)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(R_nch_on , dev.R_nch_on)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(R_pch_on , dev.R_pch_on)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(Vdd , dev.Vdd)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} + if( !is_equal(Vth , dev.Vth)) {display(0); cout << "\n\n\n"; dev.display(0);assert(false);} +//// if( 
// Parses one scalar parameter from a technology-file line of the form
//   <name><blank(s)><unit><blank(s)><value>
//
// line      : the full text line; it must start with `name` (callers match the
//             prefix with strncmp before calling), because parsing starts at
//             line[strlen(name)].
// name      : parameter keyword that prefixes the line.
// unit_name : unused; the unit that gets printed is the token read from the line.
// print     : when true, echoes "<name>: <value> <unit>" to stdout.
//
// Returns the parsed value, or 0.0 when the line carries no parsable value
// (the original returned an uninitialized double in that case).
double scan_single_input_double(char* line, const char* name, const char* unit_name, bool print)
{
  (void)unit_name;

  double value = 0.0;
  char unit[300] = {0};

  // %299s keeps an over-long unit token from running past the end of `unit`.
  sscanf(&line[strlen(name)], "%*[ \t]%299s%*[ \t]%lf", unit, &value);

  if (print)
    std::cout << name << ": " << value << " " << unit << std::endl;
  return value;
}

// Parses a line of the form
//   <name><blank(s)><unit><blank(s)><v0> <v1> <v2> <v3> <v4>
// and returns the column selected by `flavor`.
//
// line      : the full text line; it must start with `name`.
// name      : parameter keyword that prefixes the line.
// unit_name : unused; the unit that gets printed is the token read from the line.
// flavor    : column index, must be in [0, 5).
// print     : when true, echoes "<name>[<flavor>]: <value> <unit>" to stdout.
//
// Columns that could not be parsed read as 0.0 instead of indeterminate data.
double scan_five_input_double(char* line, const char* name, const char* unit_name, int flavor, bool print)
{
  (void)unit_name;
  assert(flavor >= 0 && flavor < 5);  // indexes a five-element array below

  double values[5] = {0.0, 0.0, 0.0, 0.0, 0.0};
  char unit[300] = {0};

  // %299s keeps an over-long unit token from running past the end of `unit`.
  sscanf(&line[strlen(name)], "%*[ \t]%299s%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf",
         unit, &values[0], &values[1], &values[2], &values[3], &values[4]);

  if (print)
    std::cout << name << "[" << flavor << "]: " << values[flavor] << " " << unit << std::endl;
  return values[flavor];
}

// Parses a line of the form
//   <name><blank(s)><unit><blank(s)><row_key> <v0> <v1> <v2> <v3> <v4>
// and, only when row_key == temperature - 300, stores column `flavor` in
// `result`. Lines whose key does not match leave `result` untouched, so the
// caller can feed every row of a table through this function.
//
// line        : the full text line; it must start with `name`.
// name        : parameter keyword that prefixes the line.
// unit_name   : unused; the unit that gets printed is the token read from the line.
// flavor      : column index, must be in [0, 5).
// temperature : requested temperature; compared (minus 300) with the row key.
// print       : when true, echoes "<name>: <value> <unit>" for the matching row.
// result      : receives the selected value for the matching row.
void scan_five_input_double_temperature(char* line, const char* name, const char* unit_name, int flavor, unsigned int temperature, bool print, double & result)
{
  (void)unit_name;
  assert(flavor >= 0 && flavor < 5);  // indexes a five-element array below

  double values[5] = {0.0, 0.0, 0.0, 0.0, 0.0};
  unsigned int row_key = 0;
  char unit[300] = {0};

  const int parsed = sscanf(&line[strlen(name)],
                            "%*[ \t]%299s%*[ \t]%u%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf",
                            unit, &row_key, &values[0], &values[1], &values[2], &values[3], &values[4]);

  // Without both the unit and the row key there is nothing to compare against;
  // the original compared an uninitialized key here.
  if (parsed < 2)
    return;

  if (row_key == (temperature - 300))
  {
    if (print)
      std::cout << name << ": " << values[flavor] << " " << unit << std::endl;

    result = values[flavor];
  }
}
C_junc=scan_five_input_double(line,"-C_junc","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + + if (!strncmp("-l_phy", line, strlen("-l_phy"))) + { + l_phy=scan_five_input_double(line,"-l_phy","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-l_elec", line, strlen("-l_elec"))) + { + l_elec=scan_five_input_double(line,"-l_elec","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-nmos_effective_resistance_multiplier", line, strlen("-nmos_effective_resistance_multiplier"))) + { + nmos_effective_resistance_multiplier=scan_five_input_double(line,"-nmos_effective_resistance_multiplier","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-Vdd", line, strlen("-Vdd"))) + { + Vdd=scan_five_input_double(line,"-Vdd","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-Vth", line, strlen("-Vth"))) + { + Vth=scan_five_input_double(line,"-Vth","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-Vdsat", line, strlen("-Vdsat"))) + { + Vdsat=scan_five_input_double(line,"-Vdsat","V",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-I_on_n", line, strlen("-I_on_n"))) + { + I_on_n=scan_five_input_double(line,"-I_on_n","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-I_on_p", line, strlen("-I_on_p"))) + { + I_on_p = scan_five_input_double(line,"-I_on_p","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-I_off_n", line, strlen("-I_off_n"))) + { + scan_five_input_double_temperature(line,"-I_off_n","F/um",tech_flavor,temperature,g_ip->print_detail_debug,I_off_n); + continue; + } + if (!strncmp("-I_g_on_n", line, strlen("-I_g_on_n"))) + { + scan_five_input_double_temperature(line,"-I_g_on_n","F/um",tech_flavor,temperature,g_ip->print_detail_debug,I_g_on_n); + continue; + } + if (!strncmp("-C_ox", line, strlen("-C_ox"))) + { + 
C_ox=scan_five_input_double(line,"-C_ox","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-t_ox", line, strlen("-t_ox"))) + { + t_ox=scan_five_input_double(line,"-t_ox","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-n2p_drv_rt", line, strlen("-n2p_drv_rt"))) + { + n_to_p_eff_curr_drv_ratio=scan_five_input_double(line,"-n2p_drv_rt","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-lch_lk_rdc", line, strlen("-lch_lk_rdc"))) + { + long_channel_leakage_reduction=scan_five_input_double(line,"-lch_lk_rdc","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-Mobility_n", line, strlen("-Mobility_n"))) + { + Mobility_n=scan_five_input_double(line,"-Mobility_n","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-gmp_to_gmn_multiplier", line, strlen("-gmp_to_gmn_multiplier"))) + { + gmp_to_gmn_multiplier=scan_five_input_double(line,"-gmp_to_gmn_multiplier","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + if (!strncmp("-n_to_p_eff_curr_drv_ratio", line, strlen("-n_to_p_eff_curr_drv_ratio"))) + { + n_to_p_eff_curr_drv_ratio=scan_five_input_double(line,"-n_to_p_eff_curr_drv_ratio","F/um",tech_flavor,g_ip->print_detail_debug); + continue; + } + + } + + C_overlap = 0.2*C_g_ideal; + if(tech_flavor>=3) + R_nch_on = nmos_effective_resistance_multiplier * g_tp.vpp / I_on_n;//ohm-micron + else + R_nch_on = nmos_effective_resistance_multiplier * Vdd / I_on_n;//ohm-micron + R_pch_on = n_to_p_eff_curr_drv_ratio * R_nch_on;//ohm-micron + I_off_p = I_off_n; + I_g_on_p = I_g_on_n; + if(g_ip->print_detail_debug) + { + ///cout << nmos_effective_resistance_multiplier << " -- " << Vdd << " -- " << I_on_n << " -- " << n_to_p_eff_curr_drv_ratio << endl; + cout << "C_overlap: " << C_overlap << " F/um" << endl; + cout << "R_nch_on: " << R_nch_on << " ohm-micron" << endl; + cout << "R_pch_on: " << R_pch_on << " ohm-micron" << endl; + } + + 
fclose(fp); + +} + + +void DeviceType::interpolate(double alpha, const DeviceType& dev1, const DeviceType& dev2) +{ + C_g_ideal = alpha*dev1.C_g_ideal+(1-alpha)*dev2.C_g_ideal; + C_fringe = alpha*dev1.C_fringe+(1-alpha)*dev2.C_fringe; + C_overlap = alpha*dev1.C_overlap+(1-alpha)*dev2.C_overlap; + C_junc = alpha*dev1.C_junc+(1-alpha)*dev2.C_junc; + l_phy = alpha*dev1.l_phy+(1-alpha)*dev2.l_phy; + l_elec = alpha*dev1.l_elec+(1-alpha)*dev2.l_elec; + R_nch_on = alpha*dev1.R_nch_on+(1-alpha)*dev2.R_nch_on; + R_pch_on = alpha*dev1.R_pch_on+(1-alpha)*dev2.R_pch_on; + Vdd = alpha*dev1.Vdd+(1-alpha)*dev2.Vdd; + Vth = alpha*dev1.Vth+(1-alpha)*dev2.Vth; + Vcc_min = alpha*dev1.Vcc_min+(1-alpha)*dev2.Vcc_min; + I_on_n = alpha*dev1.I_on_n+(1-alpha)*dev2.I_on_n; + I_on_p = alpha*dev1.I_on_p+(1-alpha)*dev2.I_on_p; + I_off_n = alpha*dev1.I_off_n+(1-alpha)*dev2.I_off_n; + I_off_p = alpha*dev1.I_off_p+(1-alpha)*dev2.I_off_p; + I_g_on_n = alpha*dev1.I_g_on_n+(1-alpha)*dev2.I_g_on_n; + I_g_on_p = alpha*dev1.I_g_on_p+(1-alpha)*dev2.I_g_on_p; + C_ox = alpha*dev1.C_ox+(1-alpha)*dev2.C_ox; + t_ox = alpha*dev1.t_ox+(1-alpha)*dev2.t_ox; + n_to_p_eff_curr_drv_ratio = alpha*dev1.n_to_p_eff_curr_drv_ratio+(1-alpha)*dev2.n_to_p_eff_curr_drv_ratio; + long_channel_leakage_reduction = alpha*dev1.long_channel_leakage_reduction+(1-alpha)*dev2.long_channel_leakage_reduction; + Mobility_n = alpha*dev1.Mobility_n+(1-alpha)*dev2.Mobility_n; + Vdsat = alpha*dev1.Vdsat + (1-alpha)*dev2.Vdsat; + gmp_to_gmn_multiplier = alpha*dev1.gmp_to_gmn_multiplier + (1-alpha)*dev2.gmp_to_gmn_multiplier; + n_to_p_eff_curr_drv_ratio = alpha*dev1.n_to_p_eff_curr_drv_ratio + (1-alpha)*dev2.n_to_p_eff_curr_drv_ratio; + + C_junc_sidewall = dev1.C_junc_sidewall; +} + + +double scan_input_double_inter_type(char* line, const char * name, const char * unit_name, int proj_type, int tech_flavor, bool print) +{ + assert(proj_typeprint_detail_debug; + + while(fscanf(fp, "%[^\n]\n", line) != EOF) + { + if (!strncmp("-wire_pitch", 
line, strlen("-wire_pitch"))) + { + pitch =scan_input_double_inter_type(line,"-wire_pitch","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-barrier_thickness", line, strlen("-barrier_thickness"))) + { + barrier_thickness =scan_input_double_inter_type(line,"-barrier_thickness","ohm",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-dishing_thickness", line, strlen("-dishing_thickness"))) + { + dishing_thickness =scan_input_double_inter_type(line,"-dishing_thickness","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-alpha_scatter", line, strlen("-alpha_scatter"))) + { + alpha_scatter =scan_input_double_inter_type(line,"-alpha_scatter","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-aspect_ratio", line, strlen("-aspect_ratio"))) + { + aspect_ratio =scan_input_double_inter_type(line,"-aspect_ratio","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-miller_value", line, strlen("-miller_value"))) + { + miller_value =scan_input_double_inter_type(line,"-miller_value","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-horiz_dielectric_constant", line, strlen("-horiz_dielectric_constant"))) + { + horiz_dielectric_constant =scan_input_double_inter_type(line,"-horiz_dielectric_constant","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-vert_dielectric_constant", line, strlen("-vert_dielectric_constant"))) + { + vert_dielectric_constant =scan_input_double_inter_type(line,"-vert_dielectric_constant","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-ild_thickness", line, strlen("-ild_thickness"))) + { + ild_thickness =scan_input_double_inter_type(line,"-ild_thickness","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-fringe_cap", line, strlen("-fringe_cap"))) + { + fringe_cap 
=scan_input_double_inter_type(line,"-fringe_cap","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-wire_r_per_micron", line, strlen("-wire_r_per_micron"))) + { + R_per_um =scan_input_double_inter_type(line,"-wire_r_per_micron","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-wire_c_per_micron", line, strlen("-wire_c_per_micron"))) + { + C_per_um =scan_input_double_inter_type(line,"-wire_c_per_micron","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + if (!strncmp("-resistivity", line, strlen("-resistivity"))) + { + resistivity =scan_input_double_inter_type(line,"-resistivity","um",g_ip->ic_proj_type,tech_flavor,print); + continue; + } + } + + pitch *= g_ip->F_sz_um; + wire_width = pitch/ 2; //micron + wire_thickness = aspect_ratio * wire_width;//micron + wire_spacing = pitch - wire_width;//micron + if((projection_type!=1) || (tech_flavor!=3)) + { + R_per_um = wire_resistance(resistivity, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron + if(print) + cout << R_per_um << " = wire_resistance(" << resistivity << "," << wire_width << "," << + wire_thickness << "," << barrier_thickness << "," << dishing_thickness << "," << alpha_scatter << ")\n"; + + + C_per_um = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness, miller_value, horiz_dielectric_constant, + vert_dielectric_constant, fringe_cap);//F/micron. 
+ if(print) + cout << C_per_um << " = wire_capacitance(" << wire_width << "," << wire_thickness << "," << wire_spacing + << "," << ild_thickness << "," << miller_value << "," << horiz_dielectric_constant + << "," << vert_dielectric_constant << "," << fringe_cap << ")\n"; + + } + fclose(fp); +} + +bool InterconnectType::isEqual(const InterconnectType & inter) +{ + if( !is_equal(pitch , inter.pitch)) {display(0); assert(false);} + if( !is_equal(R_per_um , inter.R_per_um)) {display(0); assert(false);} + if( !is_equal(C_per_um , inter.C_per_um)) {display(0); assert(false);} + if( !is_equal(horiz_dielectric_constant , inter.horiz_dielectric_constant)) {display(0); assert(false);} + if( !is_equal(vert_dielectric_constant , inter.vert_dielectric_constant)) {display(0); assert(false);} + if( !is_equal(aspect_ratio , inter.aspect_ratio)) {display(0); assert(false);} + if( !is_equal(miller_value , inter.miller_value)) {display(0); assert(false);} + if( !is_equal(ild_thickness , inter.ild_thickness)) {display(0); assert(false);} + + //auxilary parameters + ///if( !is_equal(wire_width , inter.wire_width)) {display(0); assert(false);} + ///if( !is_equal(wire_thickness , inter.wire_thickness)) {display(0); assert(false);} + ///if( !is_equal(wire_spacing , inter.wire_spacing)) {display(0); assert(false);} + ///if( !is_equal(barrier_thickness , inter.barrier_thickness)) {display(0); assert(false);} + ///if( !is_equal(dishing_thickness , inter.dishing_thickness)) {display(0); assert(false);} + ///if( !is_equal(alpha_scatter , inter.alpha_scatter)) {display(0); assert(false);} + ///if( !is_equal(fringe_cap , inter.fringe_cap)) {display(0); assert(false);} + + return true; +} + +void InterconnectType::interpolate(double alpha, const InterconnectType & inter1, const InterconnectType & inter2) +{ + pitch = alpha*inter1.pitch + (1-alpha)*inter2.pitch; + R_per_um = alpha*inter1.R_per_um + (1-alpha)*inter2.R_per_um; + C_per_um = alpha*inter1.C_per_um + (1-alpha)*inter2.C_per_um; + 
// Parses a line of the form
//   <name><blank(s)><unit><blank(s)><cell_type> <v0> <v1> <v2> <v3> <v4>
// and, only when the cell type read from the line equals `cell_type`, stores
// column `flavor` in `result`. Non-matching lines leave `result` untouched.
//
// line      : the full text line; it must start with `name`, because parsing
//             starts at line[strlen(name)].
// name      : parameter keyword that prefixes the line.
// unit_name : unused; the unit that gets printed is the token read from the line.
// flavor    : column index, must be in [0, 5).
// cell_type : row selector compared with the integer read from the line.
// print     : when true, echoes "<name>: <value> <unit>" for the matching row.
// result    : receives the selected value for the matching row.
void scan_five_input_double_mem_type(char* line, const char* name, const char* unit_name, int flavor, int cell_type, bool print, double & result)
{
  (void)unit_name;
  assert(flavor >= 0 && flavor < 5);  // indexes a five-element array below

  double values[5] = {0.0, 0.0, 0.0, 0.0, 0.0};
  int row_cell_type = 0;
  char unit[300] = {0};

  // %299s keeps an over-long unit token from running past the end of `unit`.
  const int parsed = sscanf(&line[strlen(name)],
                            "%*[ \t]%299s%*[ \t]%d%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf%*[ \t]%lf",
                            unit, &row_cell_type, &values[0], &values[1], &values[2], &values[3], &values[4]);

  // Without both the unit and the cell type there is nothing to compare
  // against; the original compared an uninitialized integer here.
  if (parsed < 2)
    return;

  if (row_cell_type == cell_type)
  {
    if (print)
      std::cout << name << ": " << values[flavor] << " " << unit << std::endl;

    result = values[flavor];
  }
}
line, strlen("-Wmemcellpmos"))) + { + scan_five_input_double_mem_type(line,"-Wmemcellpmos","V",tech_flavor,cell_type, g_ip->print_detail_debug,cell_pmos_w); + continue; + } + if (!strncmp("-Wmemcellnmos", line, strlen("-Wmemcellnmos"))) + { + scan_five_input_double_mem_type(line,"-Wmemcellnmos","V",tech_flavor,cell_type, g_ip->print_detail_debug,cell_nmos_w); + continue; + } + if (!strncmp("-area_cell", line, strlen("-area_cell"))) + { + scan_five_input_double_mem_type(line,"-area_cell","V",tech_flavor,cell_type, g_ip->print_detail_debug,area_cell); + continue; + } + if (!strncmp("-asp_ratio_cell", line, strlen("-asp_ratio_cell"))) + { + scan_five_input_double_mem_type(line,"-asp_ratio_cell","V",tech_flavor,cell_type, g_ip->print_detail_debug,asp_ratio_cell); + continue; + } + } + if(cell_type!=2) + cell_a_w *= g_ip->F_sz_um; + cell_pmos_w *= g_ip->F_sz_um; + cell_nmos_w *= g_ip->F_sz_um; + if(cell_type!=2) + area_cell *= (g_ip->F_sz_um* g_ip->F_sz_um); + ///assert(asp_ratio_cell!=0); + b_w = sqrt(area_cell / (asp_ratio_cell)); + b_h = asp_ratio_cell * b_w; + if(cell_type==2) //dram + Vbitpre = vdd_cell; + else // sram or cam + Vbitpre = vdd; + + + Vbitfloating = Vbitpre*0.7; + + //display(5); + +} + +void MemoryType::interpolate(double alpha, const MemoryType& mem1, const MemoryType& mem2) +{ + cell_a_w = alpha * mem1.cell_a_w + (1-alpha) * mem2.cell_a_w; + cell_pmos_w = alpha * mem1.cell_pmos_w + (1-alpha) * mem2.cell_pmos_w; + cell_nmos_w = alpha * mem1.cell_nmos_w + (1-alpha) * mem2.cell_nmos_w; + + area_cell = alpha * mem1.area_cell + (1-alpha) * mem2.area_cell; + asp_ratio_cell = alpha * mem1.asp_ratio_cell + (1-alpha) * mem2.asp_ratio_cell; + + Vbitpre = mem2.Vbitpre; + Vbitfloating = Vbitpre*0.7; + // updating dependant variables after scaling/interpolating + ///assert(asp_ratio_cell!=0); + b_w = sqrt(area_cell / (asp_ratio_cell)); + b_h = asp_ratio_cell * b_w; + //display(10); +} + +bool MemoryType::isEqual(const MemoryType & mem) +{ + if( !is_equal(b_w , 
mem.b_w)) {display(0); cout << "\n\n\n"; mem.display(0); assert(false);} + if( !is_equal(b_h , mem.b_h)) {display(0); cout << "\n\n\n"; mem.display(0); assert(false);} + if( !is_equal(cell_a_w , mem.cell_a_w)) {display(0); cout << "\n\n\n"; mem.display(0); assert(false);} + if( !is_equal(cell_pmos_w , mem.cell_pmos_w)) {display(0); cout << "\n\n\n"; mem.display(0); assert(false);} + if( !is_equal(cell_nmos_w , mem.cell_nmos_w)) {display(0); cout << "\n\n\n"; mem.display(0); assert(false);} + if( !is_equal(Vbitpre , mem.Vbitpre)) {display(0); cout << "\n\n\n"; mem.display(0); assert(false);} + ///if( !is_equal(Vbitfloating , mem.Vbitfloating)) {display(0); cout << "\n\n\n"; mem.display(0); assert(false);} + + // needed to calculate b_w b_h + ///if( !is_equal(area_cell , mem.area_cell)) {display(0); assert(false);} + ///if( !is_equal(asp_ratio_cell , mem.asp_ratio_cell)) {display(0); assert(false);} + + return true; +} + +void ScalingFactor::assign(const string & in_file) +{ + FILE *fp = fopen(in_file.c_str(), "r"); + char line[5000]; + //char temp_var[5000]; + if(!fp) + { + cout << in_file << " is missing!\n"; + exit(-1); + } + + while(fscanf(fp, "%[^\n]\n", line) != EOF) + { + if (!strncmp("-logic_scaling_co_eff", line, strlen("-logic_scaling_co_eff"))) + { + logic_scaling_co_eff = scan_single_input_double(line,"-logic_scaling_co_eff","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-core_tx_density", line, strlen("-core_tx_density"))) + { + core_tx_density = scan_single_input_double(line,"-core_tx_density","F/um", g_ip->print_detail_debug); + continue; + } + + } + + fclose(fp); +} + +void ScalingFactor::interpolate(double alpha, const ScalingFactor& dev1, const ScalingFactor& dev2) +{ + logic_scaling_co_eff = alpha*dev1.logic_scaling_co_eff + (1-alpha)*dev2.logic_scaling_co_eff; + core_tx_density = alpha*dev1.core_tx_density + (1-alpha)*dev2.core_tx_density; +} + +bool ScalingFactor::isEqual(const ScalingFactor & scal) +{ + if( 
!is_equal(logic_scaling_co_eff,scal.logic_scaling_co_eff)) { display(0); assert(false);} + if( !is_equal(core_tx_density,scal.core_tx_density)) { display(0); assert(false);} + if( !is_equal(long_channel_leakage_reduction , scal.long_channel_leakage_reduction)) { display(0); assert(false);} + return true; +} + +void TechnologyParameter::find_upper_and_lower_tech(double technology, int &tech_lo, string& in_file_lo, int &tech_hi, string& in_file_hi) +{ + if (technology < 181 && technology > 179) + { + tech_lo = 180; + in_file_lo = "tech_params/180nm.dat"; + tech_hi = 180; + in_file_hi = "tech_params/180nm.dat"; + } + else if (technology < 91 && technology > 89) + { + tech_lo = 90; + in_file_lo = "tech_params/90nm.dat"; + tech_hi = 90; + in_file_hi = "tech_params/90nm.dat"; + } + else if (technology < 66 && technology > 64) + { + tech_lo = 65; + in_file_lo = "tech_params/65nm.dat"; + tech_hi = 65; + in_file_hi = "tech_params/65nm.dat"; + } + else if (technology < 46 && technology > 44) + { + tech_lo = 45; + in_file_lo = "tech_params/45nm.dat"; + tech_hi = 45; + in_file_hi = "tech_params/45nm.dat"; + } + else if (technology < 33 && technology > 31) + { + tech_lo = 32; + in_file_lo = "tech_params/32nm.dat"; + tech_hi = 32; + in_file_hi = "tech_params/32nm.dat"; + } + else if (technology < 23 && technology > 21) + { + tech_lo = 22; + in_file_lo = "tech_params/22nm.dat"; + tech_hi = 22; + in_file_hi = "tech_params/22nm.dat"; + } + else if (technology < 180 && technology > 90) + { + tech_lo = 180; + in_file_lo = "tech_params/180nm.dat"; + tech_hi = 90; + in_file_hi = "tech_params/90nm.dat"; + } + else if (technology < 90 && technology > 65) + { + tech_lo = 90; + in_file_lo = "tech_params/90nm.dat"; + tech_hi = 65; + in_file_hi = "tech_params/65nm.dat"; + } + else if (technology < 65 && technology > 45) + { + tech_lo = 65; + in_file_lo = "tech_params/65nm.dat"; + tech_hi = 45; + in_file_hi = "tech_params/45nm.dat"; + } + else if (technology < 45 && technology > 32) + { + 
tech_lo = 45; + in_file_lo = "tech_params/45nm.dat"; + tech_hi = 32; + in_file_hi = "tech_params/32nm.dat"; + } + else if (technology < 32 && technology > 22) + { + tech_lo = 32; + in_file_lo = "tech_params/32nm.dat"; + tech_hi = 22; + in_file_hi = "tech_params/22nm.dat"; + } + /** + else if (technology < 22 && technology > 16) + { + tech_lo = 22; + in_file_lo = "tech_params/22nm.dat"; + tech_hi = 16; + in_file_hi = "tech_params/16nm.dat"; + } + **/ + else + { + cout<<"Invalid technology nodes"<tsv_is_subarray_type; + } + else + { + tsv_type = g_ip->tsv_os_bank_type; + } + fp = fopen(in_file.c_str(), "r"); + while(fscanf(fp, "%[^\n]\n", line) != EOF) + { + if (!strncmp("-tsv_pitch", line, strlen("-tsv_pitch"))) + { + tsv_pitch = scan_input_double_tsv_type(line,"-tsv_pitch","F/um", g_ip->ic_proj_type, tsv_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-tsv_diameter", line, strlen("-tsv_diameter"))) + { + tsv_diameter = scan_input_double_tsv_type(line,"-tsv_diameter","F/um", g_ip->ic_proj_type, tsv_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-tsv_length", line, strlen("-tsv_length"))) + { + tsv_length = scan_input_double_tsv_type(line,"-tsv_length","F/um", g_ip->ic_proj_type, tsv_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-tsv_dielec_thickness", line, strlen("-tsv_dielec_thickness"))) + { + tsv_dielec_thickness = scan_input_double_tsv_type(line,"-tsv_dielec_thickness","F/um", g_ip->ic_proj_type, tsv_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-tsv_contact_resistance", line, strlen("-tsv_contact_resistance"))) + { + tsv_contact_resistance = scan_input_double_tsv_type(line,"-tsv_contact_resistance","F/um", g_ip->ic_proj_type, tsv_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-tsv_depletion_width", line, strlen("-tsv_depletion_width"))) + { + tsv_depletion_width = scan_input_double_tsv_type(line,"-tsv_depletion_width","F/um", g_ip->ic_proj_type, tsv_type, 
g_ip->print_detail_debug); + continue; + } + if (!strncmp("-tsv_liner_dielectric_cons", line, strlen("-tsv_liner_dielectric_cons"))) + { + tsv_liner_dielectric_constant = scan_input_double_tsv_type(line,"-tsv_liner_dielectric_cons","F/um", g_ip->ic_proj_type, tsv_type, g_ip->print_detail_debug); + continue; + } + + tsv_length *= g_ip->num_die_3d; + if(iter==0) + { + tsv_parasitic_resistance_fine = tsv_resistance(BULK_CU_RESISTIVITY, tsv_length, tsv_diameter, tsv_contact_resistance); + tsv_parasitic_capacitance_fine = tsv_capacitance(tsv_length, tsv_diameter, tsv_pitch, tsv_dielec_thickness, tsv_liner_dielectric_constant, tsv_depletion_width); + tsv_minimum_area_fine = tsv_area(tsv_pitch); + } + else + { + tsv_parasitic_resistance_coarse = tsv_resistance(BULK_CU_RESISTIVITY, tsv_length, tsv_diameter, tsv_contact_resistance); + tsv_parasitic_capacitance_coarse = tsv_capacitance(tsv_length, tsv_diameter, tsv_pitch, tsv_dielec_thickness, tsv_liner_dielectric_constant, tsv_depletion_width); + tsv_minimum_area_coarse = tsv_area(tsv_pitch); + } + } + + fclose(fp); + } +} + +void TechnologyParameter::init(double technology, bool is_tag) +{ + FILE *fp ; + reset(); + char line[5000]; + //char temp_var[5000]; + + uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; + uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; + + int tech_lo, tech_hi; + string in_file_lo, in_file_hi; + + double alpha; // used for technology interpolation + + + + + technology = technology * 1000.0; // in the unit of nm + + find_upper_and_lower_tech(technology, tech_lo,in_file_lo,tech_hi,in_file_hi); + // excluding some cases. 
+ if((tech_lo==22) && (tech_hi==22)) + { + if (ram_cell_tech_type == 3 ) + { + cout<<"current version does not support eDRAM technologies at 22nm"<print_detail_debug); + continue; + } + if (!strncmp("-dram_cell_Vdd", line, strlen("-dram_cell_Vdd"))) + { + dram_cell_Vdd += alpha* scan_five_input_double(line,"-dram_cell_Vdd","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-dram_cell_C", line, strlen("-dram_cell_C"))) + { + dram_cell_C += alpha* scan_five_input_double(line,"-dram_cell_C","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-dram_cell_I_off_worst_case_len_temp", line, strlen("-dram_cell_I_off_worst_case_len_temp"))) + { + dram_cell_I_off_worst_case_len_temp += alpha* scan_five_input_double(line,"-dram_cell_I_off_worst_case_len_temp","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-vpp", line, strlen("-vpp"))) + { + vpp += alpha* scan_five_input_double(line,"-vpp","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-sckt_co_eff", line, strlen("-sckt_co_eff"))) + { + sckt_co_eff += alpha * scan_single_input_double(line,"-sckt_co_eff","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-chip_layout_overhead", line, strlen("-chip_layout_overhead"))) + { + chip_layout_overhead += alpha * scan_single_input_double(line,"-chip_layout_overhead","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-macro_layout_overhead", line, strlen("-macro_layout_overhead"))) + { + macro_layout_overhead += alpha * scan_single_input_double(line,"-macro_layout_overhead","F/um", g_ip->print_detail_debug); + continue; + } + } + fclose(fp); + + + DeviceType peri_global_lo, peri_global_hi; + peri_global_lo.assign(in_file_lo, peri_global_tech_type, g_ip->temp); + peri_global_hi.assign(in_file_hi, peri_global_tech_type, g_ip->temp); + peri_global.interpolate(alpha,peri_global_lo,peri_global_hi); + // in the 
original code some field of this devide has not been initialized/ + // I make them 0 for compatibility. + ///peri_global.I_on_p = 0.0; + + DeviceType sleep_tx_lo, sleep_tx_hi; + sleep_tx_lo.assign(in_file_lo, 1, g_ip->temp); + sleep_tx_hi.assign(in_file_hi, 1, g_ip->temp); + sleep_tx.interpolate(alpha, sleep_tx_lo, sleep_tx_hi); + + + DeviceType sram_cell_lo, sram_cell_hi; + sram_cell_lo.assign(in_file_lo, ram_cell_tech_type, g_ip->temp); + sram_cell_hi.assign(in_file_hi, ram_cell_tech_type, g_ip->temp); + sram_cell.interpolate(alpha, sram_cell_lo, sram_cell_hi); + // in the original code some field of this devide has not been initialized/ + // I make them 0 for compatibility. + //sram_cell.Vdd=0.0; + ///sram_cell.I_on_p=0.0; + ///sram_cell.C_ox=0.0; + + + DeviceType dram_acc_lo, dram_acc_hi; + dram_acc_lo.assign(in_file_lo, (ram_cell_tech_type==comm_dram? ram_cell_tech_type:dram_cell_tech_flavor), g_ip->temp); + dram_acc_hi.assign(in_file_hi, (ram_cell_tech_type==comm_dram? ram_cell_tech_type:dram_cell_tech_flavor), g_ip->temp); + dram_acc.interpolate(alpha, dram_acc_lo, dram_acc_hi); + // dram_acc exceptions + //dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; + //dram_acc.R_pch_on = 0; + if(tech_lo<=22) + { + } + else if(tech_lo<=32) + { + if(ram_cell_tech_type == lp_dram) + dram_acc.Vth = 0.44129; + else + dram_acc.Vth = 1.0; + } + else if(tech_lo<=45) + { + if(ram_cell_tech_type == lp_dram) + dram_acc.Vth = 0.44559; + else + dram_acc.Vth = 1.0; + } + else if(tech_lo<=65) + { + if(ram_cell_tech_type == lp_dram) + dram_acc.Vth = 0.43806; + else + dram_acc.Vth = 1.0; + } + else if(tech_lo<=90) + { + if(ram_cell_tech_type == lp_dram) + dram_acc.Vth = 0.4545; + else + dram_acc.Vth = 1.0; + } + // in the original code some field of this devide has not been initialized/ + // I make them 0 for compatibility. 
+ dram_acc.Vdd= 0.0; + dram_acc.I_on_p = 0.0; + dram_acc.I_off_n = 0.0; + dram_acc.I_off_p = 0.0; + dram_acc.C_ox = 0.0; + dram_acc.t_ox = 0.0; + dram_acc.n_to_p_eff_curr_drv_ratio = 0.0; + + DeviceType dram_wl_lo, dram_wl_hi; + dram_wl_lo.assign(in_file_lo, (ram_cell_tech_type==comm_dram? ram_cell_tech_type:dram_cell_tech_flavor), g_ip->temp); + dram_wl_hi.assign(in_file_hi, (ram_cell_tech_type==comm_dram? ram_cell_tech_type:dram_cell_tech_flavor), g_ip->temp); + dram_wl.interpolate(alpha, dram_wl_lo, dram_wl_hi); + // in the original code some field of this devide has not been initialized/ + // I make them 0 for compatibility. + dram_wl.Vdd = 0.0; + dram_wl.Vth = 0.0; + dram_wl.I_on_p = 0.0; + dram_wl.C_ox = 0.0; + dram_wl.t_ox = 0.0; + + // if ram_cell_tech_type is not 3 or 4 ( which means edram and comm-dram) + // then reset dram_wl dram_acc + + if(ram_cell_tech_type <3) + { + dram_acc.reset(); + dram_wl.reset(); + } + + + DeviceType cam_cell_lo, cam_cell_hi; + cam_cell_lo.assign(in_file_lo, ram_cell_tech_type, g_ip->temp); + cam_cell_hi.assign(in_file_hi, ram_cell_tech_type, g_ip->temp); + cam_cell.interpolate(alpha, cam_cell_lo, cam_cell_hi); + + MemoryType dram_lo, dram_hi; + dram_lo.assign(in_file_lo, ram_cell_tech_type, 2); // cell_type = dram(2) + dram_hi.assign(in_file_hi, ram_cell_tech_type, 2); + dram.interpolate(alpha,dram_lo,dram_hi); + + MemoryType sram_lo, sram_hi; + sram_lo.assign(in_file_lo, ram_cell_tech_type, 0); // cell_type = sram(0) + sram_hi.assign(in_file_hi, ram_cell_tech_type, 0); + sram.interpolate(alpha,sram_lo,sram_hi); + // sram cell execptions + /*sram_lo.assign(in_file_lo, 0, g_ip->temp); + sram.cell_a_w =sram_lo.cell_a_w; + sram.b_h = sram_lo.b_h; + sram.b_w = sram_lo.b_w; +*/ + MemoryType cam_lo, cam_hi; + cam_lo.assign(in_file_lo, ram_cell_tech_type, 1); // cell_type = sram(0) + cam_hi.assign(in_file_hi, ram_cell_tech_type, 1); + cam.interpolate(alpha,cam_lo,cam_hi); + + + ScalingFactor scaling_factor_lo, scaling_factor_hi; + 
scaling_factor_lo.assign(in_file_lo); + scaling_factor_hi.assign(in_file_hi); + scaling_factor.interpolate(alpha, scaling_factor_lo,scaling_factor_hi); + + //vcc_min + peri_global.Vcc_min += (alpha * peri_global_lo.Vdd + (1-alpha)*peri_global_hi.Vdd) * 0.35; + sleep_tx.Vcc_min += (alpha*sleep_tx_lo.Vdd+(1-alpha)*sleep_tx_hi.Vdd); + sram_cell.Vcc_min += (alpha*sram_cell_lo.Vdd +(1-alpha)*sram_cell_hi.Vdd)* 0.65; + + + + fp = fopen(in_file_hi.c_str(), "r"); + + while(fscanf(fp, "%[^\n]\n", line) != EOF) + { + if (!strncmp("-sense_delay", line, strlen("-sense_delay"))) + { + sense_delay = scan_single_input_double(line,"-sense_delay","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-sense_dy_power", line, strlen("-sense_dy_power"))) + { + sense_dy_power = scan_single_input_double(line,"-sense_dy_power","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-sckt_co_eff", line, strlen("-sckt_co_eff"))) + { + sckt_co_eff += (1-alpha)* scan_single_input_double(line,"-sckt_co_eff","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-chip_layout_overhead", line, strlen("-chip_layout_overhead"))) + { + chip_layout_overhead += (1-alpha)* scan_single_input_double(line,"-chip_layout_overhead","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-macro_layout_overhead", line, strlen("-macro_layout_overhead"))) + { + macro_layout_overhead += (1-alpha)* scan_single_input_double(line,"-macro_layout_overhead","F/um", g_ip->print_detail_debug); + continue; + } + if (!strncmp("-dram_cell_I_on", line, strlen("-dram_cell_I_on"))) + { + dram_cell_I_on += (1-alpha) * scan_five_input_double(line,"-dram_cell_I_on","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-dram_cell_Vdd", line, strlen("-dram_cell_Vdd"))) + { + dram_cell_Vdd += (1-alpha) * scan_five_input_double(line,"-dram_cell_Vdd","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-dram_cell_C", line, 
strlen("-dram_cell_C"))) + { + dram_cell_C += (1-alpha) * scan_five_input_double(line,"-dram_cell_C","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-dram_cell_I_off_worst_case_len_temp", line, strlen("-dram_cell_I_off_worst_case_len_temp"))) + { + dram_cell_I_off_worst_case_len_temp += (1-alpha) * scan_five_input_double(line,"-dram_cell_I_off_worst_case_len_temp","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + if (!strncmp("-vpp", line, strlen("-vpp"))) + { + vpp += (1-alpha)* scan_five_input_double(line,"-vpp","F/um", ram_cell_tech_type, g_ip->print_detail_debug); + continue; + } + } + fclose(fp); + + //Currently we are not modeling the resistance/capacitance of poly anywhere. + //Continuous function (or date have been processed) does not need linear interpolation + w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process + w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process + w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; + MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; + HPOWERRAIL = 2 * g_ip->F_sz_um; + cell_h_def = 50 * g_ip->F_sz_um; + w_poly_contact = g_ip->F_sz_um; + spacing_poly_to_contact = g_ip->F_sz_um; + spacing_poly_to_poly = 1.5 * g_ip->F_sz_um; + 
ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um; + + min_w_nmos_ = 3 * g_ip->F_sz_um / 2; + max_w_nmos_ = 100 * g_ip->F_sz_um; + w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process + w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process + w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process + w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process + w_nmos_b_mux = 6 * min_w_nmos_; + w_nmos_sa_mux = 6 * min_w_nmos_; + + + w_pmos_bl_precharge = 6 * pmos_to_nmos_sz_ratio() * min_w_nmos_; + w_pmos_bl_eq = pmos_to_nmos_sz_ratio() * min_w_nmos_; + + + if (ram_cell_tech_type == comm_dram) + { + max_w_nmos_dec = 8 * g_ip->F_sz_um; + h_dec = 8; // in the unit of memory cell height + } + else + { + max_w_nmos_dec = g_tp.max_w_nmos_; + h_dec = 4; // in the unit of memory cell height + } + + + + double gmn_sense_amp_latch + = (peri_global.Mobility_n / 2) * peri_global.C_ox + * (w_sense_n / peri_global.l_elec) * peri_global.Vdsat; + double gmp_sense_amp_latch = peri_global.gmp_to_gmn_multiplier * gmn_sense_amp_latch; + gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch; + + + ///cout << "wire_local " << g_ip->ic_proj_type << " " << ((ram_cell_tech_type == comm_dram)?3:0) << endl; + InterconnectType wire_local_lo, wire_local_hi; + wire_local_lo.assign(in_file_lo,g_ip->ic_proj_type,(ram_cell_tech_type == comm_dram)?3:0); + wire_local_hi.assign(in_file_hi,g_ip->ic_proj_type,(ram_cell_tech_type == comm_dram)?3:0); + wire_local.interpolate(alpha,wire_local_lo,wire_local_hi); + + + ///cout << "wire_inside_mat " << g_ip->ic_proj_type << " " << g_ip->wire_is_mat_type << endl; + InterconnectType wire_inside_mat_lo, wire_inside_mat_hi; + wire_inside_mat_lo.assign(in_file_lo, g_ip->ic_proj_type, g_ip->wire_is_mat_type); + wire_inside_mat_hi.assign(in_file_hi, g_ip->ic_proj_type, g_ip->wire_is_mat_type); 
+ wire_inside_mat.interpolate(alpha, wire_inside_mat_lo, wire_inside_mat_hi); + + ///cout << "wire_outside_mat " << g_ip->ic_proj_type << " " << g_ip->wire_os_mat_type << endl; + InterconnectType wire_outside_mat_lo, wire_outside_mat_hi; + wire_outside_mat_lo.assign(in_file_lo, g_ip->ic_proj_type, g_ip->wire_os_mat_type); + wire_outside_mat_hi.assign(in_file_hi, g_ip->ic_proj_type, g_ip->wire_os_mat_type); + wire_outside_mat.interpolate(alpha, wire_outside_mat_lo, wire_outside_mat_hi); + + unit_len_wire_del = wire_inside_mat.R_per_um * wire_inside_mat.C_per_um / 2; + + // assign value for TSV parameters + + assign_tsv(in_file_hi); + + fringe_cap = wire_local_hi.fringe_cap; // fringe_cap is similar for all wire types. + + double rd = tr_R_on(min_w_nmos_, NCH, 1); + double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(); + double c_load = gate_C(min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0); + double tf = rd * c_load; + kinv = horowitz(0, tf, 0.5, 0.5, RISE); + double KLOAD = 1; + c_load = KLOAD * (drain_C_(min_w_nmos_, NCH, 1, 1, cell_h_def) + + drain_C_(min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, cell_h_def) + + gate_C(min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); + tf = rd * c_load; + FO4 = horowitz(0, tf, 0.5, 0.5, RISE); + +} + +#define PRINT(A,X) cout << A << ": " << X << " , " << tech.X << endl + +bool TechnologyParameter::isEqual(const TechnologyParameter& tech) +{ + if(!is_equal(ram_wl_stitching_overhead_,tech.ram_wl_stitching_overhead_)) {assert(false);} //fs + if(!is_equal(min_w_nmos_,tech.min_w_nmos_)) {assert(false);} //fs + if(!is_equal(max_w_nmos_,tech.max_w_nmos_)) {assert(false);} //fs + if(!is_equal(max_w_nmos_dec,tech.max_w_nmos_dec)) {assert(false);} //fs+ ram_cell_tech_type + if(!is_equal(unit_len_wire_del,tech.unit_len_wire_del)) {assert(false);} //wire_inside_mat + if(!is_equal(FO4,tech.FO4)) {assert(false);} //fs + if(!is_equal(kinv,tech.kinv)) {assert(false);} //fs + if(!is_equal(vpp,tech.vpp )) {assert(false);}//input + 
if(!is_equal(w_sense_en,tech.w_sense_en)) {assert(false);}//fs + if(!is_equal(w_sense_n,tech.w_sense_n)) {assert(false);} //fs + if(!is_equal(w_sense_p,tech.w_sense_p)) {assert(false);} //fs + if(!is_equal(sense_delay,tech.sense_delay)) {PRINT("sense_delay",sense_delay); assert(false);} // input + if(!is_equal(sense_dy_power,tech.sense_dy_power)) {assert(false);} //input + if(!is_equal(w_iso,tech.w_iso)) {assert(false);} //fs + if(!is_equal(w_poly_contact,tech.w_poly_contact)) {assert(false);} //fs + if(!is_equal(spacing_poly_to_poly,tech.spacing_poly_to_poly)) {assert(false);} //fs + if(!is_equal(spacing_poly_to_contact,tech.spacing_poly_to_contact)) {assert(false);}//fs + + //CACTI3D auxilary variables + ///if(!is_equal(tsv_pitch,tech.tsv_pitch)) {assert(false);} + ///if(!is_equal(tsv_diameter,tech.tsv_diameter)) {assert(false);} + ///if(!is_equal(tsv_length,tech.tsv_length)) {assert(false);} + ///if(!is_equal(tsv_dielec_thickness,tech.tsv_dielec_thickness)) {assert(false);} + ///if(!is_equal(tsv_contact_resistance,tech.tsv_contact_resistance)) {assert(false);} + ///if(!is_equal(tsv_depletion_width,tech.tsv_depletion_width)) {assert(false);} + ///if(!is_equal(tsv_liner_dielectric_constant,tech.tsv_liner_dielectric_constant)) {assert(false);} + + //CACTI3DD TSV params + + if(!is_equal(tsv_parasitic_capacitance_fine,tech.tsv_parasitic_capacitance_fine )) {PRINT("tsv_parasitic_capacitance_fine",tsv_parasitic_capacitance_fine); assert(false);} + if(!is_equal(tsv_parasitic_resistance_fine,tech.tsv_parasitic_resistance_fine)) {assert(false);} + if(!is_equal(tsv_minimum_area_fine,tech.tsv_minimum_area_fine)) {assert(false);} + + if(!is_equal(tsv_parasitic_capacitance_coarse,tech.tsv_parasitic_capacitance_coarse)) {assert(false);} + if(!is_equal(tsv_parasitic_resistance_coarse,tech.tsv_parasitic_resistance_coarse)) {assert(false);} + if(!is_equal(tsv_minimum_area_coarse,tech.tsv_minimum_area_coarse)) {assert(false);} + + //fs + 
if(!is_equal(w_comp_inv_p1,tech.w_comp_inv_p1)) {assert(false);} + if(!is_equal(w_comp_inv_p2,tech.w_comp_inv_p2)) {assert(false);} + if(!is_equal(w_comp_inv_p3,tech.w_comp_inv_p3)) {assert(false);} + if(!is_equal(w_comp_inv_n1,tech.w_comp_inv_n1)) {assert(false);} + if(!is_equal(w_comp_inv_n2,tech.w_comp_inv_n2)) {assert(false);} + if(!is_equal(w_comp_inv_n3,tech.w_comp_inv_n3)) {assert(false);} + if(!is_equal(w_eval_inv_p,tech.w_eval_inv_p)) {assert(false);} + if(!is_equal(w_eval_inv_n,tech.w_eval_inv_n)) {assert(false);} + if(!is_equal(w_comp_n,tech.w_comp_n)) {assert(false);} + if(!is_equal(w_comp_p,tech.w_comp_p)) {assert(false);} + + if(!is_equal(dram_cell_I_on,tech.dram_cell_I_on)) {assert(false);} //ram_cell_tech_type + if(!is_equal(dram_cell_Vdd,tech.dram_cell_Vdd)) {assert(false);} + if(!is_equal(dram_cell_I_off_worst_case_len_temp,tech.dram_cell_I_off_worst_case_len_temp)) {assert(false);} + if(!is_equal(dram_cell_C,tech.dram_cell_C)) {assert(false);} + if(!is_equal(gm_sense_amp_latch,tech.gm_sense_amp_latch)) {assert(false);} // depends on many things + + if(!is_equal(w_nmos_b_mux,tech.w_nmos_b_mux)) {assert(false);} //fs + if(!is_equal(w_nmos_sa_mux,tech.w_nmos_sa_mux)) {assert(false);}//fs + if(!is_equal(w_pmos_bl_precharge,tech.w_pmos_bl_precharge)) {PRINT("w_pmos_bl_precharge",w_pmos_bl_precharge);assert(false);}//fs + if(!is_equal(w_pmos_bl_eq,tech.w_pmos_bl_eq)) {assert(false);}//fs + if(!is_equal(MIN_GAP_BET_P_AND_N_DIFFS,tech.MIN_GAP_BET_P_AND_N_DIFFS)) {assert(false);}//fs + if(!is_equal(MIN_GAP_BET_SAME_TYPE_DIFFS,tech.MIN_GAP_BET_SAME_TYPE_DIFFS)) {assert(false);}//fs + if(!is_equal(HPOWERRAIL,tech.HPOWERRAIL)) {assert(false);}//fs + if(!is_equal(cell_h_def,tech.cell_h_def)) {assert(false);}//fs + + if(!is_equal(chip_layout_overhead,tech.chip_layout_overhead )) {assert(false);}//input + if(!is_equal(macro_layout_overhead,tech.macro_layout_overhead)) {cout <cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer + + if (Ndwl != 
1 || //Ndwl is fixed to 1 for CAM + Ndcm != 1 || //Ndcm is fixed to 1 for CAM + Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for CAM + Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one + Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one + Ndbl < 2) //FIXME: why should Ndbl be >1 for very small CAMs? + { + return; + } + + + + if (g_ip->specific_tag) + { + tagbits = int(ceil(g_ip->tag_w/8.0)*8); + } + else + { + tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8); + } + + //computation of no. of rows and cols of a subarray + tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries. + tag_num_c_subarray = tagbits; + + if (tag_num_r_subarray == 0) return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; + num_r_subarray = tag_num_r_subarray; //FIXME: what about num_c_subarray? 
+ + num_subarrays = Ndwl * Ndbl; + + // calculate cell dimensions + cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; + cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; + + //FIXME: curious where this is getting used in a CAM + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports) + + 2 * wire_local.pitch*(g_ip->num_search_ports-1); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1); + + // calculate wire parameters + + double c_b_metal = cell.h * wire_local.C_per_um; +// double C_bl; + + c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; + deg_bl_muxing = 1;//FA fix as 1 + // "/ 2.0" below is due to the fact that two adjacent access transistors share drain + // contacts in a physical layout + double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines +// C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + + + // do/di: data in/out, for fully associative they are the data width for normal read and write + // so/si: search data in/out, for fully associative they are the data width for the search ops + // for CAM, si=di, but so = matching address. 
do = data out = di (for normal read/write) + // so/si needs broadcase while do/di do not + + switch (Ndbl) { + case (0): + cout << " Invalid Ndbl \n"< num_mats_h_dir) + { + return; + } + + + num_di_b_mat = tagbits; + num_si_b_mat = tagbits;//*num_subarrays/num_mats; + + num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA + num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast + + int num_addr_b_row_dec = _log2(num_r_subarray); + num_addr_b_row_dec +=_log2(num_subarrays/num_mats); + int number_subbanks = num_mats / num_act_mats_hor_dir; + number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM + + num_rw_ports = g_ip->num_rw_ports; + num_rd_ports = g_ip->num_rd_ports; + num_wr_ports = g_ip->num_wr_ports; + num_se_rd_ports = g_ip->num_se_rd_ports; + num_search_ports = g_ip->num_search_ports; + + number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); + + num_di_b_bank_per_port = tagbits; + num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = tagbits; + num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); + + if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) + { + number_way_select_signals_mat = g_ip->data_assoc; + } + + // add ECC adjustment to all data signals that traverse on H-trees. 
+ if (g_ip->add_ecc_b_ == true) + { + ECC_adjustment(); + } + + is_valid = true; +} + +void +DynamicParameter::init_FA() +{ + const InterconnectType &wire_local = g_tp.wire_local; + //Disabling 3D model since a 3D stacked FA is never tested + assert(NUMBER_STACKED_DIE_LAYERS == 1); + unsigned int capacity_per_die = g_ip->cache_sz; + + if (Ndwl != 1 || //Ndwl is fixed to 1 for FA + Ndcm != 1 || //Ndcm is fixed to 1 for FA + Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA + Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one + Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one + Ndbl < 2) + { + return; + } + + + //***********compute row, col of an subarray + + //either fully-asso or cam + if (g_ip->specific_tag) + { + tagbits = g_ip->tag_w; + } + else + { + tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz); + } + tagbits = (((tagbits + 3) >> 2) << 2); + + tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl)); + tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); + if (tag_num_r_subarray == 0) return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; + + data_num_r_subarray = tag_num_r_subarray; + data_num_c_subarray = 8 * g_ip->block_sz; + if (data_num_r_subarray == 0) return; + if (data_num_r_subarray > MAXSUBARRAYROWS) return; + if (data_num_c_subarray < MINSUBARRAYCOLS) return; + if (data_num_c_subarray > MAXSUBARRAYCOLS) return; + num_r_subarray = tag_num_r_subarray; + + num_subarrays = Ndwl * Ndbl; + //****************end of computation of row, col of an subarray + + // calculate wire parameters + cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; + cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + 
g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; + + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports) + + 2 * wire_local.pitch*(g_ip->num_search_ports-1); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1); + + double c_b_metal = cell.h * wire_local.C_per_um; + // double C_bl; + + c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; + deg_bl_muxing = 1;//FA fix as 1 + // "/ 2.0" below is due to the fact that two adjacent access transistors share drain + // contacts in a physical layout + double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines + // C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + + + // do/di: data in/out, for fully associative they are the data width for normal read and write + // so/si: search data in/out, for fully associative they are the data width for the search ops + // for CAM, si=di, but so = matching address. 
do = data out = di (for normal read/write) + // so/si needs broadcase while do/di do not + + switch (Ndbl) { + case (0): + cout << " Invalid Ndbl \n"<block_sz;//TODO:internal perfetch should be considered also for fa + num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; + + deg_sa_mux_l1_non_assoc = 1; + + deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; + + num_act_mats_hor_dir = 1; + num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used + + //compute num_do_mat for tag + if (num_act_mats_hor_dir > num_mats_h_dir) + { + return; + } + + + //compute di for mat subbank and bank + if (fully_assoc) + { + num_di_b_mat = num_do_b_mat; + //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, + //but inside the mat wire tracks need to be reserved for search data bus + num_si_b_mat = tagbits; + } + num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA + num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast + + int num_addr_b_row_dec = _log2(num_r_subarray); + num_addr_b_row_dec +=_log2(num_subarrays/num_mats); + int number_subbanks = num_mats / num_act_mats_hor_dir; + number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM + + num_rw_ports = g_ip->num_rw_ports; + num_rd_ports = g_ip->num_rd_ports; + num_wr_ports = g_ip->num_wr_ports; + num_se_rd_ports = g_ip->num_se_rd_ports; + num_search_ports = g_ip->num_search_ports; + + number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); + + num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? 
+ num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->out_w + tagbits; + num_so_b_bank_per_port = g_ip->out_w; + + if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) + { + number_way_select_signals_mat = g_ip->data_assoc; + } + + // add ECC adjustment to all data signals that traverse on H-trees. + if (g_ip->add_ecc_b_ == true) + { + ECC_adjustment(); + } + + is_valid = true; +} + +//DynamicParameter::init_Mem() +//{ +//} +// +//DynamicParameter::init_3DMem() +//{ +//} + +//*** Calculate number of rows and columns in a subarray +bool +DynamicParameter::calc_subarr_rc(unsigned int capacity_per_die) { + // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be + // at least two because an array is assumed to have at least one mat. A mat + // consists of two rows and two columns of subarrays. + if (Ndwl < 2 || Ndbl < 2) + { + return false; + } + + if ((is_dram) && (!is_tag) && (Ndcm > 1)) + { + return false; // For a DRAM array, each bitline has its own sense-amp + } + + // if data array, let tagbits = 0 + if (is_tag) + { + if (g_ip->specific_tag) + { + tagbits = g_ip->tag_w; + } + else + { + tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + + _log2(g_ip->tag_assoc*2 - 1); + + } +// tagbits = (((tagbits + 3) >> 2) << 2); //FIXME: NAV: Why are we doing this? 
+ + num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * + g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd)); + num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl)); + } + else + { + num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * + g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd)); + num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl)); + if(g_ip->is_3d_mem) + { + double capacity_per_die_double = (double)g_ip->cache_sz / g_ip->num_die_3d; + //num_c_subarray = 1 << (int)ceil((double)_log2( 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl) )/2 ) ; + //num_r_subarray = 1 << (int)ceil((double)_log2( 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl * num_c_subarray) ) ); + num_c_subarray = g_ip->page_sz_bits/Ndwl; + num_r_subarray = 1 << (int)floor(_log2((double) g_ip->cache_sz / g_ip->num_die_3d + / num_c_subarray / g_ip->nbanks / Ndbl / Ndwl * 1024 * 1024 * 1024) +0.5); + if (g_ip->print_detail_debug) + { + cout << "parameter.cc: capacity_per_die_double = " << capacity_per_die_double << " Gbit"<< endl; + cout << "parameter.cc: g_ip->nbanks * Ndbl * Ndwl = " << (g_ip->nbanks * Ndbl * Ndwl) << endl; + //cout << "parameter.cc: subarray capacity = " << 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl) << endl; + //cout << "parameter.cc: total bit add per subarray = " << _log2( 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl) ) << endl; + cout << "parameter.cc: num_r_subarray = " << num_r_subarray << endl; + cout << "parameter.cc: num_c_subarray = " << num_c_subarray << endl; + } + + } + } + + if (num_r_subarray < MINSUBARRAYROWS) return false; + if (num_r_subarray == 0) return false; + if (num_r_subarray > MAXSUBARRAYROWS) return false; + if (num_c_subarray < MINSUBARRAYCOLS) return false; + if (num_c_subarray > MAXSUBARRAYCOLS) return false; + + + + num_subarrays = Ndwl * Ndbl; + return true; +} + + + + + +DynamicParameter::DynamicParameter( + bool is_tag_, + int pure_ram_, + int pure_cam_, + double 
Nspd_, + unsigned int Ndwl_, + unsigned int Ndbl_, + unsigned int Ndcm_, + unsigned int Ndsam_lev_1_, + unsigned int Ndsam_lev_2_, + Wire_type wt, + bool is_main_mem_): + is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_), + Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),wtype(wt), + number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), + is_main_mem(is_main_mem_), cell(), is_valid(false) +{ + ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; + is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + + unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer + const InterconnectType & wire_local = g_tp.wire_local; + fully_assoc = (g_ip->fully_assoc) ? true : false; + + if (pure_cam) + { + init_CAM(); + return; + } + + if (fully_assoc) { + init_FA(); + return; + } + + //*** Calculate number of rows and columns in a subarray + // Return if their dimensions do not meet the minimum specs + if (!calc_subarr_rc(capacity_per_die)) return; + + //** Calculate cell dimensions + if(is_tag) + { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + + g_ip->num_wr_ports); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + + wire_local.pitch * g_ip->num_se_rd_ports; + } + else + { + if (is_dram) + { + cell.h = g_tp.dram.b_h; + cell.w = g_tp.dram.b_w; + } + else + { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; + } + } + + double c_b_metal = cell.h * 
wire_local.C_per_um; + double C_bl; + + if (is_dram) + { + deg_bl_muxing = 1; + if (ram_cell_tech_type == comm_dram) + { + double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + //C_bl = num_r_subarray * c_b_metal; + V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl); + if (V_b_sense < VBITSENSEMIN && !(g_ip->is_3d_mem && g_ip->force_cache_config) ) + { + return; + } + + dram_refresh_period = 64e-3; + + } + else + { + double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl); + + if (V_b_sense < VBITSENSEMIN) + { + return; //Sense amp input signal is smaller that minimum allowable sense amp input signal + } + V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value + //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; + //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; + dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; + } + } + else + { //SRAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 
0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; + deg_bl_muxing = Ndcm; + // "/ 2.0" below is due to the fact that two adjacent access transistors share drain + // contacts in a physical layout + double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + } + + + // do/di: data in/out, for fully associative they are the data width for normal read and write + // so/si: search data in/out, for fully associative they are the data width for the search ops + // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write) + // so/si needs broadcase while do/di do not + + num_mats_h_dir = MAX(Ndwl / 2, 1); + num_mats_v_dir = MAX(Ndbl / 2, 1); + num_mats = num_mats_h_dir * num_mats_v_dir; + num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1); + + if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats))) + { + return; + } + + + int deg_sa_mux_l1_non_assoc; + //TODO:the i/o for subbank is not necessary and should be removed. 
+ if (!is_tag) + { + if (is_main_mem == true) + { + num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; + //CACTI3DD DRAM page size + if(g_ip->is_3d_mem) + num_do_b_subbank = g_ip->page_sz_bits; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } + else + { + if (g_ip->fast_access == true) + { + num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } + else + { + + num_do_b_subbank = g_ip->out_w; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; + if (deg_sa_mux_l1_non_assoc < 1) + { + return; + } + + } + } + } + else + { + num_do_b_subbank = tagbits * g_ip->tag_assoc; + if (num_do_b_mat < tagbits) + { + return; + } + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; + } + + deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; + + num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; + if (g_ip->is_3d_mem && num_act_mats_hor_dir == 0) + num_act_mats_hor_dir = 1; + if (num_act_mats_hor_dir == 0) + { + return; + } + + //compute num_do_mat for tag + if (is_tag) + { + if (!(fully_assoc || pure_cam)) + { + num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; + num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; + } + } + + if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram)) + { + if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits) + { + return; + } + } + +// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays + if (is_tag == false && g_ip->is_main_mem == true && + num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) + { + return; + } + + if (num_act_mats_hor_dir > num_mats_h_dir) + { + return; + } + + + //compute di for mat subbank and bank + if(!is_tag) + { + if(g_ip->fast_access == true) + { + num_di_b_mat = num_do_b_mat / 
g_ip->data_assoc; + } + else + { + num_di_b_mat = num_do_b_mat; + } + } + else + { + num_di_b_mat = tagbits; + } + + num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA + num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast + + int num_addr_b_row_dec = _log2(num_r_subarray); + if ((fully_assoc ||pure_cam)) + num_addr_b_row_dec +=_log2(num_subarrays/num_mats); + int number_subbanks = num_mats / num_act_mats_hor_dir; + number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM + + num_rw_ports = g_ip->num_rw_ports; + num_rd_ports = g_ip->num_rd_ports; + num_wr_ports = g_ip->num_wr_ports; + num_se_rd_ports = g_ip->num_se_rd_ports; + num_search_ports = g_ip->num_search_ports; + + if (is_dram && is_main_mem) + { + number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, + _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); + if (g_ip->print_detail_debug) + { + cout << "parameter.cc: number_addr_bits_mat = " << num_addr_b_row_dec << endl; + cout << "parameter.cc: num_addr_b_row_dec = " << num_addr_b_row_dec << endl; + cout << "parameter.cc: num_addr_b_mux_sel = " << _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2) << endl; + } + } + else + { + number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); + } + + if (is_tag) + { + num_di_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->data_assoc; + } + else + { + num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; + num_do_b_bank_per_port = g_ip->out_w; + } + + if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) + { + number_way_select_signals_mat = g_ip->data_assoc; + } + + // add ECC adjustment to all data signals that traverse on H-trees. 
+ if (g_ip->add_ecc_b_ == true) ECC_adjustment(); + + is_valid = true; +} + +void +DynamicParameter::ECC_adjustment() { + num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); + num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); + num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); + num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); + num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); + num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); + + num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); + num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); + num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); + num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); + num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); + num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); +} + +//DynamicParameter::DynamicParameter( +// bool is_tag_, +// int pure_ram_, +// int pure_cam_, +// double Nspd_, +// unsigned int Ndwl_, +// unsigned int Ndbl_, +// unsigned int Ndcm_, +// unsigned int Ndsam_lev_1_, +// unsigned int Ndsam_lev_2_, +// Wire_type wt, +// bool is_main_mem_): +// is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_), +// Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),wtype(wt), +// number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), +// is_main_mem(is_main_mem_), cell(), is_valid(false) +// ram_cell_tech_type = (is_tag) ? 
g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; +// is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); +// +// unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer +// const /*TechnologyParameter::*/InterconnectType & wire_local = g_tp.wire_local; +// fully_assoc = (g_ip->fully_assoc) ? true : false; +// +// if (fully_assoc || pure_cam) +// { // fully-assocative cache -- ref: CACTi 2.0 report +// if (Ndwl != 1 || //Ndwl is fixed to 1 for FA +// Ndcm != 1 || //Ndcm is fixed to 1 for FA +// Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA +// Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one +// Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one +// Ndbl < 2) +// { +// return; +// } +// } +// +// if ((is_dram) && (!is_tag) && (Ndcm > 1)) +// { +// return; // For a DRAM array, each bitline has its own sense-amp +// } +// +// // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be +// // at least two because an array is assumed to have at least one mat. 
And a mat +// // is formed out of two horizontal subarrays and two vertical subarrays +// if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) +// { +// return; +// } +// +// //***********compute row, col of an subarray +// if (!(fully_assoc || pure_cam))//Not fully_asso nor cam +// { +// // if data array, let tagbits = 0 +// if (is_tag) +// { +// if (g_ip->specific_tag) +// { +// tagbits = g_ip->tag_w; +// } +// else +// { +// tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + +// _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks); +// +// } +// tagbits = (((tagbits + 3) >> 2) << 2); +// +// num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * +// g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); +// num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); +// //burst_length = 1; +// } +// else +// { +// num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * +// g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); +// num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); +// // burst_length = g_ip->block_sz * 8 / g_ip->out_w; +// if(g_ip->is_3d_mem) +// { +// double capacity_per_die_double = (double)g_ip->cache_sz / g_ip->num_die_3d; +// //num_c_subarray = 1 << (int)ceil((double)_log2( 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl) )/2 ) ; +// //num_r_subarray = 1 << (int)ceil((double)_log2( 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl * num_c_subarray) ) ); +// num_c_subarray = g_ip->page_sz_bits/Ndwl; +// num_r_subarray = 1 << (int)floor(_log2((double) g_ip->cache_sz / g_ip->num_die_3d +// / num_c_subarray / g_ip->nbanks / Ndbl / Ndwl * 1024 * 1024 * 1024) +0.5); +// if (g_ip->print_detail_debug) +// { +// cout << "parameter.cc: capacity_per_die_double = " << capacity_per_die_double << " Gbit"<< endl; +// cout << "parameter.cc: g_ip->nbanks * Ndbl * Ndwl = " << (g_ip->nbanks * Ndbl * Ndwl) << endl; +// //cout << 
"parameter.cc: subarray capacity = " << 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl) << endl; +// //cout << "parameter.cc: total bit add per subarray = " << _log2( 8*capacity_per_die / (g_ip->nbanks * Ndbl * Ndwl) ) << endl; +// cout << "parameter.cc: num_r_subarray = " << num_r_subarray << endl; +// cout << "parameter.cc: num_c_subarray = " << num_c_subarray << endl; +// } +// +// } +// } +// +// if (num_r_subarray < MINSUBARRAYROWS) return; +// if (num_r_subarray == 0) return; +// if (num_r_subarray > MAXSUBARRAYROWS) return; +// if (num_c_subarray < MINSUBARRAYCOLS) return; +// if (num_c_subarray > MAXSUBARRAYCOLS) return; +// +// } +// +// else +// {//either fully-asso or cam +// if (pure_cam) +// { +// if (g_ip->specific_tag) +// { +// tagbits = int(ceil(g_ip->tag_w/8.0)*8); +// } +// else +// { +// tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8); +//// cout<<"Pure CAM needs tag width to be specified"<> 2) << 2); +// +// tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries. +// //tag_num_c_subarray = (int)(tagbits + EPSILON); +// tag_num_c_subarray = tagbits; +// if (tag_num_r_subarray == 0) return; +// if (tag_num_r_subarray > MAXSUBARRAYROWS) return; +// if (tag_num_c_subarray < MINSUBARRAYCOLS) return; +// if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; +// num_r_subarray = tag_num_r_subarray; +// } +// else //fully associative +// { +// if (g_ip->specific_tag) +// { +// tagbits = g_ip->tag_w; +// } +// else +// { +// tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. 
+// } +// tagbits = (((tagbits + 3) >> 2) << 2); +// +// tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl)); +// tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); +// if (tag_num_r_subarray == 0) return; +// if (tag_num_r_subarray > MAXSUBARRAYROWS) return; +// if (tag_num_c_subarray < MINSUBARRAYCOLS) return; +// if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; +// +// data_num_r_subarray = tag_num_r_subarray; +// data_num_c_subarray = 8 * g_ip->block_sz; +// if (data_num_r_subarray == 0) return; +// if (data_num_r_subarray > MAXSUBARRAYROWS) return; +// if (data_num_c_subarray < MINSUBARRAYCOLS) return; +// if (data_num_c_subarray > MAXSUBARRAYCOLS) return; +// num_r_subarray = tag_num_r_subarray; +// } +// } +// +// num_subarrays = Ndwl * Ndbl; +// //****************end of computation of row, col of an subarray +// +// // calculate wire parameters +// if (fully_assoc || pure_cam) +// { +// cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) +// + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; +// cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) +// + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; +// +// cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports) +// + 2 * wire_local.pitch*(g_ip->num_search_ports-1); +// cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) +// + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1); +// } +// else +// { +// if(is_tag) +// { +// cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + +// g_ip->num_wr_ports); +// cell.w = 
g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + +// (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + +// wire_local.pitch * g_ip->num_se_rd_ports; +// } +// else +// { +// if (is_dram) +// { +// cell.h = g_tp.dram.b_h; +// cell.w = g_tp.dram.b_w; +// } +// else +// { +// cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + +// g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); +// cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + +// (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + +// g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; +// } +// } +// } +// +// double c_b_metal = cell.h * wire_local.C_per_um; +// double C_bl; +// +// if (!(fully_assoc || pure_cam)) +// { +// if (is_dram) +// { +// deg_bl_muxing = 1; +// if (ram_cell_tech_type == comm_dram) +// { +// double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; +// C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); +// //C_bl = num_r_subarray * c_b_metal; +// V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl); +// if (V_b_sense < VBITSENSEMIN && !(g_ip->is_3d_mem && g_ip->force_cache_config) ) +// { +// return; +// } +// +// /* +// C_bl = num_r_subarray * c_b_metal; +// V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl); +// if (V_b_sense < VBITSENSEMIN) +// { +// return; +// } +// V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value +// */ +// dram_refresh_period = 64e-3; +// +// } +// else +// { +// double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; +// C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); +// V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl); +// +// if (V_b_sense < VBITSENSEMIN) +// { +// return; //Sense amp input signal is smaller that minimum allowable sense amp input signal 
+// } +// V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value +// //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; +// //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; +// dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; +// } +// } +// else +// { //SRAM +// V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; +// deg_bl_muxing = Ndcm; +// // "/ 2.0" below is due to the fact that two adjacent access transistors share drain +// // contacts in a physical layout +// double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; +// C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); +// dram_refresh_period = 0; +// } +// } +// else +// { +// c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM +// V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; +// deg_bl_muxing = 1;//FA fix as 1 +// // "/ 2.0" below is due to the fact that two adjacent access transistors share drain +// // contacts in a physical layout +// double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines +// C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); +// dram_refresh_period = 0; +// } +// +// +// // do/di: data in/out, for fully associative they are the data width for normal read and write +// // so/si: search data in/out, for fully associative they are the data width for the search ops +// // for CAM, si=di, but so = matching address. 
do = data out = di (for normal read/write) +// // so/si needs broadcase while do/di do not +// +// if (fully_assoc || pure_cam) +// { +// switch (Ndbl) { +// case (0): +// cout << " Invalid Ndbl \n"<int_prefetch_w * g_ip->out_w; +// //CACTI3DD DRAM page size +// if(g_ip->is_3d_mem) +// num_do_b_subbank = g_ip->page_sz_bits; +// deg_sa_mux_l1_non_assoc = Ndsam_lev_1; +// } +// else +// { +// if (g_ip->fast_access == true) +// { +// num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; +// deg_sa_mux_l1_non_assoc = Ndsam_lev_1; +// } +// else +// { +// +// num_do_b_subbank = g_ip->out_w; +// deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; +// if (deg_sa_mux_l1_non_assoc < 1) +// { +// return; +// } +// +// } +// } +// } +// else +// { +// num_do_b_subbank = tagbits * g_ip->tag_assoc; +// if (num_do_b_mat < tagbits) +// { +// return; +// } +// deg_sa_mux_l1_non_assoc = Ndsam_lev_1; +// //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; +// } +// } +// else +// { +// if (fully_assoc) +// { +// num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa +// num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; +// } +// else +// { +// num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data +// num_do_b_subbank = tag_num_c_subarray; +// } +// +// deg_sa_mux_l1_non_assoc = 1; +// } +// +// deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; +// +// if (fully_assoc || pure_cam) +// { +// num_act_mats_hor_dir = 1; +// num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used +// } +// else +// { +// num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; +// if (g_ip->is_3d_mem && num_act_mats_hor_dir == 0) +// num_act_mats_hor_dir = 1; +// if (num_act_mats_hor_dir == 0) +// { +// return; +// } +// } +// +// //compute num_do_mat for tag +// if (is_tag) +// { +// if (!(fully_assoc || pure_cam)) +// { +// 
num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; +// num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; +// } +// } +// +// if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram)) +// { +// if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits) +// { +// return; +// } +// } +// +//// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays +// if (is_tag == false && g_ip->is_main_mem == true && +// num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) +// { +// return; +// } +// +// if (num_act_mats_hor_dir > num_mats_h_dir) +// { +// return; +// } +// +// +// //compute di for mat subbank and bank +// if (!(fully_assoc ||pure_cam)) +// { +// if(!is_tag) +// { +// if(g_ip->fast_access == true) +// { +// num_di_b_mat = num_do_b_mat / g_ip->data_assoc; +// } +// else +// { +// num_di_b_mat = num_do_b_mat; +// } +// } +// else +// { +// num_di_b_mat = tagbits; +// } +// } +// else +// { +// if (fully_assoc) +// { +// num_di_b_mat = num_do_b_mat; +// //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, +// //but inside the mat wire tracks need to be reserved for search data bus +// num_si_b_mat = tagbits; +// } +// else +// { +// num_di_b_mat = tagbits; +// num_si_b_mat = tagbits;//*num_subarrays/num_mats; +// } +// +// } +// +// num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA +// num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast +// +// int num_addr_b_row_dec = _log2(num_r_subarray); +// if ((fully_assoc ||pure_cam)) +// num_addr_b_row_dec +=_log2(num_subarrays/num_mats); +// int number_subbanks = num_mats / num_act_mats_hor_dir; +// number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM +// 
+// num_rw_ports = g_ip->num_rw_ports; +// num_rd_ports = g_ip->num_rd_ports; +// num_wr_ports = g_ip->num_wr_ports; +// num_se_rd_ports = g_ip->num_se_rd_ports; +// num_search_ports = g_ip->num_search_ports; +// +// if (is_dram && is_main_mem) +// { +// number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, +// _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); +// if (g_ip->print_detail_debug) +// { +// cout << "parameter.cc: number_addr_bits_mat = " << num_addr_b_row_dec << endl; +// cout << "parameter.cc: num_addr_b_row_dec = " << num_addr_b_row_dec << endl; +// cout << "parameter.cc: num_addr_b_mux_sel = " << _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2) << endl; +// } +// } +// else +// { +// number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + +// _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); +// } +// +// if (!(fully_assoc ||pure_cam)) +// { +// if (is_tag) +// { +// num_di_b_bank_per_port = tagbits; +// num_do_b_bank_per_port = g_ip->data_assoc; +// } +// else +// { +// num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; +// num_do_b_bank_per_port = g_ip->out_w; +// } +// } +// else +// { +// if (fully_assoc) +// { +// num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? +// num_si_b_bank_per_port = tagbits; +// num_do_b_bank_per_port = g_ip->out_w + tagbits; +// num_so_b_bank_per_port = g_ip->out_w; +// } +// else +// { +// num_di_b_bank_per_port = tagbits; +// num_si_b_bank_per_port = tagbits; +// num_do_b_bank_per_port = tagbits; +// num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); +// } +// } +// +// if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) +// { +// number_way_select_signals_mat = g_ip->data_assoc; +// } +// +// // add ECC adjustment to all data signals that traverse on H-trees. 
+// if (g_ip->add_ecc_b_ == true) +// { +// num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); +// num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); +// num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); +// num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); +// num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); +// num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); +// +// num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); +// num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); +// num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); +// num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); +// num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); +// num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); +// } +// +// is_valid = true; +//} diff --git a/T1/TP1/cacti-master/parameter.h b/T1/TP1/cacti-master/parameter.h new file mode 100644 index 0000000..2cbd49b --- /dev/null +++ b/T1/TP1/cacti-master/parameter.h @@ -0,0 +1,779 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#ifndef __PARAMETER_H__ +#define __PARAMETER_H__ + +#include "area.h" +#include "const.h" +#include "cacti_interface.h" +#include "io.h" + +// parameters which are functions of certain device technology +/** +class TechnologyParameter +{ + public: + class DeviceType + { + public: + double C_g_ideal; + double C_fringe; + double C_overlap; + double C_junc; // C_junc_area + 
double C_junc_sidewall; + double l_phy; + double l_elec; + double R_nch_on; + double R_pch_on; + double Vdd; + double Vth; + double Vcc_min;//allowed min vcc; for memory cell it is the lowest vcc for data retention. for logic it is the vcc to balance the leakage reduction and wakeup latency + double I_on_n; + double I_on_p; + double I_off_n; + double I_off_p; + double I_g_on_n; + double I_g_on_p; + double C_ox; + double t_ox; + double n_to_p_eff_curr_drv_ratio; + double long_channel_leakage_reduction; + double Mobility_n; + + DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), + C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), + Vdd(0), Vth(0), Vcc_min(0), + I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0), + C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0), + Mobility_n(0) { }; + void reset() + { + C_g_ideal = 0; + C_fringe = 0; + C_overlap = 0; + C_junc = 0; + l_phy = 0; + l_elec = 0; + R_nch_on = 0; + R_pch_on = 0; + Vdd = 0; + Vth = 0; + Vcc_min = 0; + I_on_n = 0; + I_on_p = 0; + I_off_n = 0; + I_off_p = 0; + I_g_on_n = 0; + I_g_on_p = 0; + C_ox = 0; + t_ox = 0; + n_to_p_eff_curr_drv_ratio = 0; + long_channel_leakage_reduction = 0; + Mobility_n = 0; + } + + void display(uint32_t indent = 0); + }; + class InterconnectType + { + public: + double pitch; + double R_per_um; + double C_per_um; + double horiz_dielectric_constant; + double vert_dielectric_constant; + double aspect_ratio; + double miller_value; + double ild_thickness; + + InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { }; + + void reset() + { + pitch = 0; + R_per_um = 0; + C_per_um = 0; + horiz_dielectric_constant = 0; + vert_dielectric_constant = 0; + aspect_ratio = 0; + miller_value = 0; + ild_thickness = 0; + } + + void display(uint32_t indent = 0); + }; + class MemoryType + { + public: + double b_w; + double b_h; + double cell_a_w; + double cell_pmos_w; + double cell_nmos_w; + double Vbitpre; + double 
Vbitfloating;//voltage when floating bitline is supported + + void reset() + { + b_w = 0; //fs and tech + b_h = 0; //fs and tech + cell_a_w = 0; // ram_cell_tech_type + cell_pmos_w = 0; //fs + cell_nmos_w = 0; + Vbitpre = 0; + Vbitfloating = 0; + } + + void display(uint32_t indent = 0); + }; + + class ScalingFactor + { + public: + double logic_scaling_co_eff; + double core_tx_density; + double long_channel_leakage_reduction; + + ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0), + long_channel_leakage_reduction(0) { }; + + void reset() + { + logic_scaling_co_eff= 0; + core_tx_density = 0; + long_channel_leakage_reduction= 0; + } + + void display(uint32_t indent = 0); + }; + + double ram_wl_stitching_overhead_; //fs + double min_w_nmos_; //fs + double max_w_nmos_; //fs + double max_w_nmos_dec; //fs+ ram_cell_tech_type + double unit_len_wire_del; //wire_inside_mat + double FO4; //fs + double kinv; //fs + double vpp; //input + double w_sense_en;//fs + double w_sense_n; //fs + double w_sense_p; //fs + double sense_delay; // input + double sense_dy_power; //input + double w_iso; //fs + double w_poly_contact; //fs + double spacing_poly_to_poly; //fs + double spacing_poly_to_contact;//fs + + //CACTI3DD TSV params + double tsv_parasitic_capacitance_fine; + double tsv_parasitic_resistance_fine; + double tsv_minimum_area_fine; + + double tsv_parasitic_capacitance_coarse; + double tsv_parasitic_resistance_coarse; + double tsv_minimum_area_coarse; + + //fs + double w_comp_inv_p1; + double w_comp_inv_p2; + double w_comp_inv_p3; + double w_comp_inv_n1; + double w_comp_inv_n2; + double w_comp_inv_n3; + double w_eval_inv_p; + double w_eval_inv_n; + double w_comp_n; + double w_comp_p; + + double dram_cell_I_on; //ram_cell_tech_type + double dram_cell_Vdd; + double dram_cell_I_off_worst_case_len_temp; + double dram_cell_C; + double gm_sense_amp_latch; // depends on many things + + double w_nmos_b_mux;//fs + double w_nmos_sa_mux;//fs + double w_pmos_bl_precharge;//fs + 
double w_pmos_bl_eq;//fs + double MIN_GAP_BET_P_AND_N_DIFFS;//fs + double MIN_GAP_BET_SAME_TYPE_DIFFS;//fs + double HPOWERRAIL;//fs + double cell_h_def;//fs + + double chip_layout_overhead; //input + double macro_layout_overhead; + double sckt_co_eff; + + double fringe_cap;//input + + uint64_t h_dec; //ram_cell_tech_type + + DeviceType sram_cell; // SRAM cell transistor + DeviceType dram_acc; // DRAM access transistor + DeviceType dram_wl; // DRAM wordline transistor + DeviceType peri_global; // peripheral global + DeviceType cam_cell; // SRAM cell transistor + + DeviceType sleep_tx; // Sleep transistor cell transistor + + InterconnectType wire_local; + InterconnectType wire_inside_mat; + InterconnectType wire_outside_mat; + + ScalingFactor scaling_factor; + + MemoryType sram; + MemoryType dram; + MemoryType cam; + + void display(uint32_t indent = 0); + + void reset() + { + dram_cell_Vdd = 0; + dram_cell_I_on = 0; + dram_cell_C = 0; + vpp = 0; + + sense_delay = 0; + sense_dy_power = 0; + fringe_cap = 0; +// horiz_dielectric_constant = 0; +// vert_dielectric_constant = 0; +// aspect_ratio = 0; +// miller_value = 0; +// ild_thickness = 0; + + dram_cell_I_off_worst_case_len_temp = 0; + + sram_cell.reset(); + dram_acc.reset(); + dram_wl.reset(); + peri_global.reset(); + cam_cell.reset(); + sleep_tx.reset(); + + scaling_factor.reset(); + + wire_local.reset(); + wire_inside_mat.reset(); + wire_outside_mat.reset(); + + sram.reset(); + dram.reset(); + cam.reset(); + + chip_layout_overhead = 0; + macro_layout_overhead = 0; + sckt_co_eff = 0; + } +}; + +**/ +//ali +class DeviceType +{ + public: + double C_g_ideal; + double C_fringe; + double C_overlap; + double C_junc; // C_junc_area + double C_junc_sidewall; + double l_phy; + double l_elec; + double R_nch_on; + double R_pch_on; + double Vdd; + double Vth; + double Vcc_min;//allowed min vcc; for memory cell it is the lowest vcc for data retention. 
for logic it is the vcc to balance the leakage reduction and wakeup latency + double I_on_n; + double I_on_p; + double I_off_n; + double I_off_p; + double I_g_on_n; + double I_g_on_p; + double C_ox; + double t_ox; + double n_to_p_eff_curr_drv_ratio; + double long_channel_leakage_reduction; + double Mobility_n; + + // auxiliary parameters + double Vdsat; + double gmp_to_gmn_multiplier; + + /* Constructor: the initializer list zeroes the primary fields and the body then calls reset(), which zeroes every field of the class (including Vdsat and gmp_to_gmn_multiplier), so a default-constructed DeviceType is all zeros. */ + DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), + C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), + Vdd(0), Vth(0), Vcc_min(0), + I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0), + C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0), + Mobility_n(0) { reset();}; + + void assign(const string & in_file, int tech_flavor, unsigned int temp); + void interpolate(double alpha, const DeviceType& dev1, const DeviceType& dev2); + /* reset(): sets every data member of DeviceType back to 0. */ void reset() + { + C_g_ideal=0; + C_fringe=0; + C_overlap=0; + C_junc=0; // C_junc_area + C_junc_sidewall=0; + l_phy=0; + l_elec=0; + R_nch_on=0; + R_pch_on=0; + Vdd=0; + Vth=0; + Vcc_min=0;//allowed min vcc, for memory cell it is the lowest vcc for data retention. 
for logic it is the vcc to balance the leakage reduction and wakeup latency + I_on_n=0; + I_on_p=0; + I_off_n=0; + I_off_p=0; + I_g_on_n=0; + I_g_on_p=0; + C_ox=0; + t_ox=0; + n_to_p_eff_curr_drv_ratio=0; + long_channel_leakage_reduction=0; + Mobility_n=0; + + // auxiliary parameters + Vdsat=0; + gmp_to_gmn_multiplier=0; + } + + void display(uint32_t indent = 0) const; + bool isEqual(const DeviceType & dev); +}; + /* InterconnectType: parameters of one wire type (pitch, R and C per um, dielectric constants, aspect ratio, Miller value, ILD thickness) plus the auxiliary geometry fields. The constructor calls reset(), which zeroes every member. */ +class InterconnectType +{ + public: + double pitch; + double R_per_um; + double C_per_um; + double horiz_dielectric_constant; + double vert_dielectric_constant; + double aspect_ratio; + double miller_value; + double ild_thickness; + + //auxiliary parameters + double wire_width; + double wire_thickness; + double wire_spacing; + double barrier_thickness; + double dishing_thickness; + double alpha_scatter; + double fringe_cap; + + + InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { reset(); }; + + void reset() + { + pitch=0; + R_per_um=0; + C_per_um=0; + horiz_dielectric_constant=0; + vert_dielectric_constant=0; + aspect_ratio=0; + miller_value=0; + ild_thickness=0; + + //auxiliary parameters + wire_width=0; + wire_thickness=0; + wire_spacing=0; + barrier_thickness=0; + dishing_thickness=0; + alpha_scatter=0; + fringe_cap=0; + + } + void assign(const string & in_file, int projection_type, int tech_flavor); + void interpolate(double alpha, const InterconnectType & inter1, const InterconnectType & inter2); + void display(uint32_t indent = 0); + bool isEqual(const InterconnectType & inter); +}; + /* MemoryType: memory-cell dimensions (b_w, b_h), cell transistor widths and bitline voltages, plus the area_cell / asp_ratio_cell inputs. The constructor initializes only through reset(), so reset() must assign every member. */ +class MemoryType +{ + public: + double b_w; + double b_h; + double cell_a_w; + double cell_pmos_w; + double cell_nmos_w; + double Vbitpre; + double Vbitfloating;//voltage when floating bitline is supported + + // needed to calculate b_w b_h + double area_cell; + double asp_ratio_cell; + + MemoryType(){reset();} + /* reset(): sets every data member of MemoryType back to 0. */ void reset() + { + b_w=0; + b_h=0; + cell_a_w=0; + cell_pmos_w=0; + cell_nmos_w=0; + Vbitpre=0; + Vbitfloating=0; + + // also clear the b_w/b_h inputs: they were previously left indeterminate after construction + area_cell=0; + asp_ratio_cell=0; + } + void assign(const string & 
in_file, int tech_flavor, int cell_type); // sram(0),cam(1),dram(2) + void interpolate(double alpha, const MemoryType& dev1, const MemoryType& dev2); + void display(uint32_t indent = 0) const; + bool isEqual(const MemoryType & mem); +}; + /* ScalingFactor: three technology scaling coefficients (logic scaling, core transistor density, long-channel leakage reduction). The constructor zeroes them in its initializer list and again through reset(). */ +class ScalingFactor +{ + public: + double logic_scaling_co_eff; + double core_tx_density; + double long_channel_leakage_reduction; + + ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0), + long_channel_leakage_reduction(0) { reset(); }; + + void reset() + { + logic_scaling_co_eff=0; + core_tx_density=0; + long_channel_leakage_reduction=0; + } + void assign(const string & in_file); + void interpolate(double alpha, const ScalingFactor& dev1, const ScalingFactor& dev2); + void display(uint32_t indent = 0); + bool isEqual(const ScalingFactor & scal); +}; + +// parameters which are functions of certain device technology: scalar sizing, TSV and DRAM-cell values plus DeviceType, InterconnectType, ScalingFactor and MemoryType sub-records; the constructor calls reset(), which zeroes the scalar fields and calls reset() on each sub-record +class TechnologyParameter +{ + public: + double ram_wl_stitching_overhead_; //fs + double min_w_nmos_; //fs + double max_w_nmos_; //fs + double max_w_nmos_dec; //fs+ ram_cell_tech_type + double unit_len_wire_del; //wire_inside_mat + double FO4; //fs + double kinv; //fs + double vpp; //input + double w_sense_en;//fs + double w_sense_n; //fs + double w_sense_p; //fs + double sense_delay; // input + double sense_dy_power; //input + double w_iso; //fs + double w_poly_contact; //fs + double spacing_poly_to_poly; //fs + double spacing_poly_to_contact;//fs + + //CACTI3D auxiliary variables + double tsv_pitch; + double tsv_diameter; + double tsv_length; + double tsv_dielec_thickness; + double tsv_contact_resistance; + double tsv_depletion_width; + double tsv_liner_dielectric_constant; + + //CACTI3DD TSV params + + double tsv_parasitic_capacitance_fine; + double tsv_parasitic_resistance_fine; + double tsv_minimum_area_fine; + + double tsv_parasitic_capacitance_coarse; + double tsv_parasitic_resistance_coarse; + double tsv_minimum_area_coarse; + + //fs + double w_comp_inv_p1; + double w_comp_inv_p2; + double 
w_comp_inv_p3; + double w_comp_inv_n1; + double w_comp_inv_n2; + double w_comp_inv_n3; + double w_eval_inv_p; + double w_eval_inv_n; + double w_comp_n; + double w_comp_p; + + double dram_cell_I_on; //ram_cell_tech_type + double dram_cell_Vdd; + double dram_cell_I_off_worst_case_len_temp; + double dram_cell_C; + double gm_sense_amp_latch; // depends on many things + + double w_nmos_b_mux;//fs + double w_nmos_sa_mux;//fs + double w_pmos_bl_precharge;//fs + double w_pmos_bl_eq;//fs + double MIN_GAP_BET_P_AND_N_DIFFS;//fs + double MIN_GAP_BET_SAME_TYPE_DIFFS;//fs + double HPOWERRAIL;//fs + double cell_h_def;//fs + + double chip_layout_overhead; //input + double macro_layout_overhead; + double sckt_co_eff; + + double fringe_cap;//input + + uint64_t h_dec; //ram_cell_tech_type + + DeviceType sram_cell; // SRAM cell transistor + DeviceType dram_acc; // DRAM access transistor + DeviceType dram_wl; // DRAM wordline transistor + DeviceType peri_global; // peripheral global + DeviceType cam_cell; // SRAM cell transistor + + DeviceType sleep_tx; // Sleep transistor cell transistor + + InterconnectType wire_local; + InterconnectType wire_inside_mat; + InterconnectType wire_outside_mat; + + ScalingFactor scaling_factor; + + MemoryType sram; + MemoryType dram; + MemoryType cam; + + void display(uint32_t indent = 0); + bool isEqual(const TechnologyParameter & tech); + + + void find_upper_and_lower_tech(double technology, int &tech_lo, string& in_file_lo, int &tech_hi, string& in_file_hi); + void assign_tsv(const string & in_file); + void init(double technology, bool is_tag); + TechnologyParameter() + { + reset(); + } + void reset() + { + ram_wl_stitching_overhead_ =0; //fs + min_w_nmos_ =0; //fs + max_w_nmos_ =0; //fs + max_w_nmos_dec =0; //fs+ ram_cell_tech_type + unit_len_wire_del =0; //wire_inside_mat + FO4 =0; //fs + kinv =0; //fs + vpp =0; //input + w_sense_en =0;//fs + w_sense_n =0; //fs + w_sense_p =0; //fs + sense_delay =0; // input + sense_dy_power =0; //input + w_iso =0; 
// Organization-level parameters describing one memory array.
//
// The class is a plain bag of public fields: array partitioning factors
// (Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1/2), address/data/search bit counts at
// mat / subbank / bank granularity, subarray dimensions, port counts and the
// cell footprints.  Clients may either use the parameterized constructor or
// default-construct an instance and assign the fields one by one (this is
// what Router::buffer_stats does).
//
// NOTE(review): the precise meaning of most fields is not documented in this
// header beyond the inline remarks below; consult the implementation before
// relying on any of them.
class DynamicParameter
{
  public:
    bool is_tag;
    bool pure_ram;
    bool pure_cam;
    bool fully_assoc;
    int tagbits;
    int num_subarrays;  // only for leakage computation  -- the number of subarrays per bank
    int num_mats;       // only for leakage computation  -- the number of mats per bank
    double Nspd;
    int Ndwl;
    int Ndbl;
    int Ndcm;
    int deg_bl_muxing;
    int deg_senseamp_muxing_non_associativity;
    int Ndsam_lev_1;
    int Ndsam_lev_2;
    Wire_type wtype;  // merge from cacti-7 code to cacti3d code.

    int number_addr_bits_mat;             // per port
    int number_subbanks_decode;           // per_port
    int num_di_b_bank_per_port;
    int num_do_b_bank_per_port;
    int num_di_b_mat;
    int num_do_b_mat;
    int num_di_b_subbank;
    int num_do_b_subbank;

    // presumably the "si"/"so" fields are the search-in / search-out
    // counterparts of the "di"/"do" data fields above -- TODO confirm
    int num_si_b_mat;
    int num_so_b_mat;
    int num_si_b_subbank;
    int num_so_b_subbank;
    int num_si_b_bank_per_port;
    int num_so_b_bank_per_port;

    int number_way_select_signals_mat;
    int num_act_mats_hor_dir;

    int num_act_mats_hor_dir_sl;
    bool is_dram;
    double V_b_sense;
    unsigned int num_r_subarray;
    unsigned int num_c_subarray;
    int tag_num_r_subarray;//: fully associative cache tag and data must be computed together, data and tag must be separate
    int tag_num_c_subarray;
    int data_num_r_subarray;
    int data_num_c_subarray;
    int num_mats_h_dir;
    int num_mats_v_dir;
    uint32_t ram_cell_tech_type;
    double dram_refresh_period;

    // Default constructor; defined elsewhere.  Which fields it initializes
    // cannot be told from this header.
    DynamicParameter();

    // Parameterized constructor; defined elsewhere.
    // NOTE(review): pure_ram_ / pure_cam_ are taken as int although the
    // matching members are bool, and the Nd* arguments are unsigned while the
    // members are int -- callers rely on the implicit conversions.
    DynamicParameter(
        bool is_tag_,
        int pure_ram_,
        int pure_cam_,
        double Nspd_,
        unsigned int Ndwl_,
        unsigned int Ndbl_,
        unsigned int Ndcm_,
        unsigned int Ndsam_lev_1_,
        unsigned int Ndsam_lev_2_,
        Wire_type wt, // merged from cacti-7 to cacti3d
        bool is_main_mem_);

    int use_inp_params;
    unsigned int num_rw_ports;
    unsigned int num_rd_ports;
    unsigned int num_wr_ports;
    unsigned int num_se_rd_ports;  // number of single ended read ports
    unsigned int num_search_ports;
    unsigned int out_w;// == nr_bits_out
    bool is_main_mem;
    Area cell, cam_cell;//cell is the sram_cell in both normal cache/ram and FA.
    bool is_valid;
  private:
    // Internal helpers, declared here and defined elsewhere.
    void ECC_adjustment();
    void init_CAM();
    void init_FA();
    bool calc_subarr_rc(unsigned int cap); //to calculate and check subarray rows and columns
};
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "area.h" +#include "powergating.h" +#include "parameter.h" +#include +#include +#include + +using namespace std; + +//TODO: although DTSN is used,since for memory array, the number of sleep txs +//is related to the number of rows and cols. so All calculations are still base on +//single sleep tx cases + +Sleep_tx::Sleep_tx( + double _perf_with_sleep_tx, + double _active_Isat,//of circuit block, not sleep tx + bool _is_footer, + double _c_circuit_wakeup, + double _V_delta, + int _num_sleep_tx, +// double _vt_circuit, +// double _vt_sleep_tx, +// double _mobility,//of sleep tx +// double _c_ox,//of sleep tx + const Area & cell_) +:perf_with_sleep_tx(_perf_with_sleep_tx), + active_Isat(_active_Isat), + is_footer(_is_footer), + c_circuit_wakeup(_c_circuit_wakeup), + V_delta(_V_delta), + num_sleep_tx(_num_sleep_tx), +// vt_circuit(_vt_circuit), +// vt_sleep_tx(_vt_sleep_tx), +// mobility(_mobility), +// c_ox(_c_ox) + cell(cell_), + is_sleep_tx(true) +{ + + //a single sleep tx in a network + double raw_area, raw_width, raw_hight; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); + vdd = g_tp.peri_global.Vdd; + vt_circuit = g_tp.peri_global.Vth; + vt_sleep_tx = g_tp.sleep_tx.Vth; + mobility = g_tp.sleep_tx.Mobility_n; + c_ox = g_tp.sleep_tx.C_ox; + + width = 
active_Isat/(perf_with_sleep_tx*mobility*c_ox*(vdd-vt_circuit)*(vdd-vt_sleep_tx))*g_ip->F_sz_um;//W/L uses physical numbers + width /= num_sleep_tx; + + raw_area = compute_gate_area(INV, 1, width, p_to_n_sz_ratio*width, cell.w*2)/2; //Only single device, assuming device is laide on the side + raw_width = cell.w; + raw_hight = raw_area/cell.w; + area.set_h(raw_hight); + area.set_w(raw_width); + + compute_penalty(); + +} + +double Sleep_tx::compute_penalty() +{ + //V_delta = VDD - VCCmin nothing to do with threshold of sleep tx. Although it might be OK to use sleep tx to control the V_delta +// double c_load; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true); + + if (is_footer) + { + c_intrinsic_sleep = drain_C_(width, NCH, 1, 1, area.h, false, false, false,is_sleep_tx); +// V_delta = _V_delta; + wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_nmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio); + wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta; + //no 0.5 because the half of the energy spend in entering sleep and half of the energy will be spent in waking up. And they are pairs + } + else + { + c_intrinsic_sleep = drain_C_(width*p_to_n_sz_ratio, PCH, 1, 1, area.h, false, false, false,is_sleep_tx); +// V_delta = _V_delta; + wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_pmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio); + wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta; + } + + return wakeup_delay; + +/* + The number of cycles in the wake-up latency set the constraint on the + minimum number of idle clock cycles needed before a processor + can enter in the corresponding sleep mode without any wakeup + overhead. + + If the circuit is half way to sleep then waken up, it is still OK + just the wakeup latency will be shorter than the wakeup time from full asleep. 
+ So, the sleep time and energy does not matter +*/ + +} + diff --git a/T1/TP1/cacti-master/powergating.h b/T1/TP1/cacti-master/powergating.h new file mode 100644 index 0000000..c453399 --- /dev/null +++ b/T1/TP1/cacti-master/powergating.h @@ -0,0 +1,86 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
// Model of one sleep transistor of a power-gating network.
//
// All state is public.  The constructor sizes the transistor, fills in the
// inherited area and calls compute_penalty(), so a fully constructed object
// already carries its wake-up delay and energy.
class Sleep_tx : public Component
{
public:
  // See the member comments below for the meaning of the arguments; each
  // argument initializes the member of the same name (without the leading
  // underscore / trailing underscore).
  Sleep_tx(
      double _perf_with_sleep_tx,
      double _active_Isat,//of circuit block, not sleep tx
      bool _is_footer,
      double _c_circuit_wakeup,
      double _V_delta,
      int _num_sleep_tx,
      //  double _vt_circuit,
      //  double _vt_sleep_tx,
      //  double _mobility,//of sleep tx
      //  double _c_ox,//of sleep tx
      const Area & cell_);

  double perf_with_sleep_tx;
  double active_Isat;
  bool is_footer;   // selects the NCH path (true) or the PCH path (false) in compute_penalty()

  // The next five values are not constructor arguments; the constructor reads
  // them from the global technology parameters.
  double vt_circuit;
  double vt_sleep_tx;
  double vdd;// of circuit block not sleep tx
  double mobility;//of sleep tx
  double c_ox;
  double width;               // per-transistor width computed by the constructor
  double c_circuit_wakeup;
  double c_intrinsic_sleep;   // drain capacitance of the sleep tx, set by compute_penalty()
  double delay, wakeup_delay;
  powerDef power, wakeup_power;
//  double c_circuit_sleep;
//  double sleep_delay;
//  powerDef sleep_power;
  double V_delta;

  int num_sleep_tx;

  // Stored by reference: the Area passed to the constructor must outlive this
  // object.
  const Area & cell;
  bool is_sleep_tx;   // always set to true by the constructor



//  void compute_area();
  // Updates c_intrinsic_sleep, wakeup_delay and wakeup_power.readOp.dynamic
  // and returns wakeup_delay.
  double compute_penalty();

  // Intentionally empty.
  void leakage_feedback(double temperature){};
  ~Sleep_tx(){};
};
test_configs/cache1.cfg #L1 2-way 32K +./cacti -infile test_configs/cache2.cfg #L2 4-way 256K +./cacti -infile test_configs/cache3.cfg #L3 8-way 16M +./cacti -infile test_configs/cache4.cfg #L1 full-asso 4K with single search port +RAM 4 types +./cacti -infile test_configs/ram1.cfg # 16M +./cacti -infile test_configs/ram2.cfg # itrs-hp itrs-lstp +./cacti -infile test_configs/ram3.cfg # two banks no-ecc 128M +./cacti -infile test_configs/ram4.cfg # 32K 2-way +CAM 4 types +./cacti -infile test_configs/cam1.cfg # same as ram1 but ram->cam and full-asso +./cacti -infile test_configs/cam2.cfg # same as cam1 with line size = 128 +./cacti -infile test_configs/cam3.cfg # cam1 for 40nm technology +./cacti -infile test_configs/cam4.cfg # cam1 with exclusive read and write port +NUCA 4 types +./cacti -infile test_configs/nuca1.cfg # +./cacti -infile test_configs/nuca2.cfg +./cacti -infile test_configs/nuca3.cfg +./cacti -infile test_configs/nuca3.cfg +eDRAM 4 types +./cacti -infile test_configs/edram1.cfg # +./cacti -infile test_configs/edram2.cfg +./cacti -infile test_configs/edram3.cfg +./cacti -infile test_configs/edram4.cfg +DRAM 4 types +./cacti -infile test_configs/dram1.cfg # +./cacti -infile test_configs/dram2.cfg +./cacti -infile test_configs/dram3.cfg +./cacti -infile test_configs/dram4.cfg +IO 4 different parameters +./cacti -infile test_configs/io1.cfg # +./cacti -infile test_configs/io2.cfg +./cacti -infile test_configs/io3.cfg +./cacti -infile test_configs/io4.cfg +Power gating 4 types +./cacti -infile test_configs/power_gate1.cfg +./cacti -infile test_configs/power_gate2.cfg +./cacti -infile test_configs/power_gate3.cfg +./cacti -infile test_configs/power_gate4.cfg +3D 4 types +./cacti -infile test_configs/3D1.cfg +./cacti -infile test_configs/3D2.cfg +./cacti -infile test_configs/3D3.cfg +./cacti -infile test_configs/3D4.cfg \ No newline at end of file diff --git a/T1/TP1/cacti-master/router.cc b/T1/TP1/cacti-master/router.cc new file mode 100644 index
0000000..929c773 --- /dev/null +++ b/T1/TP1/cacti-master/router.cc @@ -0,0 +1,311 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include "router.h" + +Router::Router( + double flit_size_, + double vc_buf, /* vc size = vc_buffer_size * flit_size */ + double vc_c, + /*TechnologyParameter::*/DeviceType *dt, + double I_, + double O_, + double M_ + ):flit_size(flit_size_), + deviceType(dt), + I(I_), + O(O_), + M(M_) +{ + vc_buffer_size = vc_buf; + vc_count = vc_c; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; + double technology = g_ip->F_sz_um; + + Vdd = dt->Vdd; + + /*Crossbar parameters. Transmisson gate is employed for connector*/ + NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/ + PTtr = 20*technology*1e-6/2; /* pmos tr. 
length*/ + wt = 15*technology*1e-6/2; /*track width*/ + ht = 15*technology*1e-6/2; /*track height*/ +// I = 5; /*Number of crossbar input ports*/ +// O = 5; /*Number of crossbar output ports*/ + NTi = 12.5*technology*1e-6/2; + PTi = 25*technology*1e-6/2; + + NTid = 60*technology*1e-6/2; //m + PTid = 120*technology*1e-6/2; // m + NTod = 60*technology*1e-6/2; // m + PTod = 120*technology*1e-6/2; // m + + calc_router_parameters(); +} + +Router::~Router(){} + + +double //wire cap with triple spacing +Router::Cw3(double length) { + Wire wc(g_ip->wt, length, 1, 3, 3); + return (wc.wire_cap(length)); +} + +/*Function to calculate the gate capacitance*/ +double +Router::gate_cap(double w) { + return (double) gate_C (w*1e6 /*u*/, 0); +} + +/*Function to calculate the diffusion capacitance*/ +double +Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, + double s /*number of stacking transistors*/) { + return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def); +} + + +/*crossbar related functions */ + +// Model for simple transmission gate +double +Router::transmission_buf_inpcap() { + return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); +} + +double +Router::transmission_buf_outcap() { + return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); +} + +double +Router::transmission_buf_ctrcap() { + return gate_cap(NTtr)+gate_cap(PTtr); +} + +double +Router::crossbar_inpline() { + return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) + + gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1)); +} + +double +Router::crossbar_outline() { + return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) + + gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1)); +} + +double +Router::crossbar_ctrline() { + return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() + + diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) + + gate_cap(NTi) + gate_cap(PTi)); +} + +double +Router::tr_crossbar_power() { + return 
(crossbar_inpline()*Vdd*Vdd*flit_size/2 + + crossbar_outline()*Vdd*Vdd*flit_size/2)*2; +} + +void Router::buffer_stats() +{ + DynamicParameter dyn_p; + dyn_p.is_tag = false; + dyn_p.pure_cam = false; + dyn_p.fully_assoc = false; + dyn_p.pure_ram = true; + dyn_p.is_dram = false; + dyn_p.is_main_mem = false; + dyn_p.num_subarrays = 1; + dyn_p.num_mats = 1; + dyn_p.Ndbl = 1; + dyn_p.Ndwl = 1; + dyn_p.Nspd = 1; + dyn_p.deg_bl_muxing = 1; + dyn_p.deg_senseamp_muxing_non_associativity = 1; + dyn_p.Ndsam_lev_1 = 1; + dyn_p.Ndsam_lev_2 = 1; + dyn_p.Ndcm = 1; + dyn_p.number_addr_bits_mat = 8; + dyn_p.number_way_select_signals_mat = 1; + dyn_p.number_subbanks_decode = 0; + dyn_p.num_act_mats_hor_dir = 1; + dyn_p.V_b_sense = Vdd; // FIXME check power calc. + dyn_p.ram_cell_tech_type = 0; + dyn_p.num_r_subarray = (int) vc_buffer_size; + dyn_p.num_c_subarray = (int) flit_size * (int) vc_count; + dyn_p.num_mats_h_dir = 1; + dyn_p.num_mats_v_dir = 1; + dyn_p.num_do_b_subbank = (int)flit_size; + dyn_p.num_di_b_subbank = (int)flit_size; + dyn_p.num_do_b_mat = (int) flit_size; + dyn_p.num_di_b_mat = (int) flit_size; + dyn_p.num_do_b_mat = (int) flit_size; + dyn_p.num_di_b_mat = (int) flit_size; + dyn_p.num_do_b_bank_per_port = (int) flit_size; + dyn_p.num_di_b_bank_per_port = (int) flit_size; + dyn_p.out_w = (int) flit_size; + + dyn_p.use_inp_params = 1; + dyn_p.num_wr_ports = (unsigned int) vc_count; + dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book + dyn_p.num_rw_ports = 0; + dyn_p.num_se_rd_ports =0; + dyn_p.num_search_ports =0; + + + + dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports + + dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports); + dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 + + (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) + + dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; + + Mat buff(dyn_p); + buff.compute_delays(0); + 
buff.compute_power_energy(); + buffer.power.readOp = buff.power.readOp; + buffer.power.writeOp = buffer.power.readOp; //FIXME + buffer.area = buff.area; +} + + + + void +Router::cb_stats () +{ + if (1) { + Crossbar c_b(I, O, flit_size); + c_b.compute_power(); + crossbar.delay = c_b.delay; + crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic; + crossbar.power.readOp.leakage = c_b.power.readOp.leakage; + crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage; + crossbar.area = c_b.area; +// c_b.print_crossbar(); + } + else { + crossbar.power.readOp.dynamic = tr_crossbar_power(); + crossbar.power.readOp.leakage = flit_size * I * O * + cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); + crossbar.power.readOp.gate_leakage = flit_size * I * O * + cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); + } +} + +void +Router::get_router_power() +{ + /* calculate buffer stats */ + buffer_stats(); + + /* calculate cross-bar stats */ + cb_stats(); + + /* calculate arbiter stats */ + Arbiter vcarb(vc_count, flit_size, buffer.area.w); + Arbiter cbarb(I, flit_size, crossbar.area.w); + vcarb.compute_power(); + cbarb.compute_power(); + arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + + cbarb.power.readOp.dynamic * O; + arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I + + cbarb.power.readOp.leakage * O; + arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I + + cbarb.power.readOp.gate_leakage * O; + +// arb_stats(); + power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) + + crossbar.power.readOp.dynamic + + arbiter.power.readOp.dynamic)*MIN(I, O)*M; + double pppm_t[4] = {1,I,I,1}; + power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg; + +} + + void +Router::get_router_delay () +{ + FREQUENCY=5; // move this to config file --TODO + cycle_time = (1/(double)FREQUENCY)*1e3; //ps + delay = 4; + max_cyc = 17 * g_tp.FO4; //s + max_cyc *= 1e12; 
// Router footprint: I buffers stacked vertically, with the crossbar placed
// next to the buffer horizontally.  Requires buffer.area and crossbar.area to
// have been filled in already.
  void
Router::get_router_area()
{
  area.h = I*buffer.area.h;
  area.w = buffer.area.w+crossbar.area.w;
}

// Runs the three evaluation steps in the order required by their data
// dependencies: area needs the buffer/crossbar areas produced by the power
// step.
  void
Router::calc_router_parameters()
{
  /* calculate router frequency and pipeline cycles */
  get_router_delay();

  /* router power stats */
  get_router_power();

  /* area stats */
  get_router_area();
}

// Model of a network router made of an input buffer, a crossbar and
// arbiters.  Construction evaluates the whole model; results are available
// through the inherited Component members and the public sub-components.
class Router : public Component
{
  public:
    // flit_size_ : link width in bits
    // vc_buf     : buffer entries per virtual channel
    // vc_count   : number of virtual channels
    // dt         : device type, defaults to the peripheral-global device
    // I_, O_     : crossbar input / output port counts (default 5 x 5)
    // M_         : network load (default 0.6)
    Router(
        double flit_size_,
        double vc_buf, /* vc size = vc_buffer_size * flit_size */
        double vc_count,
        /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global),
        double I_ = 5,
        double O_ = 5,
        double M_ = 0.6);
    ~Router();


    // Prints a summary of the router to standard output.
    void print_router();

    // Sub-component results filled in while the router is evaluated.
    Component arbiter, crossbar, buffer;

    double cycle_time, max_cyc;
    double flit_size;
    double vc_count;
    double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */

  private:
    /*TechnologyParameter::*/DeviceType *deviceType;
    double FREQUENCY; // move this to config file --TODO
    // Capacitance helpers.
    double Cw3(double len);
    double gate_cap(double w);
    double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
    enum Wire_type wtype;
    enum Wire_placement wire_placement;
    // crossbar
    // NOTE(review): TriS1 and TriS2 are not referenced by any member function
    // visible in this chunk -- possibly unused.
    double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, TriS2;
    double M; //network load
    double transmission_buf_inpcap();
    double transmission_buf_outcap();
    double transmission_buf_ctrcap();
    double crossbar_inpline();
    double crossbar_outline();
    double crossbar_ctrline();
    double tr_crossbar_power();
    void cb_stats ();
    // NOTE(review): no definition of arb_power, arb_stats or buffer_params is
    // visible in this chunk -- verify they exist before calling them.
    double arb_power();
    void arb_stats ();
    double buffer_params();
    void buffer_stats();


    //arbiter

    //buffer

    //router params
    double Vdd;

    void calc_router_parameters();
    void get_router_area();
    void get_router_power();
    void get_router_delay();

    double min_w_pmos;


};
+-Interconnect Power Gating - "false" +-Power Gating Performance Loss 0.01 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 64 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +//-associativity 8 +-associativity 8 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 1 +-technology (u) 0.022 +//-technology (u) 0.040 +//-technology (u) 0.032 +//-technology (u) 0.090 + +# following three parameters are meaningful only for main memories + +-page size (bits) 8192 +-burst length 8 +-internal prefetch width 8 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Data array peripheral type - "itrs-hp" +//-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" +//-Tag array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" +//-Tag array peripheral type - "itrs-lop + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +-output/input bus width 512 + +// 300-400 in steps of 10 +-operating temperature (K) 360 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +-cache type "cache" +//-cache type 
"ram" +//-cache type "main memory" + +# to model special structure like branch target buffers, directory, etc. +# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 22 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +//-access mode (normal, sequential, fast) - "fast" +-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +//-Wire outside mat - "global" +-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +//-Force cache config - "true" +-Force cache config 
- "false" +-Ndwl 1 +-Ndbl 1 +-Nspd 0 +-Ndcm 1 +-Ndsam1 0 +-Ndsam2 0 + + + +#### Default CONFIGURATION values for baseline external IO parameters to DRAM. + +# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential) + +-dram_type "D" +//-dram_type "L" +//-dram_type "W" +//-dram_type "S" + +# Memory State (R=Read, W=Write, I=Idle or S=Sleep) + +//-iostate "R" +-iostate "W" +//-iostate "I" +//-iostate "S" + +# Is ECC Enabled (Y=Yes, N=No) + +-dram_ecc "Y" + +#Address bus timing + +//-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3 +-addr_timing 1.0 //SDR for DDR3, Wide-IO +//-addr_timing 2.0 //2T timing +//addr_timing 3.0 // 3T timing + +# Bandwidth (Gbytes per second, this is the effective bandwidth) + +-bus_bw 12.8 GBps //Valid range 0 to 2*bus_freq*num_dq + +# Memory Density (Gbit per memory/DRAM die) + +-mem_density 4 Gb //Valid values 2^n Gb + +# IO frequency (MHz) (frequency of the external memory interface). + +-bus_freq 800 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential + +# Duty Cycle (fraction of time in the Memory State defined above) + +-duty_cycle 1.0 //Valid range 0 to 1.0 + +# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Number of DQ pins + +-num_dq 72 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72. + +# Number of DQS pins + +-num_dqs 18 //2 x differential pairs. Include ECC pins as well. 
Valid range 0 to 18. For x4 memories, could have 36 DQS pins. + +# Number of CA pins + +-num_ca 25 //Valid range 0 to 35 pins. + +# Number of CLK pins + +-num_clk 2 //2 x differential pair. Valid values: 0/2/4. + +# Number of Physical Ranks + +-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip + +# Width of the Memory Data Bus + +-mem_data_width 8 //x4 or x8 or x16 or x32 or x128 memories \ No newline at end of file diff --git a/T1/TP1/cacti-master/sample_config_files/diff_ddr3_cache.cfg b/T1/TP1/cacti-master/sample_config_files/diff_ddr3_cache.cfg new file mode 100644 index 0000000..7c6f492 --- /dev/null +++ b/T1/TP1/cacti-master/sample_config_files/diff_ddr3_cache.cfg @@ -0,0 +1,259 @@ +# Cache size +//-size (bytes) 2048 +//-size (bytes) 4096 +//-size (bytes) 32768 +//-size (bytes) 131072 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 67108864 +//-size (bytes) 1073741824 + +# power gating +-Array Power Gating - "false" +-WL Power Gating - "false" +-CL Power Gating - "false" +-Bitline floating - "false" +-Interconnect Power Gating - "false" +-Power Gating Performance Loss 0.01 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 64 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +//-associativity 8 +-associativity 8 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 1 +-technology (u) 0.022 +//-technology (u) 0.040 +//-technology (u) 0.032 +//-technology (u) 0.090 + +# following three parameters are meaningful only for main memories + +-page size (bits) 8192 +-burst length 8 +-internal prefetch width 8 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, 
itrs-lop, lp-dram, comm-dram) +-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Data array peripheral type - "itrs-hp" +//-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" +//-Tag array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" +//-Tag array peripheral type - "itrs-lop + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +-output/input bus width 512 + +// 300-400 in steps of 10 +-operating temperature (K) 360 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +-cache type "cache" +//-cache type "ram" +//-cache type "main memory" + +# to model special structure like branch target buffers, directory, etc. 
+# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 22 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +//-access mode (normal, sequential, fast) - "fast" +-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +//-Wire outside mat - "global" +-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +//-Force cache config - "true" +-Force cache config 
- "false" +-Ndwl 1 +-Ndbl 1 +-Nspd 0 +-Ndcm 1 +-Ndsam1 0 +-Ndsam2 0 + + + +#### Default CONFIGURATION values for baseline external IO parameters to DRAM. + +# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential) + +//-dram_type "D" +//-dram_type "L" +//-dram_type "W" +-dram_type "S" + +# Memory State (R=Read, W=Write, I=Idle or S=Sleep) + +//-iostate "R" +-iostate "W" +//-iostate "I" +//-iostate "S" + +# Is ECC Enabled (Y=Yes, N=No) + +-dram_ecc "N" + +#Address bus timing + +//-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3 +-addr_timing 1.0 //SDR for DDR3, Wide-IO +//-addr_timing 2.0 //2T timing +//addr_timing 3.0 // 3T timing + +# Bandwidth (Gbytes per second, this is the effective bandwidth) + +-bus_bw 6 GBps //Valid range 0 to 2*bus_freq*num_dq + +# Memory Density (Gbit per memory/DRAM die) + +-mem_density 4 Gb //Valid values 2^n Gb + +# IO frequency (MHz) (frequency of the external memory interface). + +-bus_freq 3000 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential + +# Duty Cycle (fraction of time in the Memory State defined above) + +-duty_cycle 1.0 //Valid range 0 to 1.0 + +# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Number of DQ pins + +-num_dq 8 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72. + +# Number of DQS pins + +-num_dqs 2 //2 x differential pairs. Include ECC pins as well. 
Valid range 0 to 18. For x4 memories, could have 36 DQS pins. + +# Number of CA pins + +-num_ca 0 //Valid range 0 to 35 pins. + +# Number of CLK pins + +-num_clk 0 //2 x differential pair. Valid values: 0/2/4. + +# Number of Physical Ranks + +-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip + +# Width of the Memory Data Bus + +-mem_data_width 8 //x4 or x8 or x16 or x32 memories \ No newline at end of file diff --git a/T1/TP1/cacti-master/sample_config_files/lpddr3_cache.cfg b/T1/TP1/cacti-master/sample_config_files/lpddr3_cache.cfg new file mode 100644 index 0000000..045c540 --- /dev/null +++ b/T1/TP1/cacti-master/sample_config_files/lpddr3_cache.cfg @@ -0,0 +1,259 @@ +# Cache size +//-size (bytes) 2048 +//-size (bytes) 4096 +//-size (bytes) 32768 +//-size (bytes) 131072 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 67108864 +//-size (bytes) 1073741824 + +# power gating +-Array Power Gating - "false" +-WL Power Gating - "false" +-CL Power Gating - "false" +-Bitline floating - "false" +-Interconnect Power Gating - "false" +-Power Gating Performance Loss 0.01 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 64 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +//-associativity 8 +-associativity 8 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 1 +-technology (u) 0.022 +//-technology (u) 0.040 +//-technology (u) 0.032 +//-technology (u) 0.090 + +# following three parameters are meaningful only for main memories + +-page size (bits) 8192 +-burst length 8 +-internal prefetch width 8 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, 
comm-dram) +-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Data array peripheral type - "itrs-hp" +//-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" +//-Tag array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" +//-Tag array peripheral type - "itrs-lop + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +-output/input bus width 512 + +// 300-400 in steps of 10 +-operating temperature (K) 360 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +-cache type "cache" +//-cache type "ram" +//-cache type "main memory" + +# to model special structure like branch target buffers, directory, etc. 
+# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 22 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +//-access mode (normal, sequential, fast) - "fast" +-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +//-Wire outside mat - "global" +-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +//-Force cache config - "true" +-Force cache config 
- "false" +-Ndwl 1 +-Ndbl 1 +-Nspd 0 +-Ndcm 1 +-Ndsam1 0 +-Ndsam2 0 + + + +#### Default CONFIGURATION values for baseline external IO parameters to DRAM. + +# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential) + +//-dram_type "D" +-dram_type "L" +//-dram_type "W" +//-dram_type "S" + +# Memory State (R=Read, W=Write, I=Idle or S=Sleep) + +//-iostate "R" +-iostate "W" +//-iostate "I" +//-iostate "S" + +# Is ECC Enabled (Y=Yes, N=No) + +-dram_ecc "N" + +#Address bus timing + +-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3 +//-addr_timing 1.0 //SDR for DDR3, Wide-IO +//-addr_timing 2.0 //2T timing +//addr_timing 3.0 // 3T timing + +# Bandwidth (Gbytes per second, this is the effective bandwidth) + +-bus_bw 6.4 GBps //Valid range 0 to 2*bus_freq*num_dq + +# Memory Density (Gbit per memory/DRAM die) + +-mem_density 4 Gb //Valid values 2^n Gb + +# IO frequency (MHz) (frequency of the external memory interface). + +-bus_freq 800 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential + +# Duty Cycle (fraction of time in the Memory State defined above) + +-duty_cycle 1.0 //Valid range 0 to 1.0 + +# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Number of DQ pins + +-num_dq 32 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72. + +# Number of DQS pins + +-num_dqs 8 //2 x differential pairs. Include ECC pins as well. 
Valid range 0 to 18. For x4 memories, could have 36 DQS pins. + +# Number of CA pins + +-num_ca 14 //Valid range 0 to 35 pins. + +# Number of CLK pins + +-num_clk 2 //2 x differential pair. Valid values: 0/2/4. + +# Number of Physical Ranks + +-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip + +# Width of the Memory Data Bus + +-mem_data_width 32 //x4 or x8 or x16 or x32 or x128 memories \ No newline at end of file diff --git a/T1/TP1/cacti-master/sample_config_files/wideio_cache.cfg b/T1/TP1/cacti-master/sample_config_files/wideio_cache.cfg new file mode 100644 index 0000000..b16ea0b --- /dev/null +++ b/T1/TP1/cacti-master/sample_config_files/wideio_cache.cfg @@ -0,0 +1,259 @@ +# Cache size +//-size (bytes) 2048 +//-size (bytes) 4096 +//-size (bytes) 32768 +//-size (bytes) 131072 +//-size (bytes) 262144 +//-size (bytes) 1048576 +//-size (bytes) 2097152 +//-size (bytes) 4194304 +-size (bytes) 8388608 +//-size (bytes) 16777216 +//-size (bytes) 33554432 +//-size (bytes) 134217728 +//-size (bytes) 67108864 +//-size (bytes) 1073741824 + +# power gating +-Array Power Gating - "false" +-WL Power Gating - "false" +-CL Power Gating - "false" +-Bitline floating - "false" +-Interconnect Power Gating - "false" +-Power Gating Performance Loss 0.01 + +# Line size +//-block size (bytes) 8 +-block size (bytes) 64 + +# To model Fully Associative cache, set associativity to zero +//-associativity 0 +//-associativity 2 +//-associativity 4 +//-associativity 8 +-associativity 8 + +-read-write port 1 +-exclusive read port 0 +-exclusive write port 0 +-single ended read ports 0 + +# Multiple banks connected using a bus +-UCA bank count 1 +-technology (u) 0.022 +//-technology (u) 0.040 +//-technology (u) 0.032 +//-technology (u) 0.090 + +# following three parameters are meaningful only for main memories + +-page size (bits) 8192 +-burst length 8 +-internal prefetch width 8 + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, 
lp-dram, comm-dram) +-Data array cell type - "itrs-hp" +//-Data array cell type - "itrs-lstp" +//-Data array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Data array peripheral type - "itrs-hp" +//-Data array peripheral type - "itrs-lstp" +//-Data array peripheral type - "itrs-lop" + +# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram) +-Tag array cell type - "itrs-hp" +//-Tag array cell type - "itrs-lstp" +//-Tag array cell type - "itrs-lop" + +# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop) +-Tag array peripheral type - "itrs-hp" +//-Tag array peripheral type - "itrs-lstp" +//-Tag array peripheral type - "itrs-lop + +# Bus width include data bits and address bits required by the decoder +//-output/input bus width 16 +-output/input bus width 512 + +// 300-400 in steps of 10 +-operating temperature (K) 360 + +# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file) +# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report) +-cache type "cache" +//-cache type "ram" +//-cache type "main memory" + +# to model special structure like branch target buffers, directory, etc. 
+# change the tag size parameter +# if you want cacti to calculate the tagbits, set the tag size to "default" +-tag size (b) "default" +//-tag size (b) 22 + +# fast - data and tag access happen in parallel +# sequential - data array is accessed after accessing the tag array +# normal - data array lookup and tag access happen in parallel +# final data block is broadcasted in data array h-tree +# after getting the signal from the tag array +//-access mode (normal, sequential, fast) - "fast" +-access mode (normal, sequential, fast) - "normal" +//-access mode (normal, sequential, fast) - "sequential" + + +# DESIGN OBJECTIVE for UCA (or banks in NUCA) +-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 + +# Percentage deviation from the minimum value +# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization +# that compromises at most 10% delay. +# NOTE: Try reasonable values for % deviation. Inconsistent deviation +# percentage values will not produce any valid organizations. For example, +# 0:0:100:100:100 will try to identify an organization that has both +# least delay and dynamic power. Since such an organization is not possible, CACTI will +# throw an error. Refer CACTI-6 Technical report for more details +-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000 + +# Objective for NUCA +-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 +-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 + +# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for +# energy-delay or energy-delay sq. 
product +# Note: Optimize tag will disable weight or deviate values mentioned above +# Set it to NONE to let weight and deviate values determine the +# appropriate cache configuration +//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED" +-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" +//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE" + +-Cache model (NUCA, UCA) - "UCA" +//-Cache model (NUCA, UCA) - "NUCA" + +# In order for CACTI to find the optimal NUCA bank value the following +# variable should be assigned 0. +-NUCA bank count 0 + +# NOTE: for nuca network frequency is set to a default value of +# 5GHz in time.c. CACTI automatically +# calculates the maximum possible frequency and downgrades this value if necessary + +# By default CACTI considers both full-swing and low-swing +# wires to find an optimal configuration. However, it is possible to +# restrict the search space by changing the signaling from "default" to +# "fullswing" or "lowswing" type. +-Wire signaling (fullswing, lowswing, default) - "Global_30" +//-Wire signaling (fullswing, lowswing, default) - "default" +//-Wire signaling (fullswing, lowswing, default) - "lowswing" + +//-Wire inside mat - "global" +-Wire inside mat - "semi-global" +//-Wire outside mat - "global" +-Wire outside mat - "semi-global" + +-Interconnect projection - "conservative" +//-Interconnect projection - "aggressive" + +# Contention in network (which is a function of core count and cache level) is one of +# the critical factor used for deciding the optimal bank count value +# core count can be 4, 8, or 16 +//-Core count 4 +-Core count 8 +//-Core count 16 +-Cache level (L2/L3) - "L3" + +-Add ECC - "true" + +//-Print level (DETAILED, CONCISE) - "CONCISE" +-Print level (DETAILED, CONCISE) - "DETAILED" + +# for debugging +//-Print input parameters - "true" +-Print input parameters - "false" +# force CACTI to model the cache with the +# following Ndbl, Ndwl, Nspd, Ndsam, +# and Ndcm values +//-Force cache config - "true" +-Force cache config 
- "false" +-Ndwl 1 +-Ndbl 1 +-Nspd 0 +-Ndcm 1 +-Ndsam1 0 +-Ndsam2 0 + + + +#### Default CONFIGURATION values for baseline external IO parameters to DRAM. + +# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential) + +//-dram_type "D" +//-dram_type "L" +-dram_type "W" +//-dram_type "S" + +# Memory State (R=Read, W=Write, I=Idle or S=Sleep) + +//-iostate "R" +-iostate "W" +//-iostate "I" +//-iostate "S" + +# Is ECC Enabled (Y=Yes, N=No) + +-dram_ecc "N" + +#Address bus timing + +//-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3 +-addr_timing 1.0 //SDR for DDR3, Wide-IO +//-addr_timing 2.0 //2T timing +//addr_timing 3.0 // 3T timing + +# Bandwidth (Gbytes per second, this is the effective bandwidth) + +-bus_bw 12.8 GBps //Valid range 0 to 2*bus_freq*num_dq + +# Memory Density (Gbit per memory/DRAM die) + +-mem_density 4 Gb //Valid values 2^n Gb + +# IO frequency (MHz) (frequency of the external memory interface). + +-bus_freq 400 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential + +# Duty Cycle (fraction of time in the Memory State defined above) + +-duty_cycle 1.0 //Valid range 0 to 1.0 + +# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5) + +-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR + +# Number of DQ pins + +-num_dq 128 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72. + +# Number of DQS pins + +-num_dqs 16 //2 x differential pairs. Include ECC pins as well. 
Valid range 0 to 18. For x4 memories, could have 36 DQS pins. + +# Number of CA pins + +-num_ca 30 //Valid range 0 to 35 pins. + +# Number of CLK pins + +-num_clk 2 //2 x differential pair. Valid values: 0/2/4. + +# Number of Physical Ranks + +-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip + +# Width of the Memory Data Bus + +-mem_data_width 128 //x4 or x8 or x16 or x32 or x128 memories \ No newline at end of file diff --git a/T1/TP1/cacti-master/subarray.cc b/T1/TP1/cacti-master/subarray.cc new file mode 100644 index 0000000..9dfeefc --- /dev/null +++ b/T1/TP1/cacti-master/subarray.cc @@ -0,0 +1,205 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + + +#include +#include +#include + +#include "subarray.h" + + +Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_): + dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray), + num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray), + cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) +{ + //num_cols=7; + //cout<<"num_cols ="<< num_cols <add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead + uint32_t ram_num_cells_wl_stitching = + (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ : + (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_; + + area.h = cell.h * num_rows; + + area.w = cell.w * num_cols + + ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead + + if (g_ip->print_detail_debug) + { + cout << "subarray.cc: ram_num_cells_wl_stitching = " << ram_num_cells_wl_stitching<add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; + num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0); + num_cols = num_cols_fa_cam + num_cols_fa_ram; + } + else + { + num_cols_fa_cam += g_ip->add_ecc_b_ ? 
(int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0; + num_cols_fa_ram = 0; + num_cols = num_cols_fa_cam; + } + + area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells + area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram + + ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_ + + 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves + + 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM + + + } + + assert(area.h>0); + assert(area.w>0); + compute_C(); +} + + + +Subarray::~Subarray() +{ +} + + + +double Subarray::get_total_cell_area() +{ +// return (is_fa==false? cell.get_area() * num_rows * num_cols +// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram)); +// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); +// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays. + + if (!(is_fa || dp.pure_cam)) + return (cell.get_area() * num_rows * num_cols); + else if (is_fa) + { //for FA, this area includes the dummy cells in SRAM arrays. 
+ //return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)); + //cout<<"diff" < +void init_tech_params(double technology, bool is_tag) +{ + g_tp.init(technology,is_tag); +} + +void printing(const char * name, double value) +{ + cout << "tech " << name << " " << value << endl; +} + +void printing_int(const char * name, uint64_t value) +{ + cout << "tech " << name << " " << value << endl; +} +void print_g_tp() +{ + printing("g_tp.peri_global.Vdd",g_tp.peri_global.Vdd); + printing("g_tp.peri_global.Vcc_min",g_tp.peri_global.Vcc_min); + printing("g_tp.peri_global.t_ox",g_tp.peri_global.t_ox); + printing("g_tp.peri_global.Vth",g_tp.peri_global.Vth); + printing("g_tp.peri_global.C_ox",g_tp.peri_global.C_ox); + printing("g_tp.peri_global.C_g_ideal",g_tp.peri_global.C_g_ideal); + printing("g_tp.peri_global.C_fringe",g_tp.peri_global.C_fringe); + printing("g_tp.peri_global.C_junc",g_tp.peri_global.C_junc); + printing("g_tp.peri_global.C_junc_sidewall",g_tp.peri_global.C_junc_sidewall); + printing("g_tp.peri_global.l_phy",g_tp.peri_global.l_phy); + printing("g_tp.peri_global.l_elec",g_tp.peri_global.l_elec); + printing("g_tp.peri_global.I_on_n",g_tp.peri_global.I_on_n); + printing("g_tp.peri_global.R_nch_on",g_tp.peri_global.R_nch_on); + printing("g_tp.peri_global.R_pch_on",g_tp.peri_global.R_pch_on); + printing("g_tp.peri_global.n_to_p_eff_curr_drv_ratio",g_tp.peri_global.n_to_p_eff_curr_drv_ratio); + printing("g_tp.peri_global.long_channel_leakage_reduction",g_tp.peri_global.long_channel_leakage_reduction); + printing("g_tp.peri_global.I_off_n",g_tp.peri_global.I_off_n); + printing("g_tp.peri_global.I_off_p",g_tp.peri_global.I_off_p); + printing("g_tp.peri_global.I_g_on_n",g_tp.peri_global.I_g_on_n); + printing("g_tp.peri_global.I_g_on_p",g_tp.peri_global.I_g_on_p); + + printing("g_tp.peri_global.Mobility_n",g_tp.peri_global.Mobility_n); + + printing("g_tp.sleep_tx.Vdd",g_tp.sleep_tx.Vdd); + printing("g_tp.sleep_tx.Vcc_min",g_tp.sleep_tx.Vcc_min); + 
printing("g_tp.sleep_tx.t_ox",g_tp.sleep_tx.t_ox); + printing("g_tp.sleep_tx.Vth",g_tp.sleep_tx.Vth); + printing("g_tp.sleep_tx.C_ox",g_tp.sleep_tx.C_ox); + printing("g_tp.sleep_tx.C_g_ideal",g_tp.sleep_tx.C_g_ideal); + printing("g_tp.sleep_tx.C_fringe",g_tp.sleep_tx.C_fringe); + printing("g_tp.sleep_tx.C_junc",g_tp.sleep_tx.C_junc); + printing("g_tp.sleep_tx.C_junc_sidewall",g_tp.sleep_tx.C_junc_sidewall); + printing("g_tp.sleep_tx.l_phy",g_tp.sleep_tx.l_phy); + printing("g_tp.sleep_tx.l_elec",g_tp.sleep_tx.l_elec); + printing("g_tp.sleep_tx.I_on_n",g_tp.sleep_tx.I_on_n); + printing("g_tp.sleep_tx.R_nch_on",g_tp.sleep_tx.R_nch_on); + printing("g_tp.sleep_tx.R_pch_on",g_tp.sleep_tx.R_pch_on); + printing("g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio",g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio); + printing("g_tp.sleep_tx.long_channel_leakage_reduction",g_tp.sleep_tx.long_channel_leakage_reduction); + printing("g_tp.sleep_tx.I_off_n",g_tp.sleep_tx.I_off_n); + printing("g_tp.sleep_tx.I_off_p",g_tp.sleep_tx.I_off_p); + printing("g_tp.sleep_tx.I_g_on_n",g_tp.sleep_tx.I_g_on_n); + printing("g_tp.sleep_tx.I_g_on_p",g_tp.sleep_tx.I_g_on_p); + printing("g_tp.sleep_tx.Mobility_n",g_tp.sleep_tx.Mobility_n); + + printing("g_tp.sram_cell.Vdd",g_tp.sram_cell.Vdd); + printing("g_tp.sram_cell.Vcc_min",g_tp.sram_cell.Vcc_min); + printing("g_tp.sram_cell.l_phy",g_tp.sram_cell.l_phy); + printing("g_tp.sram_cell.l_elec",g_tp.sram_cell.l_elec); + printing("g_tp.sram_cell.t_ox",g_tp.sram_cell.t_ox); + printing("g_tp.sram_cell.Vth",g_tp.sram_cell.Vth); + printing("g_tp.sram_cell.C_g_ideal",g_tp.sram_cell.C_g_ideal); + printing("g_tp.sram_cell.C_fringe",g_tp.sram_cell.C_fringe); + printing("g_tp.sram_cell.C_junc",g_tp.sram_cell.C_junc); + printing("g_tp.sram_cell.C_junc_sidewall",g_tp.sram_cell.C_junc_sidewall); + printing("g_tp.sram_cell.I_on_n",g_tp.sram_cell.I_on_n); + printing("g_tp.sram_cell.R_nch_on",g_tp.sram_cell.R_nch_on); + printing("g_tp.sram_cell.R_pch_on",g_tp.sram_cell.R_pch_on); + 
printing("g_tp.sram_cell.n_to_p_eff_curr_drv_ratio",g_tp.sram_cell.n_to_p_eff_curr_drv_ratio); + printing("g_tp.sram_cell.long_channel_leakage_reduction",g_tp.sram_cell.long_channel_leakage_reduction); + printing("g_tp.sram_cell.I_off_n",g_tp.sram_cell.I_off_n); + printing("g_tp.sram_cell.I_off_p",g_tp.sram_cell.I_off_p); + printing("g_tp.sram_cell.I_g_on_n",g_tp.sram_cell.I_g_on_n); + printing("g_tp.sram_cell.I_g_on_p",g_tp.sram_cell.I_g_on_p); + + printing("g_tp.dram_cell_Vdd",g_tp.dram_cell_Vdd); + printing("g_tp.dram_acc.Vth",g_tp.dram_acc.Vth); + printing("g_tp.dram_acc.l_phy",g_tp.dram_acc.l_phy); + printing("g_tp.dram_acc.l_elec",g_tp.dram_acc.l_elec); + printing("g_tp.dram_acc.C_g_ideal",g_tp.dram_acc.C_g_ideal); + printing("g_tp.dram_acc.C_fringe",g_tp.dram_acc.C_fringe); + printing("g_tp.dram_acc.C_junc",g_tp.dram_acc.C_junc); + printing("g_tp.dram_acc.C_junc_sidewall",g_tp.dram_acc.C_junc_sidewall); + printing("g_tp.dram_cell_I_on",g_tp.dram_cell_I_on); + printing("g_tp.dram_cell_I_off_worst_case_len_temp",g_tp.dram_cell_I_off_worst_case_len_temp); + printing("g_tp.dram_acc.I_on_n",g_tp.dram_acc.I_on_n); + printing("g_tp.dram_cell_C",g_tp.dram_cell_C); + printing("g_tp.vpp",g_tp.vpp); + printing("g_tp.dram_wl.l_phy",g_tp.dram_wl.l_phy); + printing("g_tp.dram_wl.l_elec",g_tp.dram_wl.l_elec); + printing("g_tp.dram_wl.C_g_ideal",g_tp.dram_wl.C_g_ideal); + printing("g_tp.dram_wl.C_fringe",g_tp.dram_wl.C_fringe); + printing("g_tp.dram_wl.C_junc",g_tp.dram_wl.C_junc); + printing("g_tp.dram_wl.C_junc_sidewall",g_tp.dram_wl.C_junc_sidewall); + printing("g_tp.dram_wl.I_on_n",g_tp.dram_wl.I_on_n); + printing("g_tp.dram_wl.R_nch_on",g_tp.dram_wl.R_nch_on); + printing("g_tp.dram_wl.R_pch_on",g_tp.dram_wl.R_pch_on); + printing("g_tp.dram_wl.n_to_p_eff_curr_drv_ratio",g_tp.dram_wl.n_to_p_eff_curr_drv_ratio); + printing("g_tp.dram_wl.long_channel_leakage_reduction",g_tp.dram_wl.long_channel_leakage_reduction); + printing("g_tp.dram_wl.I_off_n",g_tp.dram_wl.I_off_n); + 
printing("g_tp.dram_wl.I_off_p",g_tp.dram_wl.I_off_p); + + printing("g_tp.cam_cell.Vdd",g_tp.cam_cell.Vdd); + printing("g_tp.cam_cell.l_phy",g_tp.cam_cell.l_phy); + printing("g_tp.cam_cell.l_elec",g_tp.cam_cell.l_elec); + printing("g_tp.cam_cell.t_ox",g_tp.cam_cell.t_ox); + printing("g_tp.cam_cell.Vth",g_tp.cam_cell.Vth); + printing("g_tp.cam_cell.C_g_ideal",g_tp.cam_cell.C_g_ideal); + printing("g_tp.cam_cell.C_fringe",g_tp.cam_cell.C_fringe); + printing("g_tp.cam_cell.C_junc",g_tp.cam_cell.C_junc); + printing("g_tp.cam_cell.C_junc_sidewall",g_tp.cam_cell.C_junc_sidewall); + printing("g_tp.cam_cell.I_on_n",g_tp.cam_cell.I_on_n); + printing("g_tp.cam_cell.R_nch_on",g_tp.cam_cell.R_nch_on); + printing("g_tp.cam_cell.R_pch_on",g_tp.cam_cell.R_pch_on); + printing("g_tp.cam_cell.n_to_p_eff_curr_drv_ratio",g_tp.cam_cell.n_to_p_eff_curr_drv_ratio); + printing("g_tp.cam_cell.long_channel_leakage_reduction",g_tp.cam_cell.long_channel_leakage_reduction); + printing("g_tp.cam_cell.I_off_n",g_tp.cam_cell.I_off_n); + printing("g_tp.cam_cell.I_off_p",g_tp.cam_cell.I_off_p); + printing("g_tp.cam_cell.I_g_on_n",g_tp.cam_cell.I_g_on_n); + printing("g_tp.cam_cell.I_g_on_p",g_tp.cam_cell.I_g_on_p); + + printing("g_tp.dram.cell_a_w",g_tp.dram.cell_a_w); + printing("g_tp.dram.cell_pmos_w",g_tp.dram.cell_pmos_w); + printing("g_tp.dram.cell_nmos_w",g_tp.dram.cell_nmos_w); + + + printing("g_tp.sram.cell_a_w",g_tp.sram.cell_a_w); + printing("g_tp.sram.cell_pmos_w",g_tp.sram.cell_pmos_w); + printing("g_tp.sram.cell_nmos_w",g_tp.sram.cell_nmos_w); + + + printing("g_tp.cam.cell_a_w",g_tp.cam.cell_a_w); + printing("g_tp.cam.cell_pmos_w",g_tp.cam.cell_pmos_w); + printing("g_tp.cam.cell_nmos_w",g_tp.cam.cell_nmos_w); + + printing("g_tp.scaling_factor.logic_scaling_co_eff",g_tp.scaling_factor.logic_scaling_co_eff); + printing("g_tp.scaling_factor.core_tx_density",g_tp.scaling_factor.core_tx_density); + printing("g_tp.chip_layout_overhead",g_tp.chip_layout_overhead); + 
printing("g_tp.macro_layout_overhead",g_tp.macro_layout_overhead); + printing("g_tp.sckt_co_eff",g_tp.sckt_co_eff); + + printing("g_tp.w_comp_inv_p1",g_tp.w_comp_inv_p1); + printing("g_tp.w_comp_inv_n1",g_tp.w_comp_inv_n1); + printing("g_tp.w_comp_inv_p2",g_tp.w_comp_inv_p2); + printing("g_tp.w_comp_inv_n2",g_tp.w_comp_inv_n2); + printing("g_tp.w_comp_inv_p3",g_tp.w_comp_inv_p3); + printing("g_tp.w_comp_inv_n3",g_tp.w_comp_inv_n3); + printing("g_tp.w_eval_inv_p",g_tp.w_eval_inv_p); + printing("g_tp.w_eval_inv_n",g_tp.w_eval_inv_n); + printing("g_tp.w_comp_n",g_tp.w_comp_n); + printing("g_tp.w_comp_p",g_tp.w_comp_p); + + printing("g_tp.MIN_GAP_BET_P_AND_N_DIFFS",g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + printing("g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS",g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS); + printing("g_tp.HPOWERRAIL",g_tp.HPOWERRAIL); + printing("g_tp.cell_h_def",g_tp.cell_h_def); + printing("g_tp.w_poly_contact",g_tp.w_poly_contact); + printing("g_tp.spacing_poly_to_contact",g_tp.spacing_poly_to_contact); + printing("g_tp.spacing_poly_to_poly",g_tp.spacing_poly_to_poly); + printing("g_tp.ram_wl_stitching_overhead_",g_tp.ram_wl_stitching_overhead_); + + printing("g_tp.min_w_nmos_",g_tp.min_w_nmos_); + printing("g_tp.max_w_nmos_",g_tp.max_w_nmos_); + printing("g_tp.w_iso",g_tp.w_iso); + printing("g_tp.w_sense_n",g_tp.w_sense_n); + printing("g_tp.w_sense_p",g_tp.w_sense_p); + printing("g_tp.w_sense_en",g_tp.w_sense_en); + printing("g_tp.w_nmos_b_mux",g_tp.w_nmos_b_mux); + printing("g_tp.w_nmos_sa_mux",g_tp.w_nmos_sa_mux); + + printing("g_tp.max_w_nmos_dec",g_tp.max_w_nmos_dec); + printing_int("g_tp.h_dec",g_tp.h_dec); + + printing("g_tp.peri_global.C_overlap",g_tp.peri_global.C_overlap); + printing("g_tp.sram_cell.C_overlap",g_tp.sram_cell.C_overlap); + printing("g_tp.cam_cell.C_overlap",g_tp.cam_cell.C_overlap); + + printing("g_tp.dram_acc.C_overlap",g_tp.dram_acc.C_overlap); + printing("g_tp.dram_acc.R_nch_on",g_tp.dram_acc.R_nch_on); + + 
printing("g_tp.dram_wl.C_overlap",g_tp.dram_wl.C_overlap); + + printing("g_tp.gm_sense_amp_latch",g_tp.gm_sense_amp_latch); + + printing("g_tp.dram.b_w",g_tp.dram.b_w); + printing("g_tp.dram.b_h",g_tp.dram.b_h); + printing("g_tp.sram.b_w",g_tp.sram.b_w); + printing("g_tp.sram.b_h",g_tp.sram.b_h); + printing("g_tp.cam.b_w",g_tp.cam.b_w); + printing("g_tp.cam.b_h",g_tp.cam.b_h); + + printing("g_tp.dram.Vbitpre",g_tp.dram.Vbitpre); + printing("g_tp.sram.Vbitpre",g_tp.sram.Vbitpre); + printing("g_tp.sram.Vbitfloating",g_tp.sram.Vbitfloating); + printing("g_tp.cam.Vbitpre",g_tp.cam.Vbitpre); + + printing("g_tp.w_pmos_bl_precharge",g_tp.w_pmos_bl_precharge); + printing("g_tp.w_pmos_bl_eq",g_tp.w_pmos_bl_eq); + + printing("g_tp.wire_local.pitch",g_tp.wire_local.pitch); + printing("g_tp.wire_local.R_per_um",g_tp.wire_local.R_per_um); + printing("g_tp.wire_local.C_per_um",g_tp.wire_local.C_per_um); + printing("g_tp.wire_local.aspect_ratio",g_tp.wire_local.aspect_ratio); + printing("g_tp.wire_local.ild_thickness",g_tp.wire_local.ild_thickness); + printing("g_tp.wire_local.miller_value",g_tp.wire_local.miller_value); + printing("g_tp.wire_local.horiz_dielectric_constant",g_tp.wire_local.horiz_dielectric_constant); + printing("g_tp.wire_local.vert_dielectric_constant",g_tp.wire_local.vert_dielectric_constant); + + printing("g_tp.wire_inside_mat.pitch",g_tp.wire_inside_mat.pitch); + printing("g_tp.wire_inside_mat.R_per_um",g_tp.wire_inside_mat.R_per_um); + printing("g_tp.wire_inside_mat.C_per_um",g_tp.wire_inside_mat.C_per_um); + printing("g_tp.wire_inside_mat.aspect_ratio",g_tp.wire_inside_mat.aspect_ratio); + printing("g_tp.wire_inside_mat.ild_thickness",g_tp.wire_inside_mat.ild_thickness); + printing("g_tp.wire_inside_mat.miller_value",g_tp.wire_inside_mat.miller_value); + printing("g_tp.wire_inside_mat.horiz_dielectric_constant",g_tp.wire_inside_mat.horiz_dielectric_constant); + 
printing("g_tp.wire_inside_mat.vert_dielectric_constant",g_tp.wire_inside_mat.vert_dielectric_constant); + + printing("g_tp.wire_outside_mat.pitch",g_tp.wire_outside_mat.pitch); + printing("g_tp.wire_outside_mat.R_per_um",g_tp.wire_outside_mat.R_per_um); + printing("g_tp.wire_outside_mat.C_per_um",g_tp.wire_outside_mat.C_per_um); + printing("g_tp.wire_outside_mat.aspect_ratio",g_tp.wire_outside_mat.aspect_ratio); + printing("g_tp.wire_outside_mat.ild_thickness",g_tp.wire_outside_mat.ild_thickness); + printing("g_tp.wire_outside_mat.miller_value",g_tp.wire_outside_mat.miller_value); + printing("g_tp.wire_outside_mat.horiz_dielectric_constant",g_tp.wire_outside_mat.horiz_dielectric_constant); + printing("g_tp.wire_outside_mat.vert_dielectric_constant",g_tp.wire_outside_mat.vert_dielectric_constant); + + printing("g_tp.unit_len_wire_del",g_tp.unit_len_wire_del); + + printing("g_tp.sense_delay",g_tp.sense_delay); + printing("g_tp.sense_dy_power",g_tp.sense_dy_power); + + printing("g_tp.tsv_parasitic_resistance_fine",g_tp.tsv_parasitic_resistance_fine); + printing("g_tp.tsv_parasitic_capacitance_fine",g_tp.tsv_parasitic_capacitance_fine); + printing("g_tp.tsv_minimum_area_fine",g_tp.tsv_minimum_area_fine); + + printing("g_tp.tsv_parasitic_resistance_coarse",g_tp.tsv_parasitic_resistance_coarse); + printing("g_tp.tsv_parasitic_capacitance_coarse",g_tp.tsv_parasitic_capacitance_coarse); + printing("g_tp.tsv_minimum_area_coarse",g_tp.tsv_minimum_area_coarse); + + printing("g_tp.tsv_minimum_area_coarse",g_tp.tsv_minimum_area_coarse); + printing("g_tp.fringe_cap",g_tp.fringe_cap); + printing("g_tp.kinv",g_tp.kinv); + printing("g_tp.FO4",g_tp.FO4); + +} diff --git a/T1/TP1/cacti-master/uca.cc b/T1/TP1/cacti-master/uca.cc new file mode 100644 index 0000000..bb6124f --- /dev/null +++ b/T1/TP1/cacti-master/uca.cc @@ -0,0 +1,818 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 
Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#include +#include + +#include "uca.h" +#include "TSV.h" +#include "memorybus.h" + + +UCA::UCA(const DynamicParameter & dyn_p) + :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) +{ + int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? 
_log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2)); + int num_banks_hor_dir = nbanks/num_banks_ver_dir; + + if (dp.use_inp_params) + { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } + else + { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + } + + num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); + num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); + num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); + num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; + num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; + + if (!dp.fully_assoc && !dp.pure_cam) + { + + if (g_ip->fast_access && dp.is_tag == false) + { + num_do_b_bank *= g_ip->data_assoc; + } + + htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); + htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); + htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); + } + + else + { + + htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); + htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); + htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, 
Data_out_htree, true); + htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); + htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); + } + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + + area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; +// cout<<"area cell"<print_detail_debug) + cout << "uca.cc: g_ip->is_3d_mem = " << g_ip->is_3d_mem << endl; + if(g_ip->is_3d_mem) + { + membus_RAS = new Memorybus(g_ip->wt, bank.mat.area.w, bank.mat.area.h, bank.mat.subarray.area.w, bank.mat.subarray.area.h, + _log2(dp.num_r_subarray * dp.Ndbl), _log2(dp.num_c_subarray * dp.Ndwl), g_ip->burst_depth*g_ip->io_width, dp.Ndbl, dp.Ndwl, Row_add_path, dp); + membus_CAS = new Memorybus(g_ip->wt, bank.mat.area.w, bank.mat.area.h, bank.mat.subarray.area.w, bank.mat.subarray.area.h, + _log2(dp.num_r_subarray * dp.Ndbl), _log2(dp.num_c_subarray * dp.Ndwl), g_ip->burst_depth*g_ip->io_width, dp.Ndbl, dp.Ndwl, Col_add_path, dp); + membus_data = new Memorybus(g_ip->wt, bank.mat.area.w, bank.mat.area.h, bank.mat.subarray.area.w, bank.mat.subarray.area.h, + _log2(dp.num_r_subarray * dp.Ndbl), _log2(dp.num_c_subarray * dp.Ndwl), g_ip->burst_depth*g_ip->io_width, dp.Ndbl, dp.Ndwl, Data_path, dp); + area.h = membus_RAS->area.h; + area.w = membus_RAS->area.w; + + if (g_ip->print_detail_debug) + { + cout<<"uca.cc: area.h = "<is_3d_mem) + { + // Add TSV delay to the terms + // --- Although there are coarse and fine, because is_array and os_bank TSV are the same, so they are the same + TSV tsv_os_bank(Coarse); + TSV tsv_is_subarray(Fine); + if(g_ip->print_detail_debug) + { + tsv_os_bank.print_TSV(); + tsv_is_subarray.print_TSV(); 
+ } + + comm_bits = 6; + row_add_bits = _log2(dp.num_r_subarray * dp.Ndbl); + col_add_bits = _log2(dp.num_c_subarray * dp.Ndwl); + data_bits = g_ip->burst_depth * g_ip->io_width; + + //enum Part_grain part_gran = Fine_rank_level; + + double redundancy_perc_TSV = 0.5; + switch(g_ip->partition_gran) + { + case 0:// Coarse_rank_level: + delay_TSV_tot = (g_ip->num_die_3d-1) * tsv_os_bank.delay; + num_TSV_tot = (comm_bits + row_add_bits + col_add_bits + data_bits*2) * (1 + redundancy_perc_TSV); //* (g_ip->nbanks/4) + area_TSV_tot = num_TSV_tot * tsv_os_bank.area.get_area(); + dyn_pow_TSV_tot = num_TSV_tot * (g_ip->num_die_3d-1) * tsv_os_bank.power.readOp.dynamic; + dyn_pow_TSV_per_access = (comm_bits + row_add_bits + col_add_bits + data_bits) * (g_ip->num_die_3d-1) * tsv_os_bank.power.readOp.dynamic; + area_address_bus = membus_RAS->area_address_bus * (1.0 + (double)comm_bits/(double)(row_add_bits + col_add_bits)); + area_data_bus = membus_RAS->area_data_bus; + break; + case 1://Fine_rank_level: + delay_TSV_tot = (g_ip->num_die_3d) * tsv_os_bank.delay; + num_TSV_tot = (comm_bits + row_add_bits + col_add_bits + data_bits/2) * g_ip->nbanks * (1 + redundancy_perc_TSV); + area_TSV_tot = num_TSV_tot * tsv_os_bank.area.get_area(); + dyn_pow_TSV_tot = num_TSV_tot * (g_ip->num_die_3d) * tsv_os_bank.power.readOp.dynamic; + dyn_pow_TSV_per_access = (comm_bits + row_add_bits + col_add_bits + data_bits) * (g_ip->num_die_3d) * tsv_os_bank.power.readOp.dynamic; + //area_address_bus = (comm_bits + row_add_bits + col_add_bits) * 25.0; + //area_data_bus = membus_RAS->area_data_bus + (double)data_bits/2 * 25.0; + break; + case 2://Coarse_bank_level: + delay_TSV_tot = (g_ip->num_die_3d) * tsv_os_bank.delay; + num_TSV_tot = (comm_bits + row_add_bits + col_add_bits + data_bits/2) * g_ip->nbanks + * g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd * (1 + redundancy_perc_TSV); + area_TSV_tot = num_TSV_tot * tsv_os_bank.area.get_area(); + dyn_pow_TSV_tot = num_TSV_tot * (g_ip->num_die_3d) * 
tsv_os_bank.power.readOp.dynamic; + dyn_pow_TSV_per_access = (comm_bits + row_add_bits + col_add_bits + data_bits) * (g_ip->num_die_3d) * tsv_os_bank.power.readOp.dynamic; + //area_address_bus = (comm_bits + row_add_bits + col_add_bits) * 25.0; + //area_data_bus = (double)data_bits/2 * 25.0; + + //activate_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + //read_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + //write_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + //precharge_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + break; + case 3://Fine_bank_level: + delay_TSV_tot = (g_ip->num_die_3d) * tsv_os_bank.delay; + num_TSV_tot = (comm_bits + row_add_bits + col_add_bits + data_bits) * g_ip->nbanks *g_ip->ndwl *g_ip->ndbl + /g_ip->num_tier_col_sprd /g_ip->num_tier_row_sprd * (1 + redundancy_perc_TSV); + area_TSV_tot = num_TSV_tot * tsv_os_bank.area.get_area(); + dyn_pow_TSV_tot = num_TSV_tot * (g_ip->num_die_3d) * tsv_os_bank.power.readOp.dynamic; + dyn_pow_TSV_per_access = (comm_bits + row_add_bits + col_add_bits + data_bits) * (g_ip->num_die_3d) * tsv_os_bank.power.readOp.dynamic; + //area_address_bus = pow(2, (comm_bits + row_add_bits + col_add_bits)) * 25.0; + //area_data_bus = pow(2, data_bits/2) * 25.0; + //activate_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + //read_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + //write_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + //precharge_energy *= g_ip->num_tier_row_sprd * g_ip->num_tier_col_sprd; + break; + default: + assert(0); + break; + } + + if(g_ip->print_detail_debug) + { + cout << "uca.cc: num_TSV_tot = " << num_TSV_tot << endl; + } + + area_lwl_drv = membus_RAS->area_lwl_drv * g_ip->nbanks; + area_row_predec_dec = membus_RAS->area_row_predec_dec * g_ip->nbanks; + area_col_predec_dec = membus_CAS->area_col_predec_dec * g_ip->nbanks; + + area_subarray = membus_RAS->area_subarray * g_ip->nbanks; + area_bus 
= membus_RAS->area_bus * g_ip->nbanks; + + + area_data_drv = membus_data->area_data_drv * g_ip->nbanks; + area_IOSA = membus_data->area_IOSA * g_ip->nbanks; + area_sense_amp = membus_data->area_sense_amp * g_ip->nbanks; + + area_address_bus = membus_RAS->area_address_bus * (1.0 + (double)comm_bits/(double)(row_add_bits + col_add_bits)) * g_ip->nbanks;; + area_data_bus = membus_RAS->area_data_bus + membus_data->area_local_dataline * g_ip->nbanks; + + area_per_bank = (area_lwl_drv + area_row_predec_dec + area_col_predec_dec + + area_subarray + area_bus + area_data_drv + area_IOSA + + area_address_bus + area_data_bus)/g_ip->nbanks + area_sense_amp; + + + t_RCD += delay_TSV_tot; + t_RAS += delay_TSV_tot; + t_RC += delay_TSV_tot; + t_RP += delay_TSV_tot; + t_CAS += 2 * delay_TSV_tot; + t_RRD += delay_TSV_tot; + + activate_energy += dyn_pow_TSV_per_access; + read_energy += dyn_pow_TSV_per_access; + write_energy += dyn_pow_TSV_per_access; + precharge_energy += dyn_pow_TSV_per_access; + + //double area_per_die = area.get_area(); + //double area_stack_tot = g_ip->num_die_3d * (area.get_area() + area_TSV_tot); + //int num_die = g_ip->num_die_3d; + //area.set_area(area_stack_tot); + + if(g_ip->num_die_3d > 1 || g_ip->partition_gran > 0) + total_area_per_die = area_all_dataramcells + area_TSV_tot; + else + total_area_per_die = area_all_dataramcells; + + + + if(g_ip->is_3d_mem && g_ip->print_detail_debug) + { + + cout<<"------- CACTI 3D DRAM Main Memory -------"<cache_sz) << endl; + cout << " Number of banks: " << (int) g_ip->nbanks << endl; + cout << " Technology size (nm): " << + g_ip->F_sz_nm << endl; + cout << " Page size (bits): " << g_ip->page_sz_bits << endl; + cout << " Burst depth: " << g_ip->burst_depth << endl; + cout << " Chip IO width: " << g_ip->io_width << endl; + cout << " Ndwl: " << dp.Ndwl << endl; + cout << " Ndbl: " << dp.Ndbl << endl; + cout << " # rows in subarray: " << dp.num_r_subarray << endl; + cout << " # columns in subarray: " << dp.num_c_subarray << 
endl; + + cout << "\nResults:\n"; + cout<<" ******************Timing terms******************"<burst_depth)/(g_ip->sys_freq_MHz*1e6)/2) * 1e3 << " mW" <print_detail_debug) + { + cout<<" ********************Other terms******************"<center_stripe->power.readOp.dynamic + membus_RAS->bank_bus->power.readOp.dynamic + + membus_RAS->add_predec->power.readOp.dynamic + membus_RAS->add_dec->power.readOp.dynamic; + cout<<" Act Bus Energy: "<< act_bus_energy * 1e9 <<" nJ"<center_stripe->delay + membus_RAS->bank_bus->delay + + membus_RAS->add_predec->delay + membus_RAS->add_dec->delay; + cout<<" Act Bus Latency: "<< act_bus_latency * 1e9 <<" ns"<num_die_3d>1) + { + cout<<" ********************TSV terms******************"<is_3d_mem) + { + delete membus_RAS; + delete membus_CAS; + delete membus_data; + } +} + + + +double UCA::compute_delays(double inrisetime) +{ + double outrisetime = bank.compute_delays(inrisetime); + //CACTI3DD + if (g_ip->is_3d_mem) + { + outrisetime = bank.compute_delays(membus_RAS->out_rise_time); + + //ram_delay_inside_mat = bank.mat.delay_bitline;// + bank.mat.delay_matchchline; + //access_time = membus_RAS->delay + bank.mat.delay_bitline + bank.mat.delay_sa + membus_CAS->delay + membus_data->delay; + + //double t_rcd = membus_RAS->delay + bank.mat.delay_bitline + bank.mat.delay_sa; + //t_RCD= membus_RAS->add_dec->delay + membus_RAS->lwl_drv->delay + bank.mat.delay_bitline + bank.mat.delay_sa; + t_RCD = membus_RAS->add_dec->delay + membus_RAS->lwl_drv->delay + bank.mat.delay_bitline + bank.mat.delay_sa; + t_RAS = membus_RAS->delay + bank.mat.delay_bitline + bank.mat.delay_sa + bank.mat.delay_bl_restore; + precharge_delay = bank.mat.delay_writeback + + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; + t_RP = precharge_delay; + t_RC = t_RAS + t_RP; + t_CAS = membus_CAS->delay + bank.mat.delay_subarray_out_drv + membus_data->delay; + t_RRD = membus_RAS->center_stripe->delay + membus_RAS->bank_bus->delay; + //t_RRD = membus_RAS->delay; + access_time 
= t_RCD + t_CAS; + multisubbank_interleave_cycle_time = membus_RAS->center_stripe->delay + membus_RAS->bank_bus->delay; + //cout<<"uca.cc: multisubbank_interleave_cycle_time = "<delay = "<delay * 1e9 << " ns" <delay = "<delay * 1e9 << " ns" <delay = "<delay * 1e9 << " ns" <center_stripe->delay = "<center_stripe->delay * 1e9 << " ns" <bank_bus->delay = "<bank_bus->delay * 1e9 << " ns" <add_predec->delay = "<add_predec->delay * 1e9 << " ns" <add_dec->delay = "<add_dec->delay * 1e9 << " ns" <global_WL->delay = "<global_WL->delay * 1e9 << " ns" <lwl_drv->delay = "<lwl_drv->delay * 1e9 << " ns" <center_stripe->delay = "<center_stripe->delay * 1e9 << " ns" <bank_bus->delay = "<bank_bus->delay * 1e9 << " ns" <add_predec->delay = "<add_predec->delay * 1e9 << " ns" <add_dec->delay = "<add_dec->delay * 1e9 << " ns" <column_sel->delay = "<column_sel->delay * 1e9 << " ns" <center_stripe->delay = "<center_stripe->delay * 1e9 << " ns" <bank_bus->delay = "<bank_bus->delay * 1e9 << " ns" <global_data->delay = "<global_data->delay * 1e9 << " ns" <data_drv->delay = "<data_drv->delay * 1e9 << " ns" <local_data->delay = "<local_data->delay * 1e9 << " ns" <delay + bank.htree_in_add->delay; + double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; + delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + + bank.mat.sa_mux_lev_1_predec->delay + + bank.mat.sa_mux_lev_1_dec->delay; + delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + + bank.mat.sa_mux_lev_2_predec->delay + + bank.mat.sa_mux_lev_2_dec->delay; + double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; + + delay_before_subarray_output_driver = + MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path + delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path + MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path + delay_array_to_sa_mux_lev_2_decoder)); // 
sa_mux_lev_2_path + delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + + bank.htree_out_data->delay + htree_out_data->delay; + access_time = bank.mat.delay_comparator; + + double ram_delay_inside_mat; + if (dp.fully_assoc) + { + //delay of FA contains both CAM tag and RAM data + { //delay of CAM + ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + access_time = htree_in_add->delay + bank.htree_in_add->delay; + //delay of fully-associative data array + access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; + } + } + else + { + access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path + } + + if (dp.is_main_mem) + { + double t_rcd = max_delay_before_row_decoder + delay_inside_mat; + double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + + delay_from_subarray_out_drv_to_out; + access_time = t_rcd + cas_latency; + } + + double temp; + + if (!dp.fully_assoc) + { + temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: : revisit + if (dp.is_dram) + { + temp += bank.mat.delay_writeback; // temp stores random cycle time + } + + + temp = MAX(temp, bank.mat.r_predec->delay); + temp = MAX(temp, bank.mat.b_mux_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + } + else + { + ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore + + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; + + temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: revisit whether distinguish cam and ram bitline etc. 
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + } + + // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav + if (g_ip->rpters_in_htree == false) + { + temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); + } + cycle_time = temp; + + double delay_req_network = max_delay_before_row_decoder; + double delay_rep_network = delay_from_subarray_out_drv_to_out; + multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); + + if (dp.is_main_mem) + { + multisubbank_interleave_cycle_time = htree_in_add->delay; + precharge_delay = htree_in_add->delay + + bank.htree_in_add->delay + bank.mat.delay_writeback + + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; + cycle_time = access_time + precharge_delay; + } + else + { + precharge_delay = 0; + } +/** + double dram_array_availability = 0; + if (dp.is_dram) + { + dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; + } +**/ + }//CACTI3DD, else + return outrisetime; +} + + + +// note: currently, power numbers are for a bank of an array +void UCA::compute_power_energy() +{ + bank.compute_power_energy(); + power = bank.power; + //CACTI3DD + if (g_ip->is_3d_mem) + { + double datapath_energy = 0.505e-9 *g_ip->F_sz_nm / 55; + //double chip_IO_width = 4; + //g_ip->burst_len = 4; + activate_energy = membus_RAS->power.readOp.dynamic + (bank.mat.power_bitline.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic) * dp.Ndwl; // /4 + read_energy = (membus_CAS->power.readOp.dynamic + bank.mat.power_subarray_out_drv.readOp.dynamic + + membus_data->power.readOp.dynamic ) + datapath_energy; //* g_ip->burst_len; + write_energy = (membus_CAS->power.readOp.dynamic + bank.mat.power_subarray_out_drv.readOp.dynamic + + membus_data->power.readOp.dynamic + bank.mat.power_sa.readOp.dynamic * g_ip->burst_depth*g_ip->io_width/g_ip->page_sz_bits) + datapath_energy; //* 
g_ip->burst_len; + precharge_energy = (bank.mat.power_bitline.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic)* dp.Ndwl; // /4 + + activate_power = activate_energy / t_RC; + double col_cycle_act_row; + //col_cycle_act_row = MAX(MAX(MAX(membus_CAS->center_stripe->delay + membus_CAS->bank_bus->delay, bank.mat.delay_subarray_out_drv), + //membus_data->delay), membus_data->out_seg->delay *g_ip->burst_depth); + //col_cycle_act_row = membus_data->out_seg->delay * g_ip->burst_depth; + col_cycle_act_row = (1e-6/(double)g_ip->sys_freq_MHz)/2 * g_ip->burst_depth; + //--- Activity factor assumption comes from Micron data spreadsheet. + read_power = 0.25 * read_energy / col_cycle_act_row; + write_power = 0.15 * write_energy / col_cycle_act_row; + + if (g_ip->print_detail_debug) + { + cout<<"Row Address Delay components: "<power.readOp.dynamic = "<< membus_RAS->power.readOp.dynamic * 1e9 << " nJ" <power.readOp.dynamic = "<< membus_CAS->power.readOp.dynamic * 1e9 << " nJ" <power.readOp.dynamic = "<< membus_data->power.readOp.dynamic * 1e9 << " nJ" <power_bus.readOp.dynamic = "<power_bus.readOp.dynamic * 1e9 << " nJ" <power_add_predecoder.readOp.dynamic = "<< membus_RAS->power_add_predecoder.readOp.dynamic * 1e9 << " nJ" <power_add_decoders.readOp.dynamic = "<< membus_RAS->power_add_decoders.readOp.dynamic * 1e9 << " nJ" <power_lwl_drv.readOp.dynamic = "<< membus_RAS->power_lwl_drv.readOp.dynamic * 1e9 << " nJ" <power_bus.readOp.dynamic = "<< membus_CAS->power_bus.readOp.dynamic * 1e9 << " nJ" <power_add_predecoder.readOp.dynamic = "<< membus_CAS->power_add_predecoder.readOp.dynamic * 1e9 << " nJ" <power_add_decoders.readOp.dynamic = "<< membus_CAS->power_add_decoders.readOp.dynamic * 1e9 << " nJ" <power.readOp.dynamic = "<< membus_CAS->power.readOp.dynamic * 1e9 << " nJ" <power.readOp.dynamic = "<< membus_data->power.readOp.dynamic * 1e9 << " nJ" <power.readOp.dynamic + htree_out_data->power.readOp.dynamic; + power_routing_to_bank.writeOp.dynamic = 
htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; + if (dp.fully_assoc || dp.pure_cam) + power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic; + + power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage + + htree_in_data->power.readOp.leakage + + htree_out_data->power.readOp.leakage; + + power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage + + htree_in_data->power.readOp.gate_leakage + + htree_out_data->power.readOp.gate_leakage; + if (dp.fully_assoc || dp.pure_cam) + { + power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; + power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; + } + + power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; + power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; + power.readOp.leakage += power_routing_to_bank.readOp.leakage; + power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; + + // calculate total write energy per access + power.writeOp.dynamic = power.readOp.dynamic + - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir + - power_routing_to_bank.readOp.dynamic + + power_routing_to_bank.writeOp.dynamic + + bank.htree_in_data->power.readOp.dynamic + - bank.htree_out_data->power.readOp.dynamic; + + if (dp.is_dram == false) + { + power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + } + + dyn_read_energy_from_closed_page = power.readOp.dynamic; + dyn_read_energy_from_open_page = power.readOp.dynamic - + (bank.mat.r_predec->power.readOp.dynamic + + bank.mat.power_row_decoders.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic + + 
bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; + + dyn_read_energy_remaining_words_in_burst = + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * + ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + power_routing_to_bank.readOp.dynamic); + dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; + dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; + + activate_energy = htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + + (bank.mat.r_predec->power.readOp.dynamic + + bank.mat.power_row_decoders.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; + read_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + htree_in_data->power.readOp.dynamic) * g_ip->burst_len; + write_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + + htree_in_data->power.readOp.dynamic + + bank.htree_in_data->power.readOp.dynamic + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + 
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; + precharge_energy = (bank.mat.power_bitline.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; + } //CACTI3DD + leak_power_subbank_closed_page = + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + + leak_power_subbank_closed_page += + (bank.mat.r_predec->power.readOp.gate_leakage + + bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ + //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + + leak_power_subbank_open_page = + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + + leak_power_subbank_open_page += + (bank.mat.r_predec->power.readOp.gate_leakage + + 
bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; + //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + + leak_power_request_and_reply_networks = + power_routing_to_bank.readOp.leakage + + bank.htree_in_add->power.readOp.leakage + + bank.htree_in_data->power.readOp.leakage + + bank.htree_out_data->power.readOp.leakage; + + leak_power_request_and_reply_networks += + power_routing_to_bank.readOp.gate_leakage + + bank.htree_in_add->power.readOp.gate_leakage + + bank.htree_in_data->power.readOp.gate_leakage + + bank.htree_out_data->power.readOp.gate_leakage; + + if (dp.fully_assoc || dp.pure_cam) + { + leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; + leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; + } + + + if (dp.is_dram) + { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power + refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; + refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + refresh_power /= dp.dram_refresh_period; + } + + + if (dp.is_tag == false) + { + power.readOp.dynamic = dyn_read_energy_from_closed_page; 
+ power.writeOp.dynamic = dyn_read_energy_from_closed_page + - dyn_read_energy_remaining_words_in_burst + - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir + + (power_routing_to_bank.writeOp.dynamic - + power_routing_to_bank.readOp.dynamic - + bank.htree_out_data->power.readOp.dynamic + + bank.htree_in_data->power.readOp.dynamic) * + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME + + if (dp.is_dram == false) + { + power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + } + } + + // if DRAM, add refresh power to total leakage + if (dp.is_dram) + { + power.readOp.leakage += refresh_power; + } + + // TODO: below should be avoided. + /*if (dp.is_main_mem) + { + power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; + }*/ + + if (g_ip->is_3d_mem) + {// ---This is only to make sure the following assert() functions don't generate errors. The values are not used in 3D DRAM models + // power = power + membus_RAS->power + membus_CAS->power + membus_data->power; //for leakage power add up, not used yet for optimization + power.readOp.dynamic = read_energy; + power.writeOp.dynamic = write_energy; + // ---Before the brackets, power = power.bank, and all the specific leakage terms have and only have accounted for bank to mat levels. 
+ // power.readOp.leakage = power.readOp.leakage + membus_RAS->power.readOp.leakage + membus_CAS->power.readOp.leakage + membus_data->power.readOp.leakage; + power.readOp.leakage =membus_RAS->power.readOp.leakage + membus_CAS->power.readOp.leakage + membus_data->power.readOp.leakage; + //cout << "test: " << power.readOp.dynamic << endl; + //cout << "test: " << membus_RAS->power.readOp.leakage << endl; + //cout << "test: " << membus_CAS->power.readOp.leakage << endl; + //cout << "test: " << membus_data->power.readOp.leakage << endl; + //cout << "test: power.readOp.leakage" << power.readOp.leakage << endl; + } + + assert(power.readOp.dynamic > 0); + assert(power.writeOp.dynamic > 0); + assert(power.readOp.leakage > 0); +} + diff --git a/T1/TP1/cacti-master/uca.h b/T1/TP1/cacti-master/uca.h new file mode 100644 index 0000000..7b6aa38 --- /dev/null +++ b/T1/TP1/cacti-master/uca.h @@ -0,0 +1,116 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + + + +#ifndef __UCA_H__ +#define __UCA_H__ + +#include "area.h" +#include "bank.h" +#include "component.h" +#include "parameter.h" +#include "htree2.h" +#include "memorybus.h" +#include "basic_circuit.h" +#include "cacti_interface.h" + + + +class UCA : public Component +{ + public: + UCA(const DynamicParameter & dyn_p); + ~UCA(); + double compute_delays(double inrisetime); // returns outrisetime + void compute_power_energy(); + + DynamicParameter dp; + Bank bank; + + Htree2 * htree_in_add; + Htree2 * htree_in_data; + Htree2 * htree_out_data; + Htree2 * htree_in_search; + Htree2 * htree_out_search; + + Memorybus * membus_RAS; + Memorybus * membus_CAS; + Memorybus * membus_data; + + powerDef power_routing_to_bank; + + uint32_t nbanks; + + int num_addr_b_bank; + int num_di_b_bank; + int num_do_b_bank; + int num_si_b_bank; + int num_so_b_bank; + int RWP, ERP, EWP,SCHP; + double area_all_dataramcells; + double total_area_per_die; + + double dyn_read_energy_from_closed_page; + double dyn_read_energy_from_open_page; + double dyn_read_energy_remaining_words_in_burst; + + double refresh_power; // only for 
DRAM + double activate_energy; + double read_energy; + double write_energy; + double precharge_energy; + double leak_power_subbank_closed_page; + double leak_power_subbank_open_page; + double leak_power_request_and_reply_networks; + + double delay_array_to_sa_mux_lev_1_decoder; + double delay_array_to_sa_mux_lev_2_decoder; + double delay_before_subarray_output_driver; + double delay_from_subarray_out_drv_to_out; + double access_time; + double precharge_delay; + double multisubbank_interleave_cycle_time; + + double t_RAS, t_CAS, t_RCD, t_RC, t_RP, t_RRD; + double activate_power, read_power, write_power; + + double delay_TSV_tot, area_TSV_tot, dyn_pow_TSV_tot, dyn_pow_TSV_per_access; + unsigned int num_TSV_tot; + unsigned int comm_bits, row_add_bits, col_add_bits, data_bits; + double area_lwl_drv, area_row_predec_dec, area_col_predec_dec, + area_subarray, area_bus, area_address_bus, area_data_bus, area_data_drv, area_IOSA, area_sense_amp, + area_per_bank; + +}; + +#endif + diff --git a/T1/TP1/cacti-master/version_cacti.h b/T1/TP1/cacti-master/version_cacti.h new file mode 100644 index 0000000..e1528bb --- /dev/null +++ b/T1/TP1/cacti-master/version_cacti.h @@ -0,0 +1,40 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#ifndef VERSION_H_ +#define VERSION_H_ + +#define VER_MAJOR_CACTI 7 /* 3dd */ +#define VER_MINOR_CACTI 0 +#define VER_COMMENT_CACTI "3DD Prerelease" +#define VER_UPDATE_CACTI "Aug, 2012" + +#endif /* VERSION_H_ */ diff --git a/T1/TP1/cacti-master/wire.cc b/T1/TP1/cacti-master/wire.cc new file mode 100644 index 0000000..55a08ae --- /dev/null +++ b/T1/TP1/cacti-master/wire.cc @@ 
-0,0 +1,830 @@ +/***************************************************************************** + * CACTI 7.0 + * SOFTWARE LICENSE AGREEMENT + * Copyright 2015 Hewlett-Packard Development Company, L.P. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * + ***************************************************************************/ + +#include "wire.h" +#include "cmath" +// use this constructor to calculate wire stats +Wire::Wire( + enum Wire_type wire_model, + double wl, + int n, + double w_s, + double s_s, + enum Wire_placement wp, + double resistivity, + /*TechnologyParameter::*/DeviceType *dt + ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s), + resistivity(resistivity), deviceType(dt) +{ + wire_placement = wp; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; + if (initialized != 1) { + cout << "Wire not initialized. 
Initializing it with default values\n"; + Wire winit; + } + calculate_wire_stats(); + // change everything back to seconds, microns, and Joules + repeater_spacing *= 1e6; + wire_length *= 1e6; + wire_width *= 1e6; + wire_spacing *= 1e6; + assert(wire_length > 0); + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); +} + + // the following values are for peripheral global technology + // specified in the input config file + Component Wire::global; + Component Wire::global_5; + Component Wire::global_10; + Component Wire::global_20; + Component Wire::global_30; + Component Wire::low_swing; + + int Wire::initialized; + double Wire::wire_width_init; + double Wire::wire_spacing_init; + + +Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, /*TechnologyParameter::*/DeviceType *dt) +{ + w_scale = w_s; + s_scale = s_s; + deviceType = dt; + wire_placement = wp; + resistivity = resis; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; + + switch (wire_placement) + { + case outside_mat: wire_width = g_tp.wire_outside_mat.pitch/2; break; + case inside_mat : wire_width = g_tp.wire_inside_mat.pitch/2; break; + default: wire_width = g_tp.wire_local.pitch/2; break; + } + + wire_spacing = wire_width; + + wire_width *= (w_scale * 1e-6/2) /* (m) */; + wire_spacing *= (s_scale * 1e-6/2) /* (m) */; + + initialized = 1; + init_wire(); + wire_width_init = wire_width; + wire_spacing_init = wire_spacing; + + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); +} + + + +Wire::~Wire() +{ +} + + + +void +Wire::calculate_wire_stats() +{ + + if (wire_placement == outside_mat) { + wire_width = g_tp.wire_outside_mat.pitch/2; + } + else if (wire_placement == inside_mat) { + wire_width = g_tp.wire_inside_mat.pitch/2; + } + else { + wire_width = g_tp.wire_local.pitch/2; + } + + wire_spacing = 
wire_width; + + wire_width *= (w_scale * 1e-6/2) /* (m) */; + wire_spacing *= (s_scale * 1e-6/2) /* (m) */; + + + if (wt != Low_swing) { + + // delay_optimal_wire(); + + if (wt == Global) { + delay = global.delay * wire_length; + power.readOp.dynamic = global.power.readOp.dynamic * wire_length; + power.readOp.leakage = global.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length; + repeater_spacing = global.area.w; + repeater_size = global.area.h; + area.set_area((wire_length/repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); + } + else if (wt == Global_5) { + delay = global_5.delay * wire_length; + power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_5.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_5.area.w; + repeater_size = global_5.area.h; + area.set_area((wire_length/repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); + } + else if (wt == Global_10) { + delay = global_10.delay * wire_length; + power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_10.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_10.area.w; + repeater_size = global_10.area.h; + area.set_area((wire_length/repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); + } + else if (wt == Global_20) { + delay = global_20.delay * wire_length; + power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_20.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = 
global_20.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_20.area.w; + repeater_size = global_20.area.h; + area.set_area((wire_length/repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); + } + else if (wt == Global_30) { + delay = global_30.delay * wire_length; + power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_30.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_30.area.w; + repeater_size = global_30.area.h; + area.set_area((wire_length/repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); + } + out_rise_time = delay*repeater_spacing/deviceType->Vth; + } + else if (wt == Low_swing) { + low_swing_model (); + repeater_spacing = wire_length; + repeater_size = 1; + } + else { + assert(0); + } +} + + + +/* + * The fall time of an input signal to the first stage of a circuit is + * assumed to be same as the fall time of the output signal of two + * inverters connected in series (refer: CACTI 1 Technical report, + * section 6.1.3) + */ + double +Wire::signal_fall_time () +{ + + /* rise time of inverter 1's output */ + double rt; + /* fall time of inverter 2's output */ + double ft; + double timeconst; + + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + ft = 
horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; + return ft; +} + + + +double Wire::signal_rise_time () +{ + + /* rise time of inverter 1's output */ + double ft; + /* fall time of inverter 2's output */ + double rt; + double timeconst; + + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); + return ft; //sec +} + + + +/* Wire resistance and capacitance calculations + * wire width + * + * /__/ + * | | + * | | height = ASPECT_RATIO*wire width (ASPECT_RATIO = 2.2, ref: ITRS) + * |__|/ + * + * spacing between wires in same level = wire width + * + */ + +double Wire::wire_cap (double len /* in m */, bool call_from_outside) +{ + //TODO: this should be consistent with the wire_res in technology file + double sidewall, adj, tot_cap; + double wire_height; + double epsilon0 = 8.8542e-12; + double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness; + + switch (wire_placement) + { + case outside_mat: + { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_outside_mat.miller_value; + ild_thickness = g_tp.wire_outside_mat.ild_thickness; + break; + } + case inside_mat : + { + 
aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_inside_mat.miller_value; + ild_thickness = g_tp.wire_inside_mat.ild_thickness; + break; + } + default: + { + aspect_ratio = g_tp.wire_local.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant; + miller_value = g_tp.wire_local.miller_value; + ild_thickness = g_tp.wire_local.ild_thickness; + break; + } + } + + if (call_from_outside) + { + wire_width *= 1e-6; + wire_spacing *= 1e-6; + } + wire_height = wire_width/w_scale*aspect_ratio; + /* + * assuming height does not change. wire_width = width_original*w_scale + * So wire_height does not change as wire width increases + */ + +// capacitance between wires in the same level +// sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) +// * epsilon0; + + sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) + * epsilon0; + + + // capacitance between wires in adjacent levels + //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0; + //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; + + adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; + //Change ild_thickness from micron to M + + //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m + tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m + + if (call_from_outside) + { + wire_width *= 1e6; + wire_spacing *= 1e6; + } + return (tot_cap*len); // (F) +} + + + double +Wire::wire_res (double len /*(in m)*/) +{ + + double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0; + //TODO: this should be consistent with the wire_res in technology file + //The whole computation 
should be consistent with the wire_res in technology.cc too! + + switch (wire_placement) + { + case outside_mat: + { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + break; + } + case inside_mat : + { + aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + break; + } + default: + { + aspect_ratio = g_tp.wire_local.aspect_ratio; + break; + } + } + return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)* + (wire_width-2*barrier_thickness))); +} + +/* + * Calculates the delay, power and area of the transmitter circuit. + * + * The transmitter delay is the sum of nand gate delay, inverter delay + * low swing nmos delay, and the wire delay + * (ref: Technical report 6) + */ + void +Wire::low_swing_model() +{ + double len = wire_length; + double beta = pmos_to_nmos_sz_ratio(); + + + double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time; + + /* Final nmos low swing driver size calculation: + * Try to size the driver such that the delay + * is less than 8FO4. + * If the driver size is greater than + * the max allowable size, assume max size for the driver. + * In either case, recalculate the delay using + * the final driver size assuming slow input with + * finite rise time instead of ideal step input + * + * (ref: Technical report 6) + */ + double cwire = wire_cap(len); /* load capacitance */ + double rwire = wire_res(len); + +#define RES_ADJ (8.6) // Increase in resistance due to low driving vol. + + double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ; + double nsize = R_to_w(driver_res, NCH); + + nsize = MIN(nsize, g_tp.max_w_nmos_); + nsize = MAX(nsize, g_tp.min_w_nmos_); + + if(rwire*cwire > 8*g_tp.FO4) + { + nsize = g_tp.max_w_nmos_; + } + + // size the inverter appropriately to minimize the transmitter delay + // Note - In order to minimize leakage, we are not adding a set of inverters to + // bring down delay. 
Instead, we are sizing the single gate + // based on the logical effort. + double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0) + + gate_C(2*min_w_pmos, 0))); + double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff; + double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0)); + inv_size = MAX(inv_size, 1); + + /* nand gate delay */ + double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1)); + double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(inv_size*g_tp.min_w_nmos_, 0) + + gate_C(inv_size*min_w_pmos, 0); + + double timeconst = res_eq * cap_eq; + + delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, + deviceType->Vth/deviceType->Vdd, RISE); + double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd; + + inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */ + + /* Inverter delay: + * The load capacitance of this inv depends on + * the gate capacitance of the final stage nmos + * transistor which in turn depends on nsize + */ + res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1); + cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(nsize, 0); + timeconst = res_eq * cap_eq; + + delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, + deviceType->Vth/deviceType->Vdd, FALL); + temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd; + + + transmitter.delay = delay; + transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. 
model*/ + transmitter.power.readOp.leakage = deviceType->Vdd * + (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + + transmitter.power.readOp.gate_leakage = deviceType->Vdd * + (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + + inputrise = delay / deviceType->Vth; + + /* nmos delay + wire delay */ + cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 + + nsense * sense_amp_input_cap(); //+receiver cap + /* + * NOTE: nmos is used as both pull up and pull down transistor + * in the transmitter. This is because for low voltage swing, drive + * resistance of nmos is less than pmos + * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency) + */ + timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire + + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) + + rwire*cwire/2 + + (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) * + nsense * sense_amp_input_cap(); + + /* + * since we are pre-equalizing and overdriving the low + * swing wires, the net time constant is less + * than the actual value + */ + delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0); +#define VOL_SWING .1 + temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */ + temp_power *= 2; /* differential wire */ + + l_wire.delay = delay - transmitter.delay; + l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic; + l_wire.power.readOp.leakage = deviceType->Vdd* + (4* cmos_Isub_leakage(nsize, 0, 1, nmos)); + + l_wire.power.readOp.gate_leakage = deviceType->Vdd* + (4* cmos_Ig_leakage(nsize, 0, 1, nmos)); + + //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, + // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth; + + delay += g_tp.sense_delay; + + sense_amp.delay = g_tp.sense_delay; + out_rise_time = g_tp.sense_delay/(deviceType->Vth); + 
sense_amp.power.readOp.dynamic = g_tp.sense_dy_power; + sense_amp.power.readOp.leakage = 0; //FIXME + sense_amp.power.readOp.gate_leakage = 0; + + power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic; + power.readOp.leakage = transmitter.power.readOp.leakage + + l_wire.power.readOp.leakage + + sense_amp.power.readOp.leakage; + power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage + + l_wire.power.readOp.gate_leakage + + sense_amp.power.readOp.gate_leakage; +} + + double +Wire::sense_amp_input_cap() +{ + return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + + drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def); +} + + +void Wire::delay_optimal_wire () +{ + double len = wire_length; + //double min_wire_width = wire_width; //m + double beta = pmos_to_nmos_sz_ratio(); + double switching = 0; // switching energy + double short_ckt = 0; // short-circuit energy + double tc = 0; // time constant + // input cap of min sized driver + double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); + + // output parasitic capacitance of + // the min. 
sized driver + double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + // drive resistance + double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + + tr_R_on(min_w_pmos, PCH, 1))/2; + double wr = wire_res(len); //ohm + + // wire cap /m + double wc = wire_cap(len); + + // size the repeater such that the delay of the wire is minimum + double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel + + // calc the optimum spacing between the repeaters (m) + + repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap)/ + ((wr/len)*(wc/len))); + repeater_size = repeater_scaling; + + switching = (repeater_scaling * (input_cap + out_cap) + + repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd; + + tc = out_res * (input_cap + out_cap) + + out_res * wc/len * repeater_spacing/repeater_scaling + + wr/len * repeater_spacing * input_cap * repeater_scaling + + 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing; + + delay = 0.693 * tc * len/repeater_spacing; + +#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. 
{IEEE TED} */ + short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * + repeater_scaling * tc; + + area.set_area((len/repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, + g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def)); + power.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt)); + power.readOp.leakage = ((len/repeater_spacing)* + deviceType->Vdd* + cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); + power.readOp.gate_leakage = ((len/repeater_spacing)* + deviceType->Vdd* + cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); +} + + + +// calculate power/delay values for wires with suboptimal repeater sizing/spacing +void +Wire::init_wire(){ + wire_length = 1; + delay_optimal_wire(); + double sp, si; + powerDef pow; + si = repeater_size; + sp = repeater_spacing; + sp *= 1e6; // in microns + + double i, j, del; + repeated_wire.push_back(Component()); + for (j=sp; j < 4*sp; j+=100) { + for (i = si; i > 1; i--) { + pow = wire_model(j*1e-6, i, &del); + if (j == sp && i == si) { + global.delay = del; + global.power = pow; + global.area.h = si; + global.area.w = sp*1e-6; // m + } +// cout << "Repeater size - "<< i << +// " Repeater spacing - " << j << +// " Delay - " << del << +// " PowerD - " << pow.readOp.dynamic << +// " PowerL - " << pow.readOp.leakage <delay; + low_swing.power = l_wire->power; + delete l_wire; +} + + + +void Wire::update_fullswing() +{ + + list::iterator citer; + double del[4]; + del[3] = this->global.delay + this->global.delay*.3; + del[2] = global.delay + global.delay*.2; + del[1] = global.delay + global.delay*.1; + del[0] = global.delay + global.delay*.05; + double threshold; + double ncost; + double cost; + int i = 4; + while (i>0) { + threshold = del[i-1]; + cost = BIGNUM; + for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++) + { + if (citer->delay > 
threshold) { + citer = repeated_wire.erase(citer); + citer --; + } + else { + ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic + + citer->power.readOp.leakage/global.power.readOp.leakage; + if(ncost < cost) + { + cost = ncost; + if (i == 4) { + global_30.delay = citer->delay; + global_30.power = citer->power; + global_30.area = citer->area; + } + else if (i==3) { + global_20.delay = citer->delay; + global_20.power = citer->power; + global_20.area = citer->area; + } + else if(i==2) { + global_10.delay = citer->delay; + global_10.power = citer->power; + global_10.area = citer->area; + } + else if(i==1) { + global_5.delay = citer->delay; + global_5.power = citer->power; + global_5.area = citer->area; + } + } + } + } + i--; + } +} + + + +powerDef Wire::wire_model (double space, double size, double *delay) +{ + powerDef ptemp; + double len = 1; + //double min_wire_width = wire_width; //m + double beta = pmos_to_nmos_sz_ratio(); + // switching energy + double switching = 0; + // short-circuit energy + double short_ckt = 0; + // time constant + double tc = 0; + // input cap of min sized driver + double input_cap = gate_C (g_tp.min_w_nmos_ + + min_w_pmos, 0); + + // output parasitic capacitance of + // the min. 
sized driver + double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + // drive resistance + double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + + tr_R_on(min_w_pmos, PCH, 1))/2; + double wr = wire_res(len); //ohm + + // wire cap /m + double wc = wire_cap(len); + + repeater_spacing = space; + repeater_size = size; + + switching = (repeater_size * (input_cap + out_cap) + + repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd; + + tc = out_res * (input_cap + out_cap) + + out_res * wc/len * repeater_spacing/repeater_size + + wr/len * repeater_spacing * out_cap * repeater_size + + 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing; + + *delay = 0.693 * tc * len/repeater_spacing; + +#define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */ + short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * + repeater_size * tc; + + ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt)); + ptemp.readOp.leakage = ((len/repeater_spacing)* + deviceType->Vdd* + cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); + + ptemp.readOp.gate_leakage = ((len/repeater_spacing)* + deviceType->Vdd* + cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); + + return ptemp; +} + +void +Wire::print_wire() +{ + + cout << "\nWire Properties:\n\n"; + cout << " Delay Optimal\n\tRepeater size - "<< global.area.h << + " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)" + " \n\tDelay - " << global.delay*1e6 << " (ns/mm)" + " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)" + " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)" + " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n"; + cout << "\tWire width - " < +#include + +class Wire : public Component +{ + public: + Wire(enum Wire_type wire_model, double len /* in u*/, + 
int nsense = 1/* no. of sense amps connected to the low-swing wire */, + double width_scaling = 1, + double spacing_scaling = 1, + enum Wire_placement wire_placement = outside_mat, + double resistivity = CU_RESISTIVITY, + /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)); + ~Wire(); + + Wire( double width_scaling = 1, + double spacing_scaling = 1, + enum Wire_placement wire_placement = outside_mat, + double resistivity = CU_RESISTIVITY, + /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global) + ); // should be used only once for initializing static members + void init_wire(); + + void calculate_wire_stats(); + void delay_optimal_wire(); + double wire_cap(double len, bool call_from_outside=false); + double wire_res(double len); + void low_swing_model(); + double signal_fall_time(); + double signal_rise_time(); + double sense_amp_input_cap(); + + enum Wire_type wt; + double wire_spacing; + double wire_width; + enum Wire_placement wire_placement; + double repeater_size; + double repeater_spacing; + double wire_length; + double in_rise_time, out_rise_time; + + void set_in_rise_time(double rt) + { + in_rise_time = rt; + } + static Component global; + static Component global_5; + static Component global_10; + static Component global_20; + static Component global_30; + static Component low_swing; + static double wire_width_init; + static double wire_spacing_init; + void print_wire(); + + private: + + int nsense; // no. 
of sense amps connected to a low-swing wire if it + // is broadcasting data to multiple destinations + // width and spacing scaling factor can be used + // to model low level wires or special + // fat wires + double w_scale, s_scale; + double resistivity; + powerDef wire_model (double space, double size, double *delay); + list repeated_wire; + void update_fullswing(); + static int initialized; + + + //low-swing + Component transmitter; + Component l_wire; + Component sense_amp; + + double min_w_pmos; + + /*TechnologyParameter::*/DeviceType *deviceType; + +}; + +#endif