tp T2
This commit is contained in:
parent
7edaf0f745
commit
1b032d43db
185 changed files with 56362 additions and 0 deletions
BIN
T1/TP/TP1/7179-Arm_Cortex-R_Comparison_Table_V8.pdf
Normal file
BIN
T1/TP/TP1/7179-Arm_Cortex-R_Comparison_Table_V8.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
T1/TP/TP1/Arm_Cortex-A_Processor_Comparison_Table.pdf
Normal file
BIN
T1/TP/TP1/Arm_Cortex-A_Processor_Comparison_Table.pdf
Normal file
Binary file not shown.
BIN
T1/TP/TP1/HPL-2009-85.pdf
Normal file
BIN
T1/TP/TP1/HPL-2009-85.pdf
Normal file
Binary file not shown.
BIN
T1/TP/TP1/Master SETI-2022-TP1-v1[8705].pdf
Normal file
BIN
T1/TP/TP1/Master SETI-2022-TP1-v1[8705].pdf
Normal file
Binary file not shown.
194
T1/TP/TP1/cacti_7/2DDRAM_Samsung2GbDDR2.cfg
Normal file
194
T1/TP/TP1/cacti_7/2DDRAM_Samsung2GbDDR2.cfg
Normal file
|
@ -0,0 +1,194 @@
|
|||
# Cache size
|
||||
//-size (bytes) 528
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
//-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 268435456
|
||||
//-size (bytes) 536870912
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 536870912
|
||||
//-size (bytes) 1073741824
|
||||
# For 3D DRAM memory please use Gb as units
|
||||
-size (Gb) 2
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 128
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
-associativity 1
|
||||
//-associativity 16
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 16
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.065
|
||||
//-technology (u) 0.078
|
||||
-technology (u) 0.080
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
//-page size (bits) 8192
|
||||
-burst length 4
|
||||
-internal prefetch width 1
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
//-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
-Data array cell type - "comm-dram"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
//-Data array peripheral type - "itrs-hp"
|
||||
-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
|
||||
# Bus width includes data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
//-output/input bus width 64
|
||||
-output/input bus width 64
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 350
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
//-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
-cache type "3D memory or 2D main memory"
|
||||
|
||||
# Parameters for 3D DRAM
|
||||
//-page size (bits) 16384
|
||||
-page size (bits) 8192
|
||||
//-page size (bits) 4096
|
||||
-burst depth 4
|
||||
-IO width 4
|
||||
-system frequency (MHz) 266
|
||||
|
||||
-stacked die count 1
|
||||
-partitioning granularity 0 // 0: coarse-grained rank-level; 1: fine-grained rank-level
|
||||
//-TSV projection 1 // 0: ITRS aggressive; 1: industrial conservative
|
||||
|
||||
## End of parameters for 3D DRAM
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 45
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
-access mode (normal, sequential, fast) - "fast"
|
||||
//-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 50:100000:100000:100000:1000000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_5"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
-Wire outside mat - "global"
|
||||
//-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "false"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
-Force cache config - "true"
|
||||
//-Force cache config - "false"
|
||||
-Ndwl 128
|
||||
-Ndbl 32
|
||||
-Nspd 1
|
||||
-Ndcm 1
|
||||
-Ndsam1 1
|
||||
-Ndsam2 1
|
||||
|
194
T1/TP/TP1/cacti_7/2DDRAM_micron1Gb.cfg
Normal file
194
T1/TP/TP1/cacti_7/2DDRAM_micron1Gb.cfg
Normal file
|
@ -0,0 +1,194 @@
|
|||
# Cache size
|
||||
//-size (bytes) 528
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
//-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 268435456
|
||||
//-size (bytes) 536870912
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 536870912
|
||||
//-size (bytes) 1073741824
|
||||
# For 3D DRAM memory please use Gb as units
|
||||
-size (Gb) 1
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 128
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
-associativity 1
|
||||
//-associativity 16
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 8
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.065
|
||||
-technology (u) 0.078
|
||||
//-technology (u) 0.080
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
//-page size (bits) 8192
|
||||
-burst length 4
|
||||
-internal prefetch width 1
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
//-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
-Data array cell type - "comm-dram"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
//-Data array peripheral type - "itrs-hp"
|
||||
-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
|
||||
# Bus width includes data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
//-output/input bus width 64
|
||||
-output/input bus width 64
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 350
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
//-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
-cache type "3D memory or 2D main memory"
|
||||
|
||||
## Parameters for 3D DRAM
|
||||
-page size (bits) 16384
|
||||
//-page size (bits) 8192
|
||||
-burst depth 8
|
||||
-IO width 4
|
||||
-system frequency (MHz) 533
|
||||
|
||||
-stacked die count 1
|
||||
-partitioning granularity 0 // 0: coarse-grained rank-level; 1: fine-grained rank-level
|
||||
//-TSV projection 1 // 0: ITRS aggressive; 1: industrial conservative
|
||||
|
||||
## End of parameters for 3D DRAM
|
||||
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 45
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
-access mode (normal, sequential, fast) - "fast"
|
||||
//-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:10
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 50:100000:100000:100000:1000000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
-Wire outside mat - "global"
|
||||
//-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
-Force cache config - "true"
|
||||
//-Force cache config - "false"
|
||||
-Ndwl 16
|
||||
-Ndbl 16
|
||||
-Nspd 1
|
||||
-Ndcm 1
|
||||
-Ndsam1 1
|
||||
-Ndsam2 1
|
||||
|
197
T1/TP/TP1/cacti_7/3DDRAM_Samsung3D8Gb_extened.cfg
Normal file
197
T1/TP/TP1/cacti_7/3DDRAM_Samsung3D8Gb_extened.cfg
Normal file
|
@ -0,0 +1,197 @@
|
|||
# Cache size
|
||||
//-size (bytes) 528
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
//-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 268435456
|
||||
//-size (bytes) 536870912
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 536870912
|
||||
//-size (bytes) 1073741824
|
||||
# For 3D DRAM memory please use Gb as units
|
||||
-size (Gb) 8
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 128
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
-associativity 1
|
||||
//-associativity 16
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 8
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.065
|
||||
//-technology (u) 0.078
|
||||
//-technology (u) 0.080
|
||||
//-technology (u) 0.090
|
||||
-technology (u) 0.050
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
//-page size (bits) 8192
|
||||
-burst length 4
|
||||
-internal prefetch width 1
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
//-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
-Data array cell type - "comm-dram"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
//-Data array peripheral type - "itrs-hp"
|
||||
-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
|
||||
# Bus width includes data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
//-output/input bus width 64
|
||||
-output/input bus width 64
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 350
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
//-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory" # old main memory model, in fact, it is eDRAM model.
|
||||
-cache type "3D memory or 2D main memory" # once this parameter is used, the new parameter section below of <Parameters for 3D DRAM and 2D main memory> will override the same parameter above
|
||||
|
||||
# <Parameters for 3D DRAM and 2D main memory>
|
||||
//-page size (bits) 16384
|
||||
-page size (bits) 8192
|
||||
//-page size (bits) 4096
|
||||
-burst depth 8 // for 3D DRAM, IO per bank equals the product of burst depth and IO width
|
||||
-IO width 4
|
||||
-system frequency (MHz) 677
|
||||
|
||||
-stacked die count 4
|
||||
-partitioning granularity 0 // 0: coarse-grained rank-level; 1: fine-grained rank-level
|
||||
-TSV projection 1 // 0: ITRS aggressive; 1: industrial conservative
|
||||
|
||||
## End of parameters for 3D DRAM
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 45
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
-access mode (normal, sequential, fast) - "fast"
|
||||
//-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 50:100000:100000:100000:1000000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
-Wire outside mat - "global"
|
||||
//-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
-Force cache config - "true"
|
||||
//-Force cache config - "false"
|
||||
-Ndwl 16
|
||||
-Ndbl 32
|
||||
-Nspd 1
|
||||
-Ndcm 1
|
||||
-Ndsam1 1
|
||||
-Ndsam2 1
|
||||
|
122
T1/TP/TP1/cacti_7/README
Normal file
122
T1/TP/TP1/cacti_7/README
Normal file
|
@ -0,0 +1,122 @@
|
|||
-----------------------------------------------------------
|
||||
|
||||
|
||||
____ __ ________ __
|
||||
/\ _`\ /\ \__ __ /\_____ \ /'__`\
|
||||
\ \ \/\_\ __ ___\ \ ,_\/\_\ \/___//'/'/\ \/\ \
|
||||
\ \ \/_/_ /'__`\ /'___\ \ \/\/\ \ /' /' \ \ \ \ \
|
||||
\ \ \L\ \/\ \L\.\_/\ \__/\ \ \_\ \ \ /' /'__ \ \ \_\ \
|
||||
\ \____/\ \__/.\_\ \____\\ \__\\ \_\ /\_/ /\_\ \ \____/
|
||||
\/___/ \/__/\/_/\/____/ \/__/ \/_/ \// \/_/ \/___/
|
||||
|
||||
|
||||
A Tool to Model Caches/Memories, 3D stacking, and off-chip IO
|
||||
-----------------------------------------------------------
|
||||
|
||||
CACTI is an analytical tool that takes a set of cache/memory para-
|
||||
meters as input and calculates its access time, power, cycle
|
||||
time, and area.
|
||||
CACTI was originally developed by Dr. Jouppi and Dr. Wilton
|
||||
in 1993 and since then it has undergone six major
|
||||
revisions.
|
||||
|
||||
List of features (version 1-7):
|
||||
===============================
|
||||
The following is the list of features supported by the tool.
|
||||
|
||||
* Power, delay, area, and cycle time model for
|
||||
direct mapped caches
|
||||
set-associative caches
|
||||
fully associative caches
|
||||
Embedded DRAM memories
|
||||
Commodity DRAM memories
|
||||
|
||||
* Support for modeling multi-ported uniform cache access (UCA)
|
||||
and multi-banked, multi-ported non-uniform cache access (NUCA).
|
||||
|
||||
* Leakage power calculation that also considers the operating
|
||||
temperature of the cache.
|
||||
|
||||
* Router power model.
|
||||
|
||||
* Interconnect model with different delay, power, and area
|
||||
properties including low-swing wire model.
|
||||
|
||||
* An interface to perform trade-off analysis involving power, delay,
|
||||
area, and bandwidth.
|
||||
|
||||
* All process specific values used by the tool are obtained
|
||||
from ITRS and currently, the tool supports 90nm, 65nm, 45nm,
|
||||
and 32nm technology nodes.
|
||||
|
||||
* Chip IO model to calculate latency and energy for DDR bus. Users can model
|
||||
different loads (fan-outs) and evaluate the impact on frequency and energy.
|
||||
This model can be used to study LR-DIMMs, R-DIMMs, etc.
|
||||
|
||||
Version 7.0 is derived from 6.5 and merged with CACTI 3D.
|
||||
It has many new additions apart from code refinements and
|
||||
bug fixes: new IO model, 3D memory model, and power gating models.
|
||||
Ref: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models
|
||||
MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4
|
||||
CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
Version 6.5 has a new c++ code base and includes numerous bug fixes.
|
||||
CACTI 5.3 and 6.0 activate an entire row of mats to read/write a single
|
||||
block of data. This technique improves reliability at the cost of
|
||||
power. CACTI 6.5 activates minimum number of mats just enough to retrieve
|
||||
a block to minimize power.
|
||||
|
||||
How to use the tool?
|
||||
====================
|
||||
Prior versions of CACTI take input parameters such as cache
|
||||
size and technology node as a set of command line arguments.
|
||||
To avoid a long list of command line arguments,
|
||||
CACTI 6.5 and 7.0 let users specify their cache model in a more
|
||||
detailed manner by using a config file (cache.cfg).
|
||||
|
||||
-> define the cache model using cache.cfg
|
||||
-> run the "cacti" binary <./cacti -infile cache.cfg>
|
||||
|
||||
CACTI also provides a command line interface similar to earlier versions. The command line interface can be used as
|
||||
|
||||
./cacti cache_size line_size associativity rw_ports excl_read_ports excl_write_ports
|
||||
single_ended_read_ports search_ports banks tech_node output_width specific_tag tag_width
|
||||
access_mode cache main_mem obj_func_delay obj_func_dynamic_power obj_func_leakage_power
|
||||
obj_func_cycle_time obj_func_area dev_func_delay dev_func_dynamic_power dev_func_leakage_power
|
||||
dev_func_area dev_func_cycle_time ed_ed2_none temp wt data_arr_ram_cell_tech_flavor_in
|
||||
data_arr_peri_global_tech_flavor_in tag_arr_ram_cell_tech_flavor_in tag_arr_peri_global_tech_flavor_in
|
||||
interconnect_projection_type_in wire_inside_mat_type_in wire_outside_mat_type_in
|
||||
REPEATERS_IN_HTREE_SEGMENTS_in VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in
|
||||
BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in PAGE_SIZE_BITS_in BURST_LENGTH_in
|
||||
INTERNAL_PREFETCH_WIDTH_in force_wiretype wiretype force_config ndwl ndbl nspd ndcm
|
||||
ndsam1 ndsam2 ecc
|
||||
|
||||
For complete documentation of the tool, please refer
|
||||
to the following publications and reports.
|
||||
|
||||
CACTI-5.3 & 6 reports - Details on Memory/cache organizations and tradeoffs.
|
||||
|
||||
Latency/Energy tradeoffs for large caches and NUCA design:
|
||||
"Optimizing NUCA Organizations and Wiring Alternatives for Large Caches With CACTI 6.0", that appears in MICRO 2007.
|
||||
|
||||
Memory IO design: CACTI-IO: CACTI With OFF-chip Power-Area-Timing Models,
|
||||
MemCAD: An Interconnect Exploratory Tool for Innovative Memories Beyond DDR4
|
||||
CACTI-IO Technical Report - http://www.hpl.hp.com/techreports/2013/HPL-2013-79.pdf
|
||||
|
||||
3D model:
|
||||
CACTI-3DD: Architecture-level modeling for 3D die-stacked DRAM main memory
|
||||
|
||||
We are still improving the tool and refining the code. If you
|
||||
have any comments, questions, or suggestions please write to
|
||||
us.
|
||||
|
||||
Naveen Muralimanohar
|
||||
naveen.muralimanohar@hpe.com
|
||||
|
||||
Ali Shafiee
|
||||
shafiee@cs.utah.edu
|
||||
|
||||
Vaishnav Srinivas
|
||||
vaishnav.srinivas@gmail.com
|
||||
|
242
T1/TP/TP1/cacti_7/TSV.cc
Normal file
242
T1/TP/TP1/cacti_7/TSV.cc
Normal file
|
@ -0,0 +1,242 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "TSV.h"
|
||||
|
||||
/**
 * Build the model of one TSV together with its driving buffer chain.
 *
 * @param tsv_type  Fine or Coarse; selects which set of TSV parasitics
 *                  (capacitance, resistance, minimum area) is read from g_tp.
 * @param dt        Device type used for the TSV driver transistors.
 *
 * After the members are set up, the constructor runs compute_buffer_stage(),
 * compute_area() and compute_delay(), in that order.
 */
TSV::TSV(enum TSV_type tsv_type,
    /*TechnologyParameter::*/DeviceType *dt)://TSV driver's device type set to be peri_global
  deviceType(dt), tsv_type(tsv_type)
{
  num_gates = 1;
  num_gates_min = 1;//Is there a minimum number of stages?
  min_w_pmos = deviceType -> n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;

  // Start from defined values: the default branch of the switch below sets
  // nothing, and the compute_*() calls at the end of this constructor read
  // cap, res and min_area unconditionally.
  cap = 0;
  res = 0;
  min_area = 0;

  switch (tsv_type)
  {
    case Fine:
      cap = g_tp.tsv_parasitic_capacitance_fine;
      res = g_tp.tsv_parasitic_resistance_fine;
      min_area = g_tp.tsv_minimum_area_fine;
      break;
    case Coarse:
      cap = g_tp.tsv_parasitic_capacitance_coarse;
      res = g_tp.tsv_parasitic_resistance_coarse;
      min_area = g_tp.tsv_minimum_area_coarse;
      break;
    default:
      // Unknown TSV type: keep the zeroed parasitics set above.
      break;
  }

  // Clear every stage width; logical_effort() later fills in the stages
  // that are actually used.
  for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
  {
    w_TSV_n[i] = 0;
    w_TSV_p[i] = 0;
  }

  double first_buf_stg_coef = 5; // To tune the total buffer delay.
  w_TSV_n[0] = g_tp.min_w_nmos_*first_buf_stg_coef;
  w_TSV_p[0] = min_w_pmos *first_buf_stg_coef;

  is_dram = 0;
  is_wl_tr = 0;

  //What does the function assert() mean? Should I put the function here?
  compute_buffer_stage();
  compute_area();
  compute_delay();
}
|
||||
|
||||
// Nothing to release: the destructor body is intentionally empty.
TSV::~TSV() {}
|
||||
|
||||
void TSV::compute_buffer_stage()
|
||||
{
|
||||
double p_to_n_sz_ratio = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||
|
||||
//BEOL parasitics in Katti's E modeling and charac. of TSV. Needs further detailed values.
|
||||
//double res_beol = 0.1;//inaccurate
|
||||
//double cap_beol = 1e-15;
|
||||
|
||||
//C_load_TSV = cap_beol + cap + cap_beol + gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0);
|
||||
C_load_TSV = cap + gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); //+ 57.5e-15;
|
||||
if(g_ip->print_detail_debug)
|
||||
{
|
||||
cout << " The input cap of 1st buffer: " << gate_C(w_TSV_n[0] + w_TSV_p[0], 0) * 1e15 << " fF";
|
||||
}
|
||||
double F = C_load_TSV / gate_C(w_TSV_n[0] + w_TSV_p[0], 0);
|
||||
if(g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"\nF is "<<F<<" \n";
|
||||
}
|
||||
//Obtain buffer chain stages using logic effort function. Does stage number have to be even?
|
||||
num_gates = logical_effort(
|
||||
num_gates_min,
|
||||
1,
|
||||
F,
|
||||
w_TSV_n,
|
||||
w_TSV_p,
|
||||
C_load_TSV,
|
||||
p_to_n_sz_ratio,
|
||||
is_dram,
|
||||
is_wl_tr,
|
||||
g_tp.max_w_nmos_/*Correct? Decoder uses max_w_nmos_dec*/);
|
||||
}
|
||||
|
||||
void TSV::compute_area()
|
||||
{
|
||||
//Obtain the driver chain area and leakage power for TSV
|
||||
double Vdd = deviceType->Vdd;
|
||||
double cumulative_area = 0;
|
||||
double cumulative_curr = 0; // cumulative leakage current
|
||||
double cumulative_curr_Ig = 0; // cumulative leakage current
|
||||
Buffer_area.h = g_tp.cell_h_def;//cell_h_def is the assigned height for memory cell (5um), is it correct to use it here?
|
||||
|
||||
//logic_effort() didn't give the size of w_n[0] and w_p[0], which is min size inverter
|
||||
//w_TSV_n[0] = g_tp.min_w_nmos_;
|
||||
//w_TSV_p[0] = min_w_pmos;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < num_gates; i++)
|
||||
{
|
||||
cumulative_area += compute_gate_area(INV, 1, w_TSV_p[i], w_TSV_n[i], Buffer_area.h);
|
||||
if(g_ip->print_detail_debug)
|
||||
{
|
||||
cout << "\n\tArea up to the " << i+1 << " stages is: " << cumulative_area << " um2";
|
||||
}
|
||||
cumulative_curr += cmos_Isub_leakage(w_TSV_n[i], w_TSV_p[i], 1, inv, is_dram);
|
||||
cumulative_curr_Ig += cmos_Ig_leakage(w_TSV_n[i], w_TSV_p[i], 1, inv, is_dram);// The operator += is mistakenly put as = in decoder.cc
|
||||
}
|
||||
power.readOp.leakage = cumulative_curr * Vdd;
|
||||
power.readOp.gate_leakage = cumulative_curr_Ig * Vdd;
|
||||
|
||||
Buffer_area.set_area(cumulative_area);
|
||||
Buffer_area.w = (cumulative_area / Buffer_area.h);
|
||||
|
||||
TSV_metal_area.set_area(min_area * 3.1416/16);
|
||||
|
||||
if( Buffer_area.get_area() < min_area - TSV_metal_area.get_area() )
|
||||
area.set_area(min_area);
|
||||
else
|
||||
area.set_area(Buffer_area.get_area() + TSV_metal_area.get_area());
|
||||
|
||||
}
|
||||
|
||||
void TSV::compute_delay()
|
||||
{
|
||||
//Buffer chain delay and Dynamic Power
|
||||
double rd, tf, this_delay, c_load, c_intrinsic, inrisetime = 0/*The initial time*/;
|
||||
//is_dram, is_wl_tr are declared to be false in the constructor
|
||||
rd = tr_R_on(w_TSV_n[0], NCH, 1, is_dram, false, is_wl_tr);
|
||||
c_load = gate_C(w_TSV_n[1] + w_TSV_p[1], 0.0, is_dram, false, is_wl_tr);
|
||||
c_intrinsic = drain_C_(w_TSV_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
|
||||
drain_C_(w_TSV_n[0], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
|
||||
tf = rd * (c_intrinsic + c_load);
|
||||
//Refer to horowitz function definition
|
||||
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||||
delay += this_delay;
|
||||
inrisetime = this_delay / (1.0 - 0.5);
|
||||
|
||||
double Vdd = deviceType -> Vdd;
|
||||
power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
|
||||
|
||||
int i;
|
||||
for (i = 1; i < num_gates - 1; ++i)
|
||||
{
|
||||
rd = tr_R_on(w_TSV_n[i], NCH, 1, is_dram, false, is_wl_tr);
|
||||
c_load = gate_C(w_TSV_p[i+1] + w_TSV_n[i+1], 0.0, is_dram, false, is_wl_tr);
|
||||
c_intrinsic = drain_C_(w_TSV_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
|
||||
drain_C_(w_TSV_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
|
||||
tf = rd * (c_intrinsic + c_load);
|
||||
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||||
delay += this_delay;
|
||||
inrisetime = this_delay / (1.0 - 0.5);
|
||||
power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
|
||||
}
|
||||
|
||||
// add delay of final inverter that drives the TSV
|
||||
i = num_gates - 1;
|
||||
c_load = C_load_TSV;
|
||||
rd = tr_R_on(w_TSV_n[i], NCH, 1, is_dram, false, is_wl_tr);
|
||||
c_intrinsic = drain_C_(w_TSV_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
|
||||
drain_C_(w_TSV_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
|
||||
//The delay method for the last stage of buffer chain in Decoder.cc
|
||||
|
||||
//double res_beol = 0.1;//inaccurate
|
||||
//double R_TSV_out = res_beol + res + res_beol;
|
||||
double R_TSV_out = res;
|
||||
tf = rd * (c_intrinsic + c_load) + R_TSV_out * c_load / 2;
|
||||
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||||
delay += this_delay;
|
||||
|
||||
power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; //Dynamic power done
|
||||
|
||||
//Is the delay actually delay/(1.0-0.5)??
|
||||
//ret_val = this_delay / (1.0 - 0.5);
|
||||
//return ret_val;//Originally for decoder.cc to get outrise time
|
||||
|
||||
|
||||
/* This part is to obtain delay in the TSV path, refer to Katti's paper.
|
||||
* It can be used alternatively as the step to get the final-stage delay
|
||||
double C_ext = c_intrinsic;
|
||||
R_dr = rd;
|
||||
double C_int = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0.0, is_dram, false, is_wl_tr);
|
||||
delay_TSV_path = 0.693 * (R_dr * C_ext + (R_dr + res_beol) * cap_beol + (R_dr + res_beol + 0.5 * res) * cap
|
||||
+ (R_dr + res_beol + res + res_beol) * (cap_beol + C_int);
|
||||
delay += delay_TSV_path;
|
||||
*/
|
||||
}
|
||||
|
||||
void TSV::print_TSV()
|
||||
{
|
||||
|
||||
cout << "\nTSV Properties:\n\n";
|
||||
cout << " Delay Optimal - "<<
|
||||
" \n\tTSV Cap: " << cap * 1e15 << " fF" <<
|
||||
" \n\tTSV Res: " << res * 1e3 << " mOhm"<<
|
||||
" \n\tNumber of Buffer Chain stages - " << num_gates <<
|
||||
" \n\tDelay - " << delay * 1e9 << " (ns) "
|
||||
" \n\tPowerD - " << power.readOp.dynamic * 1e9<< " (nJ)"
|
||||
" \n\tPowerL - " << power.readOp.leakage * 1e3<< " (mW)"
|
||||
" \n\tPowerLgate - " << power.readOp.gate_leakage * 1e3<< " (mW)\n" <<
|
||||
" \n\tBuffer Area: " << Buffer_area.get_area() << " um2" <<
|
||||
" \n\tBuffer Height: " << Buffer_area.h << " um" <<
|
||||
" \n\tBuffer Width: " << Buffer_area.w << " um" <<
|
||||
" \n\tTSV metal area: " << TSV_metal_area.get_area() << " um2" <<
|
||||
" \n\tTSV minimum occupied area: " <<min_area << " um2"<<
|
||||
" \n\tTotal area: " << area.get_area() << " um2";
|
||||
cout <<endl;
|
||||
cout <<endl;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
96
T1/TP/TP1/cacti_7/TSV.h
Normal file
96
T1/TP/TP1/cacti_7/TSV.h
Normal file
|
@ -0,0 +1,96 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef TSV_H_
|
||||
#define TSV_H_
|
||||
|
||||
#include "basic_circuit.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
//#include "assert.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "const.h"
|
||||
//#include "area.h"
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
|
||||
|
||||
class TSV : public Component
|
||||
{
|
||||
public:
|
||||
TSV(enum TSV_type tsv_type,
|
||||
/*TechnologyParameter::*/DeviceType * dt = &(g_tp.peri_global));//Should change peri_global to TSV in technology.cc
|
||||
//TSV():len(20),rad(2.5),pitch(50){}
|
||||
~TSV();
|
||||
|
||||
double res;//TSV resistance
|
||||
double cap;//TSV capacitance
|
||||
double C_load_TSV;//The intrinsic load plus the load TSV is driving, needs changes?
|
||||
double min_area;
|
||||
|
||||
//int num_IO;//number of I/O
|
||||
int num_gates;
|
||||
int num_gates_min;//Necessary?
|
||||
double w_TSV_n[MAX_NUMBER_GATES_STAGE];
|
||||
double w_TSV_p[MAX_NUMBER_GATES_STAGE];
|
||||
|
||||
//double delay_TSV_path;//Delay of TSV path including the parasitics
|
||||
|
||||
double is_dram;//two external arguments, defaulted to be false in constructor
|
||||
double is_wl_tr;
|
||||
|
||||
void compute_buffer_stage();
|
||||
void compute_area();
|
||||
void compute_delay();
|
||||
void print_TSV();
|
||||
|
||||
Area TSV_metal_area;
|
||||
Area Buffer_area;
|
||||
|
||||
/*//Herigated from Component
|
||||
double delay;
|
||||
Area area;
|
||||
powerDef power, rt_power;
|
||||
double delay;
|
||||
double cycle_time;
|
||||
|
||||
int logical_effort();*/
|
||||
|
||||
private:
|
||||
double min_w_pmos;
|
||||
/*TechnologyParameter::*/DeviceType * deviceType;
|
||||
unsigned int tsv_type;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif /* TSV_H_ */
|
1073
T1/TP/TP1/cacti_7/Ucache.cc
Normal file
1073
T1/TP/TP1/cacti_7/Ucache.cc
Normal file
File diff suppressed because it is too large
Load diff
118
T1/TP/TP1/cacti_7/Ucache.h
Normal file
118
T1/TP/TP1/cacti_7/Ucache.h
Normal file
|
@ -0,0 +1,118 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#ifndef __UCACHE_H__
|
||||
#define __UCACHE_H__
|
||||
|
||||
#include <list>
|
||||
#include "area.h"
|
||||
#include "router.h"
|
||||
#include "nuca.h"
|
||||
|
||||
|
||||
class min_values_t
|
||||
{
|
||||
public:
|
||||
double min_delay;
|
||||
double min_dyn;
|
||||
double min_leakage;
|
||||
double min_area;
|
||||
double min_cyc;
|
||||
|
||||
min_values_t() : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), min_area(BIGNUM), min_cyc(BIGNUM) { }
|
||||
|
||||
void update_min_values(const min_values_t * val);
|
||||
void update_min_values(const uca_org_t & res);
|
||||
void update_min_values(const nuca_org_t * res);
|
||||
void update_min_values(const mem_array * res);
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct solution
|
||||
{
|
||||
int tag_array_index;
|
||||
int data_array_index;
|
||||
list<mem_array *>::iterator tag_array_iter;
|
||||
list<mem_array *>::iterator data_array_iter;
|
||||
double access_time;
|
||||
double cycle_time;
|
||||
double area;
|
||||
double efficiency;
|
||||
powerDef total_power;
|
||||
};
|
||||
|
||||
|
||||
|
||||
bool calculate_time(
|
||||
bool is_tag,
|
||||
int pure_ram,
|
||||
bool pure_cam,
|
||||
double Nspd,
|
||||
unsigned int Ndwl,
|
||||
unsigned int Ndbl,
|
||||
unsigned int Ndcm,
|
||||
unsigned int Ndsam_lev_1,
|
||||
unsigned int Ndsam_lev_2,
|
||||
mem_array *ptr_array,
|
||||
int flag_results_populate,
|
||||
results_mem_array *ptr_results,
|
||||
uca_org_t *ptr_fin_res,
|
||||
Wire_type wtype, // merge from cacti-7 to cacti3d
|
||||
bool is_main_mem);
|
||||
void update(uca_org_t *fin_res);
|
||||
|
||||
void solve(uca_org_t *fin_res);
|
||||
void init_tech_params(double tech, bool is_tag);
|
||||
|
||||
|
||||
struct calc_time_mt_wrapper_struct
|
||||
{
|
||||
uint32_t tid;
|
||||
bool is_tag;
|
||||
bool pure_ram;
|
||||
bool pure_cam;
|
||||
bool is_main_mem;
|
||||
double Nspd_min;
|
||||
|
||||
min_values_t * data_res;
|
||||
min_values_t * tag_res;
|
||||
|
||||
list<mem_array *> data_arr;
|
||||
list<mem_array *> tag_arr;
|
||||
};
|
||||
|
||||
void *calc_time_mt_wrapper(void * void_obj);
|
||||
|
||||
void print_g_tp();
|
||||
|
||||
#endif
|
53
T1/TP/TP1/cacti_7/_script.py
Executable file
53
T1/TP/TP1/cacti_7/_script.py
Executable file
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: utf-8 -*-
"""
TP1 T1
Script for trying out different cache configurations with cacti.

It rewrites the four port-count lines that follow the "<configs_ports>"
marker in the cacti configuration file, runs cacti on that file and prints
every "Access time (ns):" value found in its output.
"""

import os
import subprocess

chemin_fichier_config = "_fichiers/configs/"
chemin_fichier_result = "_fichiers/resultats/"

# Each entry holds the port counts written to the config file, in this order:
# [read-write, exclusive read, exclusive write, single ended read].
configurations = [[1,0,0,0],[1,0,1,1],[1,0,0,1],[1,1,1,1]]

nom_fichier_config = "cache.cfg"

# Index of the entry of `configurations` to apply.
i = 0

chemin_config = os.path.join(chemin_fichier_config, nom_fichier_config)

# One template per line to rewrite, in the order they follow the marker.
modeles_ports = [
    "-read-write port {}\n",
    "-exclusive read port {}\n",
    "-exclusive write port {}\n",
    "-single ended read ports {}\n",
]

#----------- This section sets the number of input/output ports in the config file ---------
with open(chemin_config, "r") as fichier:
    Lignes = fichier.readlines()

for index, ligne in enumerate(Lignes):
    if "<configs_ports>" in ligne:
        # The four lines right after the marker are overwritten; make sure
        # they exist instead of failing with a bare IndexError.
        if index + len(modeles_ports) >= len(Lignes):
            raise ValueError(
                "expected {} lines after <configs_ports> in {}".format(
                    len(modeles_ports), chemin_config))
        for decalage, (modele, valeur) in enumerate(
                zip(modeles_ports, configurations[i]), start=1):
            Lignes[index + decalage] = modele.format(valeur)
        break

with open(chemin_config, "w") as fichier:
    fichier.writelines(Lignes)


#----------- Section that runs the cacti simulation -------------------------------

# Argument list instead of a shell string: no shell is involved, and the
# output pipe is closed once the process has finished.
commande = ["./cacti", "-infile", chemin_config]
processus = subprocess.run(commande, stdout=subprocess.PIPE,
                           universal_newlines=True)
resultat = processus.stdout.split('\n')

for ligne in resultat:
    if "Access time (ns):" in ligne:
        val_tps = float(ligne.split(':')[1])
        print(val_tps)
|
||||
|
||||
|
130
T1/TP/TP1/cacti_7/arbiter.cc
Normal file
130
T1/TP/TP1/cacti_7/arbiter.cc
Normal file
|
@ -0,0 +1,130 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "arbiter.h"
|
||||
|
||||
/**
 * Set up an arbiter.
 *
 * @param n_req       stored in R (printed as the number of arbiter inputs).
 * @param flit_size_  stored in flit_size (printed in bits).
 * @param output_len  stored in o_len; crossbar_ctrline() scales it by 1e-6
 *                    before computing wire capacitance.
 * @param dt          device type supplying Vdd and the n-to-p drive ratio.
 *
 * All transistor sizes are derived from the feature size g_ip->F_sz_um.
 */
Arbiter::Arbiter(
    double n_req,
    double flit_size_,
    double output_len,
    /*TechnologyParameter::*/DeviceType *dt
    ):R(n_req), flit_size(flit_size_),
  o_len (output_len), deviceType(dt)
{
  const double technology = g_ip->F_sz_um;

  min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
  Vdd        = deviceType->Vdd;

  // Both NOR gates use the same n/p sizes.
  NTn1 = NTn2 = 13.5*technology/2;
  PTn1 = PTn2 = 76*technology/2;

  // Inverter.
  NTi = 12.5*technology/2;
  PTi = 25*technology/2;

  // Transmission gate: nmos then pmos tr. length.
  NTtr = 10*technology/2;
  PTtr = 20*technology/2;
}
|
||||
|
||||
// Nothing to release: the destructor body is intentionally empty.
Arbiter::~Arbiter()
{
}
|
||||
|
||||
double
|
||||
Arbiter::arb_req() {
|
||||
double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
|
||||
gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
|
||||
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
|
||||
return temp;
|
||||
}
|
||||
|
||||
double
|
||||
Arbiter::arb_pri() {
|
||||
double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
|
||||
of flip-flop is ignored */
|
||||
return temp;
|
||||
}
|
||||
|
||||
|
||||
double
|
||||
Arbiter::arb_grant() {
|
||||
double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
|
||||
return temp;
|
||||
}
|
||||
|
||||
double
|
||||
Arbiter::arb_int() {
|
||||
double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
|
||||
2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
|
||||
return temp;
|
||||
}
|
||||
|
||||
void
|
||||
Arbiter::compute_power() {
|
||||
power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
|
||||
arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
|
||||
double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
|
||||
double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
|
||||
double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
|
||||
double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
|
||||
double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
|
||||
double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
|
||||
power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
|
||||
power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
|
||||
}
|
||||
|
||||
double //wire cap with triple spacing
|
||||
Arbiter::Cw3(double length) {
|
||||
Wire wc(g_ip->wt, length, 1, 3, 3);
|
||||
double temp = (wc.wire_cap(length,true));
|
||||
return temp;
|
||||
}
|
||||
|
||||
double
|
||||
Arbiter::crossbar_ctrline() {
|
||||
double temp = (Cw3(o_len * 1e-6 /* m */) +
|
||||
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(NTi, 0) + gate_C(PTi, 0));
|
||||
return temp;
|
||||
}
|
||||
|
||||
double
|
||||
Arbiter::transmission_buf_ctrcap() {
|
||||
double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
|
||||
return temp;
|
||||
}
|
||||
|
||||
|
||||
void Arbiter::print_arbiter()
|
||||
{
|
||||
cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
|
||||
cout << "Flit size : " << flit_size << " bits" << endl;
|
||||
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
|
||||
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
|
||||
}
|
||||
|
||||
|
77
T1/TP/TP1/cacti_7/arbiter.h
Normal file
77
T1/TP/TP1/cacti_7/arbiter.h
Normal file
|
@ -0,0 +1,77 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef __ARBITER__
|
||||
#define __ARBITER__
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include "basic_circuit.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
#include "mat.h"
|
||||
#include "wire.h"
|
||||
|
||||
class Arbiter : public Component
|
||||
{
|
||||
public:
|
||||
Arbiter(
|
||||
double Req,
|
||||
double flit_sz,
|
||||
double output_len,
|
||||
/*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global));
|
||||
~Arbiter();
|
||||
|
||||
void print_arbiter();
|
||||
double arb_req();
|
||||
double arb_pri();
|
||||
double arb_grant();
|
||||
double arb_int();
|
||||
void compute_power();
|
||||
double Cw3(double len);
|
||||
double crossbar_ctrline();
|
||||
double transmission_buf_ctrcap();
|
||||
|
||||
|
||||
|
||||
private:
|
||||
double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi;
|
||||
double flit_size;
|
||||
double NTtr, PTtr;
|
||||
double o_len;
|
||||
/*TechnologyParameter::*/DeviceType *deviceType;
|
||||
double TriS1, TriS2;
|
||||
double min_w_pmos, Vdd;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
46
T1/TP/TP1/cacti_7/area.cc
Normal file
46
T1/TP/TP1/cacti_7/area.cc
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "area.h"
|
||||
#include "component.h"
|
||||
#include "decoder.h"
|
||||
#include "parameter.h"
|
||||
#include "basic_circuit.h"
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
|
71
T1/TP/TP1/cacti_7/area.h
Normal file
71
T1/TP/TP1/cacti_7/area.h
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __AREA_H__
|
||||
#define __AREA_H__
|
||||
|
||||
#include "cacti_interface.h"
|
||||
#include "basic_circuit.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Footprint of a component. Holds an explicit width/height pair plus a
// separately stored area value that is used only while both dimensions are 0.
class Area
{
  public:
    double w;
    double h;

    Area() : w(0), h(0), area(0) { }

    double get_w() const { return w; }
    double get_h() const { return h; }

    // Returns w*h, unless w and h are both exactly zero, in which case the
    // value last stored through set_area() is returned instead.
    double get_area() const
    {
      const bool no_dimensions = (w == 0 && h == 0);
      return no_dimensions ? area : w*h;
    }

    void set_w(double w_)    { w = w_; }
    void set_h(double h_)    { h = h_; }
    void set_area(double a_) { area = a_; }

  private:
    double area;  // fallback reported by get_area() when w == h == 0
};
|
||||
|
||||
#endif
|
||||
|
206
T1/TP/TP1/cacti_7/bank.cc
Normal file
206
T1/TP/TP1/cacti_7/bank.cc
Normal file
|
@ -0,0 +1,206 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "bank.h"
|
||||
#include <iostream>
|
||||
|
||||
|
||||
// Builds the bank model: one Mat plus the H-trees that carry address, data and
// (for fully-associative / CAM arrays only) search signals.
//
// dyn_p is stored by reference in `dp`; the caller must keep it alive for the
// lifetime of the Bank.
Bank::Bank(const DynamicParameter & dyn_p):
  dp(dyn_p), mat(dp),
  // Null-initialise every owned pointer so that none is ever indeterminate;
  // the search trees are only allocated for FA/CAM arrays below.
  htree_in_add(0), htree_in_data(0), htree_out_data(0),
  htree_in_search(0), htree_out_search(0),
  num_addr_b_mat(dyn_p.number_addr_bits_mat),
  num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir),
  array_leakage(0),
  wl_leakage(0),
  cl_leakage(0)
{
  const bool cam_or_fa = dp.fully_assoc || dp.pure_cam;

  // Port counts come either from the dynamic parameters or from the global
  // input parameters, depending on dp.use_inp_params.
  int RWP;
  int ERP;
  int EWP;
  int SCHP;

  if (dp.use_inp_params)
  {
    RWP  = dp.num_rw_ports;
    ERP  = dp.num_rd_ports;
    EWP  = dp.num_wr_ports;
    SCHP = dp.num_search_ports;
  }
  else
  {
    RWP  = g_ip->num_rw_ports;
    ERP  = g_ip->num_rd_ports;
    EWP  = g_ip->num_wr_ports;
    SCHP = g_ip->num_search_ports;
  }

  int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
  int datainbits     = dp.num_di_b_bank_per_port * (RWP + EWP);
  int dataoutbits    = dp.num_do_b_bank_per_port * (RWP + ERP);
  // Search widths stay 0 for plain RAM arrays (the value the H-trees were
  // previously given as a literal in that case).
  int searchinbits   = 0;
  int searchoutbits  = 0;

  if (cam_or_fa)
  {
    searchinbits  = dp.num_si_b_bank_per_port * SCHP;
    searchoutbits = dp.num_so_b_bank_per_port * SCHP;
  }
  else if (g_ip->fast_access && dp.is_tag == false)
  {
    // Fast-access data arrays are widened by the data associativity.
    dataoutbits *= g_ip->data_assoc;
  }

  htree_in_add   = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h,
      total_addrbits, datainbits, searchinbits, dataoutbits, searchoutbits,
      num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
  htree_in_data  = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h,
      total_addrbits, datainbits, searchinbits, dataoutbits, searchoutbits,
      num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
  htree_out_data = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h,
      total_addrbits, datainbits, searchinbits, dataoutbits, searchoutbits,
      num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);

  if (cam_or_fa)
  {
    htree_in_search  = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h,
        total_addrbits, datainbits, searchinbits, dataoutbits, searchoutbits,
        num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree, true, true);
    htree_out_search = new Htree2 (dp.wtype/*g_ip->wt*/,(double) mat.area.w, (double)mat.area.h,
        total_addrbits, datainbits, searchinbits, dataoutbits, searchoutbits,
        num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree, true);
  }

  // The bank footprint is taken from the data-in H-tree.
  area.w = htree_in_data->area.w;
  area.h = htree_in_data->area.h;

  num_addr_b_row_dec                    = _log2(mat.subarray.num_rows);
  num_addr_b_routed_to_mat_for_act      = num_addr_b_row_dec;
  num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
}
|
||||
|
||||
|
||||
|
||||
// Releases the H-trees allocated by the constructor.
Bank::~Bank()
{
  delete htree_in_add;
  delete htree_out_data;
  delete htree_in_data;

  // The constructor allocates the search trees only for fully-associative or
  // CAM arrays, so they are released under the same condition.
  const bool has_search_trees = (dp.fully_assoc || dp.pure_cam);
  if (!has_search_trees)
  {
    return;
  }

  delete htree_in_search;
  delete htree_out_search;
}
|
||||
|
||||
|
||||
|
||||
double Bank::compute_delays(double inrisetime)
|
||||
{
|
||||
return mat.compute_delays(inrisetime);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Bank::compute_power_energy()
|
||||
{
|
||||
mat.compute_power_energy();
|
||||
|
||||
if (!(dp.fully_assoc || dp.pure_cam))
|
||||
{
|
||||
power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
||||
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
|
||||
|
||||
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
|
||||
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
|
||||
|
||||
array_leakage += mat.array_leakage*dp.num_mats;
|
||||
wl_leakage += mat.wl_leakage*dp.num_mats;
|
||||
cl_leakage += mat.cl_leakage*dp.num_mats;
|
||||
//
|
||||
// power.readOp.leakage += htree_in_add->power.readOp.leakage;
|
||||
// power.readOp.leakage += htree_in_data->power.readOp.leakage;
|
||||
// power.readOp.leakage += htree_out_data->power.readOp.leakage;
|
||||
// power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
|
||||
// power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
|
||||
// power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
|
||||
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
||||
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
|
||||
|
||||
power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
|
||||
power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
|
||||
mat.power_sa.searchOp.dynamic +
|
||||
mat.power_bitline.searchOp.dynamic +
|
||||
mat.power_subarray_out_drv.searchOp.dynamic+
|
||||
mat.ml_to_ram_wl_drv->power.readOp.dynamic;
|
||||
|
||||
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
|
||||
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
|
||||
|
||||
power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
|
||||
power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
|
||||
|
||||
power.readOp.leakage += htree_in_add->power.readOp.leakage;
|
||||
power.readOp.leakage += htree_in_data->power.readOp.leakage;
|
||||
power.readOp.leakage += htree_out_data->power.readOp.leakage;
|
||||
power.readOp.leakage += htree_in_search->power.readOp.leakage;
|
||||
power.readOp.leakage += htree_out_search->power.readOp.leakage;
|
||||
|
||||
|
||||
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
|
||||
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
|
||||
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
|
||||
power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
|
||||
power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
74
T1/TP/TP1/cacti_7/bank.h
Normal file
74
T1/TP/TP1/cacti_7/bank.h
Normal file
|
@ -0,0 +1,74 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __BANK_H__
|
||||
#define __BANK_H__
|
||||
|
||||
#include "component.h"
|
||||
#include "decoder.h"
|
||||
#include "mat.h"
|
||||
#include "htree2.h"
|
||||
|
||||
|
||||
class Bank : public Component
|
||||
{
|
||||
public:
|
||||
Bank(const DynamicParameter & dyn_p);
|
||||
~Bank();
|
||||
double compute_delays(double inrisetime); // return outrisetime
|
||||
void compute_power_energy();
|
||||
|
||||
const DynamicParameter & dp;
|
||||
Mat mat;
|
||||
Htree2 *htree_in_add;
|
||||
Htree2 *htree_in_data;
|
||||
Htree2 *htree_out_data;
|
||||
Htree2 *htree_in_search;
|
||||
Htree2 *htree_out_search;
|
||||
|
||||
int num_addr_b_mat;
|
||||
int num_mats_hor_dir;
|
||||
int num_mats_ver_dir;
|
||||
|
||||
int num_addr_b_row_dec;
|
||||
int num_addr_b_routed_to_mat_for_act;
|
||||
int num_addr_b_routed_to_mat_for_rd_or_wr;
|
||||
|
||||
double array_leakage;
|
||||
double wl_leakage;
|
||||
double cl_leakage;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
999
T1/TP/TP1/cacti_7/basic_circuit.cc
Normal file
999
T1/TP/TP1/cacti_7/basic_circuit.cc
Normal file
|
@ -0,0 +1,999 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
|
||||
#include "basic_circuit.h"
|
||||
#include "parameter.h"
|
||||
#include <iostream>
|
||||
#include <assert.h>
|
||||
#include <cmath>
|
||||
|
||||
// Integer base-2 logarithm, rounded down.
// A zero argument prints "log0?" to stderr and terminates the process.
uint32_t _log2(uint64_t num)
{
  if (num == 0)
  {
    std::cerr << "log0?" << std::endl;
    exit(1);
  }

  // Count how many right shifts it takes to reduce the value to 1.
  uint32_t shifts = 0;
  for (uint64_t rest = num; rest > 1; rest >>= 1)
  {
    ++shifts;
  }

  return shifts;
}
|
||||
|
||||
|
||||
// Returns true when val is a positive power of two (1, 2, 4, ...).
// Zero and negative values are never powers of two.
bool is_pow2(int64_t val)
{
  if (val <= 0)
  {
    return false;
  }

  // A power of two has exactly one bit set, so clearing the lowest set bit
  // (val & (val - 1)) leaves zero. This replaces two shift loops with one
  // constant-time test.
  return (val & (val - 1)) == 0;
}
|
||||
|
||||
|
||||
// Integer exponentiation by repeated multiplication: base raised to n.
// For n <= 0 no multiplication happens and the result is 1.
int powers (int base, int n)
{
  int product = 1;
  int remaining = n;

  while (remaining > 0)
  {
    product *= base;
    --remaining;
  }
  return product;
}
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
// Base-2 logarithm of x, computed as ln(x) / ln(2).
// x must be strictly positive (checked with assert).
double logtwo (double x)
{
  assert(x > 0);

  const double ln_x = log (x);
  const double ln_2 = log (2.0);
  return (double) (ln_x / ln_2);
}
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
|
||||
double gate_C(
|
||||
double width,
|
||||
double wirelength,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
const /*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if (_is_dram && _is_cell)
|
||||
{
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if (_is_dram && _is_wl_tr)
|
||||
{
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if (!_is_dram && _is_cell)
|
||||
{
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
|
||||
}
|
||||
|
||||
|
||||
// returns gate capacitance in Farads
|
||||
// actually this function is the same as gate_C() now
|
||||
double gate_C_pass(
|
||||
double width, // gate width in um (length is Lphy_periph_global)
|
||||
double wirelength, // poly wire length going to gate in lambda
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
// v5.0
|
||||
const /*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && _is_cell)
|
||||
{
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double drain_C_(
|
||||
double width,
|
||||
int nchannel,
|
||||
int stack,
|
||||
int next_arg_thresh_folding_width_or_height_cell,
|
||||
double fold_dimension,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
double w_folded_tr;
|
||||
const /*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
dt = &g_tp.dram_acc; // DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
dt = &g_tp.dram_wl; // DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && _is_cell)
|
||||
{
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
double c_junc_area = dt->C_junc;
|
||||
double c_junc_sidewall = dt->C_junc_sidewall;
|
||||
double c_fringe = 2*dt->C_fringe;
|
||||
double c_overlap = 2*dt->C_overlap;
|
||||
double drain_C_metal_connecting_folded_tr = 0;
|
||||
|
||||
// determine the width of the transistor after folding (if it is getting folded)
|
||||
if (next_arg_thresh_folding_width_or_height_cell == 0)
|
||||
{ // interpret fold_dimension as the the folding width threshold
|
||||
// i.e. the value of transistor width above which the transistor gets folded
|
||||
w_folded_tr = fold_dimension;
|
||||
}
|
||||
else
|
||||
{ // interpret fold_dimension as the height of the cell that this transistor is part of.
|
||||
double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
|
||||
// TODO : w_folded_tr must come from Component::compute_gate_area()
|
||||
double ratio_p_to_n = 2.0 / (2.0 + 1.0);
|
||||
if (nchannel)
|
||||
{
|
||||
w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
|
||||
}
|
||||
else
|
||||
{
|
||||
w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
|
||||
}
|
||||
}
|
||||
int num_folded_tr = (int) (ceil(width / w_folded_tr));
|
||||
|
||||
if (num_folded_tr < 2)
|
||||
{
|
||||
w_folded_tr = width;
|
||||
}
|
||||
|
||||
double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
|
||||
(stack - 1) * g_tp.spacing_poly_to_poly;
|
||||
double drain_h_for_sidewall = w_folded_tr;
|
||||
double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
|
||||
if (num_folded_tr > 1)
|
||||
{
|
||||
total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
|
||||
(num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
|
||||
|
||||
if (num_folded_tr%2 == 0)
|
||||
{
|
||||
drain_h_for_sidewall = 0;
|
||||
}
|
||||
total_drain_height_for_cap_wrt_gate *= num_folded_tr;
|
||||
drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
|
||||
}
|
||||
|
||||
double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
|
||||
double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
|
||||
double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
|
||||
|
||||
return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
|
||||
}
|
||||
|
||||
|
||||
double tr_R_on(
|
||||
double width,
|
||||
int nchannel,
|
||||
int stack,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
const /*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && _is_cell)
|
||||
{
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
|
||||
return (stack * restrans / width);
|
||||
}
|
||||
|
||||
|
||||
/* This routine operates in reverse: given a resistance, it finds
|
||||
* the transistor width that would have this R. It is used in the
|
||||
* data wordline to estimate the wordline driver size. */
|
||||
|
||||
// returns width in um
|
||||
double R_to_w(
|
||||
double res,
|
||||
int nchannel,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
const /*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && (_is_cell))
|
||||
{
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
|
||||
return (restrans / res);
|
||||
}
|
||||
|
||||
|
||||
double pmos_to_nmos_sz_ratio(
|
||||
bool _is_dram,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
double p_to_n_sizing_ratio;
|
||||
if ((_is_dram) && (_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
p_to_n_sizing_ratio = g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
|
||||
}
|
||||
return p_to_n_sizing_ratio;
|
||||
}
|
||||
|
||||
|
||||
// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
|
||||
double horowitz(
|
||||
double inputramptime, // input rise time
|
||||
double tf, // time constant of gate
|
||||
double vs1, // threshold voltage
|
||||
double vs2, // threshold voltage
|
||||
int rise) // whether input rises or fall
|
||||
{
|
||||
if (inputramptime == 0 && vs1 == vs2)
|
||||
{
|
||||
return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
|
||||
}
|
||||
double a, b, td;
|
||||
|
||||
a = inputramptime / tf;
|
||||
if (rise == RISE)
|
||||
{
|
||||
b = 0.5;
|
||||
td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
|
||||
}
|
||||
else
|
||||
{
|
||||
b = 0.4;
|
||||
td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
|
||||
}
|
||||
return (td);
|
||||
}
|
||||
|
||||
double cmos_Ileak(
|
||||
double nWidth,
|
||||
double pWidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
/*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
|
||||
}
|
||||
|
||||
// Product of the consecutive integers m, m+1, ..., n.
// With m == 1 this is n!. When m > n the range is empty and the result is 1
// (the empty product); previously that case returned m, which made
// combination(n, n) evaluate to n + 1 instead of 1.
int factorial(int n, int m)
{
  if (m > n)
  {
    return 1;
  }

  int fa = m;
  for (int i = m + 1; i <= n; i++)
  {
    fa *= i;
  }
  return fa;
}
|
||||
|
||||
// Binomial coefficient C(n, m): the number of ways to choose m items out of n.
// Returns 0 when m is outside [0, n].
//
// Uses the multiplicative formula so that C(n, n) is 1 and no intermediate
// factorial has to fit in an int.
int combination(int n, int m)
{
  if (m < 0 || m > n)
  {
    return 0;
  }

  // C(n, m) == C(n, n - m); iterate over the smaller of the two.
  if (m > n - m)
  {
    m = n - m;
  }

  long long ret = 1;
  for (int i = 1; i <= m; i++)
  {
    // After this step ret == C(n - m + i, i), so the division is exact.
    ret = ret * (n - m + i) / i;
  }
  return (int) ret;
}
|
||||
|
||||
double simplified_nmos_Isat(
|
||||
double nwidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
/*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return nwidth * dt->I_on_n;
|
||||
}
|
||||
|
||||
double simplified_pmos_Isat(
|
||||
double pwidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
/*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return pwidth * dt->I_on_n/dt->n_to_p_eff_curr_drv_ratio;
|
||||
}
|
||||
|
||||
|
||||
double simplified_nmos_leakage(
|
||||
double nwidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
/*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return nwidth * dt->I_off_n;
|
||||
}
|
||||
|
||||
double simplified_pmos_leakage(
|
||||
double pwidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
/*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return pwidth * dt->I_off_p;
|
||||
}
|
||||
|
||||
double cmos_Ig_n(
|
||||
double nWidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
/*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return nWidth*dt->I_g_on_n;
|
||||
}
|
||||
|
||||
double cmos_Ig_p(
|
||||
double pWidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx)
|
||||
{
|
||||
/*TechnologyParameter::*/DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else if (_is_sleep_tx)
|
||||
{
|
||||
dt = &g_tp.sleep_tx; // Sleep transistor
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return pWidth*dt->I_g_on_p;
|
||||
}
|
||||
|
||||
double cmos_Isub_leakage(
|
||||
double nWidth,
|
||||
double pWidth,
|
||||
int fanin,
|
||||
enum Gate_type g_type,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx,
|
||||
enum Half_net_topology topo)
|
||||
{
|
||||
assert (fanin>=1);
|
||||
double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
|
||||
double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
|
||||
double Isub=0;
|
||||
int num_states;
|
||||
int num_off_tx;
|
||||
|
||||
num_states = int(pow(2.0, fanin));
|
||||
|
||||
switch (g_type)
|
||||
{
|
||||
case nmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
Isub = nmos_leak/num_states;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
|
||||
}
|
||||
else
|
||||
{
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
|
||||
{
|
||||
//Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub /=num_states;
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
case pmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
Isub = pmos_leak/num_states;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
|
||||
}
|
||||
else
|
||||
{
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
|
||||
{
|
||||
//Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub /=num_states;
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
case inv:
|
||||
Isub = (nmos_leak + pmos_leak)/2;
|
||||
break;
|
||||
case nand:
|
||||
Isub += fanin*pmos_leak;//the pullup network
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
|
||||
{
|
||||
//Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub /=num_states;
|
||||
break;
|
||||
case nor:
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
|
||||
{
|
||||
//Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub += fanin*nmos_leak;//the pulldown network
|
||||
Isub /=num_states;
|
||||
break;
|
||||
case tri:
|
||||
Isub += (nmos_leak + pmos_leak)/2;//enabled
|
||||
Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
|
||||
Isub /=2;
|
||||
break;
|
||||
case tg:
|
||||
Isub = (nmos_leak + pmos_leak)/2;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
return Isub;
|
||||
}
|
||||
|
||||
|
||||
double cmos_Ig_leakage(
|
||||
double nWidth,
|
||||
double pWidth,
|
||||
int fanin,
|
||||
enum Gate_type g_type,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
bool _is_sleep_tx,
|
||||
enum Half_net_topology topo)
|
||||
{
|
||||
assert (fanin>=1);
|
||||
double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
|
||||
double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx);
|
||||
double Ig_on=0;
|
||||
int num_states;
|
||||
int num_on_tx;
|
||||
|
||||
num_states = int(pow(2.0, fanin));
|
||||
|
||||
switch (g_type)
|
||||
{
|
||||
case nmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
Ig_on = nmos_leak/num_states;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
|
||||
{
|
||||
Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
|
||||
//num_on_tx is the number of on tx
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
|
||||
{
|
||||
Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
|
||||
}
|
||||
Ig_on /=num_states;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case pmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
Ig_on = pmos_leak/num_states;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
|
||||
{
|
||||
Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Ig_on += pmos_leak * fanin;//pull down network when all TXs are on.
|
||||
//num_on_tx is the number of on tx
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
|
||||
{
|
||||
Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
|
||||
}
|
||||
Ig_on /=num_states;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case inv:
|
||||
Ig_on = (nmos_leak + pmos_leak)/2;
|
||||
break;
|
||||
case nand:
|
||||
//pull up network
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
|
||||
{
|
||||
Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
|
||||
}
|
||||
|
||||
//pull down network
|
||||
Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
|
||||
//num_on_tx is the number of on tx
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
|
||||
{
|
||||
Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
|
||||
}
|
||||
Ig_on /=num_states;
|
||||
break;
|
||||
case nor:
|
||||
// num_on_tx is the number of on tx in pull up network
|
||||
Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)
|
||||
{
|
||||
Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;
|
||||
|
||||
}
|
||||
//pull down network
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
|
||||
{
|
||||
Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
|
||||
}
|
||||
Ig_on /=num_states;
|
||||
break;
|
||||
case tri:
|
||||
Ig_on += (2*nmos_leak + 2*pmos_leak)/2;//enabled
|
||||
Ig_on += (nmos_leak + pmos_leak)/2; //disabled upper bound of leakage power
|
||||
Ig_on /=2;
|
||||
break;
|
||||
case tg:
|
||||
Ig_on = (nmos_leak + pmos_leak)/2;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
return Ig_on;
|
||||
}
|
||||
|
||||
// Simplified short-circuit estimate for a CMOS gate (the original author
// notes the returned quantity is actually an energy).
//
// Only the "low" regime formula is used; the result is the mean of the
// discharge and charge terms:
//   common    = (10/3) * ((vdd - vt) - vt/vdd)^3 / velocity_index^2
//               / 2^(3 * (vt/vdd)^2) * c_in * vdd^2
//   discharge = common * fo_p^2 / fanout / beta_ratio
//   charge    = common * fo_n^2 / fanout * beta_ratio
// with fo_n = i_on_n / i_on_n_in, fo_p = i_on_p / i_on_p_in,
// fanout = c_out / c_in and beta_ratio = i_on_p / i_on_n.
//
// w_nmos and w_pmos are part of the interface but are not used.
//
// Fix relative to the previous version: the coefficient was written as the
// integer expression 10/3, which evaluates to 3; it is now 10.0/3.
double shortcircuit_simple(
    double vt,
    double velocity_index,
    double c_in,
    double c_out,
    double w_nmos,
    double w_pmos,
    double i_on_n,
    double i_on_p,
    double i_on_n_in,
    double i_on_p_in,
    double vdd)
{
  (void) w_nmos;
  (void) w_pmos;

  const double fo_n = i_on_n/i_on_n_in;
  const double fo_p = i_on_p/i_on_p_in;
  const double fanout = c_out/c_in;
  const double beta_ratio = i_on_p/i_on_n;
  const double vt_to_vdd_ratio = vt/vdd;

  // Factor shared by the charge and discharge terms.
  const double common = 10.0/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd;

  const double p_short_circuit_discharge = common*fo_p*fo_p/fanout/beta_ratio;
  const double p_short_circuit_charge = common*fo_n*fo_n/fanout*beta_ratio;

  // A harmonic mean with the "high" regime terms was tried previously and
  // dropped: it cannot be applied to these simple formulas.
  return (p_short_circuit_discharge + p_short_circuit_charge)/2;
}
|
||||
|
||||
// Detailed short-circuit model (work in progress in the original code).
//
// The function evaluates the intermediate terms f_alpha, k_v, g_v_alpha and
// h_v_alpha from velocity_index, vdd and vt, and combines them with c_in,
// fo_p = i_on_p / i_on_p_in, beta_ratio = i_on_p / i_on_n and a fixed
// fanout of 1 into a discharge term. The value it returns, however, is
// p_short_circuit, which is initialised to 0 and never assigned again, so
// the function always returns 0.
//
// NOTE(review): the computed discharge term is discarded. Confirm whether
// that is intentional before wiring it into the return value; g_v_alpha and
// h_v_alpha raise (1 - velocity_index) to non-integer powers, which looks
// suspicious and should be checked against the model's source formula.
//
// c_out, w_nmos, w_pmos and i_on_n_in are part of the interface but are not
// used.
double shortcircuit(
    double vt,
    double velocity_index,
    double c_in,
    double c_out,
    double w_nmos,
    double w_pmos,
    double i_on_n,
    double i_on_p,
    double i_on_n_in,
    double i_on_p_in,
    double vdd)
{
  const double p_short_circuit = 0; // this is actually energy
  const double e = 2.71828;

  const double fo_p = i_on_p/i_on_p_in;
  const double fanout = 1;
  const double beta_ratio = i_on_p/i_on_n;

  const double f_alpha = 1/(velocity_index+2) -velocity_index/(2*(velocity_index+3)) +velocity_index/(velocity_index+4)*(velocity_index/2-1);
  const double k_v = 0.9/0.8+(vdd-vt)/0.8*log(10*(vdd-vt)/e);
  const double g_v_alpha = (velocity_index + 1)*pow((1-velocity_index),velocity_index)*pow((1-velocity_index),velocity_index/2)/f_alpha/pow((1-velocity_index-velocity_index),(velocity_index/2+velocity_index+2));
  const double h_v_alpha = pow(2, velocity_index)*(velocity_index+1)*pow((1-velocity_index),velocity_index)/pow((1-velocity_index-velocity_index),(velocity_index+1));

  // Evaluated exactly as before, but not part of the returned value.
  const double p_short_circuit_discharge = k_v*vdd*vdd*c_in*fo_p*fo_p/((vdd-vt)*g_v_alpha*fanout*beta_ratio/2/k_v + h_v_alpha*fo_p);
  (void) p_short_circuit_discharge;

  return (p_short_circuit);
}
|
||||
|
||||
|
||||
//ali
|
||||
// Wire resistance from its geometry:
//   alpha_scatter * resistivity
//     / ((wire_thickness - barrier_thickness - dishing_thickness)
//        * (wire_width - 2 * barrier_thickness))
// i.e. the barrier and dishing thicknesses are subtracted from the wire's
// height, and the barrier from both sides of its width, before forming the
// cross-section.
double wire_resistance(double resistivity, double wire_width, double wire_thickness,
    double barrier_thickness, double dishing_thickness, double alpha_scatter)
{
  const double conductor_height = wire_thickness - barrier_thickness - dishing_thickness;
  const double conductor_width = wire_width - 2 * barrier_thickness;
  return alpha_scatter * resistivity / (conductor_height * conductor_width);
}
|
||||
|
||||
double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
|
||||
double ild_thickness, double miller_value, double horiz_dielectric_constant,
|
||||
double vert_dielectric_constant, double fringe_cap)
|
||||
{
|
||||
double vertical_cap, sidewall_cap, total_cap;
|
||||
vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
|
||||
sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
|
||||
total_cap = vertical_cap + sidewall_cap + fringe_cap;
|
||||
return(total_cap);
|
||||
}
|
||||
|
||||
//CACTI3DD TSV
|
||||
// Resistance of a through-silicon via modelled as a cylinder of length
// tsv_len and diameter tsv_diam, plus a fixed contact resistance:
//   resistivity * tsv_len / (3.1416 * (tsv_diam/2)^2) + tsv_contact_resistance
double tsv_resistance(double resistivity, double tsv_len, double tsv_diam, double tsv_contact_resistance)
{
  const double radius = tsv_diam/2;
  const double cross_section = 3.1416 * radius * radius;
  return resistivity * tsv_len / cross_section + tsv_contact_resistance;
}
|
||||
|
||||
double tsv_capacitance(double tsv_len, double tsv_diam, double tsv_pitch, double dielec_thickness, double liner_dielectric_constant, double depletion_width)
|
||||
{
|
||||
double self_cap, liner_cap, depletion_cap, lateral_coupling_cap, diagonal_coupling_cap, total_cap;
|
||||
double diagonal_coupling_constant, lateral_coupling_constant;
|
||||
const double e_si = PERMITTIVITY_FREE_SPACE * 11.9, PI = 3.1416;
|
||||
lateral_coupling_constant = 4.1;
|
||||
diagonal_coupling_constant = 5.3;
|
||||
//depletion_width = 0.6; // um
|
||||
liner_cap = 2 * PI * PERMITTIVITY_FREE_SPACE * liner_dielectric_constant * tsv_len / log(1 + dielec_thickness / (tsv_diam/2));
|
||||
depletion_cap = 2 * PI * e_si *tsv_len / log(1 + depletion_width / (dielec_thickness + tsv_diam/2));
|
||||
//self_cap = ( 1 / (1/liner_cap + 1/depletion_cap) + liner_cap ) / 2;
|
||||
self_cap = 1 / (1/liner_cap + 1/depletion_cap);
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"TSV ox cap: "<<liner_cap*1e15<<" fF"<<endl;
|
||||
cout<<"TSV self cap: "<<self_cap*1e15<<" fF"<<endl;
|
||||
}
|
||||
lateral_coupling_cap = 0.4 * (0.225 * log(0.97 * tsv_len / tsv_diam) + 0.53) * e_si / (tsv_pitch - tsv_diam) * PI * tsv_diam * tsv_len;
|
||||
diagonal_coupling_cap = 0.4 * (0.225 * log(0.97 * tsv_len / tsv_diam) + 0.53) * e_si / (1.414 * tsv_pitch - tsv_diam) * PI * tsv_diam * tsv_len;
|
||||
total_cap = self_cap + lateral_coupling_constant * lateral_coupling_cap + diagonal_coupling_constant * diagonal_coupling_cap;
|
||||
return(total_cap);
|
||||
}
|
||||
|
||||
// Area attributed to one through-silicon via: a square of side tsv_pitch.
double tsv_area(double tsv_pitch)
{
  const double side = tsv_pitch;
  return pow(side, 2);
}
|
||||
// end ali
|
305
T1/TP/TP1/cacti_7/basic_circuit.h
Normal file
305
T1/TP/TP1/cacti_7/basic_circuit.h
Normal file
|
@ -0,0 +1,305 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __BASIC_CIRCUIT_H__
|
||||
#define __BASIC_CIRCUIT_H__
|
||||
|
||||
#include "const.h"
|
||||
///#include "cacti_interface.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define UNI_LEAK_STACK_FACTOR 0.43
|
||||
|
||||
int powers (int base, int n);
|
||||
bool is_pow2(int64_t val);
|
||||
uint32_t _log2(uint64_t num);
|
||||
int factorial(int n, int m = 1);
|
||||
int combination(int n, int m);
|
||||
|
||||
// Debug hook. With DBG defined, PRINTDW(a) expands to "; a;" so the
// statement `a` is executed; without it, PRINTDW(a) expands to a lone ";"
// and `a` is discarded. Uncomment the next line to enable it.
//#define DBG
#ifdef DBG
#define PRINTDW(a) ; a;
#else
#define PRINTDW(a) ;
#endif
|
||||
|
||||
|
||||
// Where a wire sits relative to a mat.
// NOTE(review): meaning taken from the enumerator names only — the users of
// this enum are not in this header; confirm against them.
enum Wire_placement {
  outside_mat = 0,
  inside_mat  = 1,
  local_wires = 2
};
|
||||
|
||||
|
||||
|
||||
// Kinds of H-tree.
// NOTE(review): presumably address, data-in/out and search-in/out networks,
// judging by the enumerator names — confirm against the users of this enum.
enum Htree_type {
  Add_htree        = 0,
  Data_in_htree    = 1,
  Data_out_htree   = 2,
  Search_in_htree  = 3,
  Search_out_htree = 4
};
|
||||
|
||||
//CACTI3DD
|
||||
// Kinds of memory bus path (introduced with the CACTI3DD changes).
// NOTE(review): presumably row-address, column-address and data paths,
// judging by the enumerator names — confirm against the users of this enum.
enum Memorybus_type {
  Row_add_path = 0,
  Col_add_path = 1,
  Data_path    = 2
  /*in_network,
  out_network*/
};
|
||||
|
||||
/*enum Part_grain {
|
||||
Coarse_rank_level, //Samsung 2009 3D DRAM
|
||||
Fine_rank_level, //Micron HMC 2011
|
||||
Coarse_bank_level, //ITRS fine TSV supported
|
||||
Fine_bank_level
|
||||
};*/
|
||||
|
||||
// Gate kinds accepted as the g_type argument of cmos_Isub_leakage() and
// cmos_Ig_leakage(), declared further down in this header.
enum Gate_type {
  nmos = 0,
  pmos = 1,
  inv  = 2,
  nand = 3,
  nor  = 4,
  tri  = 5,
  tg   = 6
};
|
||||
|
||||
// Topology of a pull-up or pull-down network, accepted as the topo argument
// of cmos_Isub_leakage() and cmos_Ig_leakage() (default: series).
enum Half_net_topology {
  parallel = 0,
  series   = 1
};
|
||||
|
||||
double logtwo (double x);
|
||||
|
||||
double gate_C(
|
||||
double width,
|
||||
double wirelength,
|
||||
bool _is_dram = false,
|
||||
bool _is_sram = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double gate_C_pass(
|
||||
double width,
|
||||
double wirelength,
|
||||
bool _is_dram = false,
|
||||
bool _is_sram = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double drain_C_(
|
||||
double width,
|
||||
int nchannel,
|
||||
int stack,
|
||||
int next_arg_thresh_folding_width_or_height_cell,
|
||||
double fold_dimension,
|
||||
bool _is_dram = false,
|
||||
bool _is_sram = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double tr_R_on(
|
||||
double width,
|
||||
int nchannel,
|
||||
int stack,
|
||||
bool _is_dram = false,
|
||||
bool _is_sram = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double R_to_w(
|
||||
double res,
|
||||
int nchannel,
|
||||
bool _is_dram = false,
|
||||
bool _is_sram = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double horowitz (
|
||||
double inputramptime,
|
||||
double tf,
|
||||
double vs1,
|
||||
double vs2,
|
||||
int rise);
|
||||
|
||||
double pmos_to_nmos_sz_ratio(
|
||||
bool _is_dram = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double simplified_nmos_leakage(
|
||||
double nwidth,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double simplified_pmos_leakage(
|
||||
double pwidth,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double simplified_nmos_Isat(
|
||||
double nwidth,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double simplified_pmos_Isat(
|
||||
double pwidth,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double cmos_Ileak(
|
||||
double nWidth,
|
||||
double pWidth,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double cmos_Ig_n(
|
||||
double nWidth,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr= false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
double cmos_Ig_p(
|
||||
double pWidth,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr= false,
|
||||
bool _is_sleep_tx = false);
|
||||
|
||||
|
||||
double cmos_Isub_leakage(
|
||||
double nWidth,
|
||||
double pWidth,
|
||||
int fanin,
|
||||
enum Gate_type g_type,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false,
|
||||
enum Half_net_topology topo = series);
|
||||
|
||||
double cmos_Ig_leakage(
|
||||
double nWidth,
|
||||
double pWidth,
|
||||
int fanin,
|
||||
enum Gate_type g_type,
|
||||
bool _is_dram = false,
|
||||
bool _is_cell = false,
|
||||
bool _is_wl_tr = false,
|
||||
bool _is_sleep_tx = false,
|
||||
enum Half_net_topology topo = series);
|
||||
|
||||
double shortcircuit(
|
||||
double vt,
|
||||
double velocity_index,
|
||||
double c_in,
|
||||
double c_out,
|
||||
double w_nmos,
|
||||
double w_pmos,
|
||||
double i_on_n,
|
||||
double i_on_p,
|
||||
double i_on_n_in,
|
||||
double i_on_p_in,
|
||||
double vdd);
|
||||
|
||||
double shortcircuit_simple(
|
||||
double vt,
|
||||
double velocity_index,
|
||||
double c_in,
|
||||
double c_out,
|
||||
double w_nmos,
|
||||
double w_pmos,
|
||||
double i_on_n,
|
||||
double i_on_p,
|
||||
double i_on_n_in,
|
||||
double i_on_p_in,
|
||||
double vdd);
|
||||
// Set a power point-product mask (strictly speaking this is not a real
// point product): writes a, b, c and d into pppv[0..3].
// pppv must point to at least four doubles. Every value defaults to 1.
inline void set_pppm(
    double * pppv,
    double a=1,
    double b=1,
    double c=1,
    double d=1
    ){
  const double mask[4] = {a, b, c, d};
  for (int slot = 0; slot < 4; ++slot)
  {
    pppv[slot] = mask[slot];
  }
}
|
||||
|
||||
// Writes a, b and c into sppv[0..2]. Every value defaults to 1.
//
// NOTE(review): d is accepted but never stored — sppv[3] is left untouched,
// unlike set_pppm(), which writes four elements. Confirm against the callers
// whether sppv has three or four elements before changing this.
inline void set_sppm(
    double * sppv,
    double a=1,
    double b=1,
    double c=1,
    double d=1
    ){
  (void) d;
  double * slot = sppv;
  *slot++ = a;
  *slot++ = b;
  *slot = c;
}
|
||||
|
||||
//ali
|
||||
double wire_resistance(double resistivity, double wire_width, double wire_thickness,
|
||||
double barrier_thickness, double dishing_thickness, double alpha_scatter);
|
||||
|
||||
double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
|
||||
double ild_thickness, double miller_value, double horiz_dielectric_constant,
|
||||
double vert_dielectric_constant, double fringe_cap);
|
||||
|
||||
double tsv_resistance(double resistivity, double tsv_len, double tsv_diam, double tsv_contact_resistance);
|
||||
|
||||
double tsv_capacitance(double tsv_len, double tsv_diam, double tsv_pitch, double dielec_thickness, double liner_dielectric_constant, double depletion_width);
|
||||
|
||||
double tsv_area(double tsv_pitch);
|
||||
// end ali
|
||||
|
||||
#endif
|
306
T1/TP/TP1/cacti_7/cache.cfg
Normal file
306
T1/TP/TP1/cacti_7/cache.cfg
Normal file
|
@ -0,0 +1,306 @@
|
|||
# Cache size
|
||||
//-size (bytes) 2048
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 32768
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
//-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
# power gating
|
||||
-Array Power Gating - "false"
|
||||
-WL Power Gating - "false"
|
||||
-CL Power Gating - "false"
|
||||
-Bitline floating - "false"
|
||||
-Interconnect Power Gating - "false"
|
||||
-Power Gating Performance Loss 0.01
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 64
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
-associativity 2
|
||||
//-associativity 4
|
||||
//-associativity 8
|
||||
|
||||
-size (bytes) 131072
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 1
|
||||
//-technology (u) 0.022
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.032
|
||||
-technology (u) 0.090
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
//-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
//-Tag array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
//-Tag array peripheral type - "itrs-lop"
|
||||
|
||||
# Bus width include data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
-output/input bus width 512
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 360
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 22
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcast in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer to the CACTI-6 technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
//-Wire outside mat - "global"
|
||||
-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value.
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
-Print input parameters - "true"
|
||||
//-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
|
||||
|
||||
#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report, especially Chapters 2 and 3.
|
||||
|
||||
# Memory Type (D3=DDR3, D4=DDR4, L=LPDDR2, W=WideIO, S=Serial). Additional memory types can be defined by the user in extio_technology.cc, along with their technology and configuration parameters.
|
||||
|
||||
-dram_type "DDR3"
|
||||
//-dram_type "DDR4"
|
||||
//-dram_type "LPDDR2"
|
||||
//-dram_type "WideIO"
|
||||
//-dram_type "Serial"
|
||||
|
||||
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||||
|
||||
//-io state "READ"
|
||||
-io state "WRITE"
|
||||
//-io state "IDLE"
|
||||
//-io state "SLEEP"
|
||||
|
||||
#Address bus timing. To alleviate the timing on the command and address bus due to high loading (shared across all memories on the channel), the interface allows for multi-cycle timing options.
|
||||
|
||||
//-addr_timing 0.5 //DDR
|
||||
-addr_timing 1.0 //SDR (half of DQ rate)
|
||||
//-addr_timing 2.0 //2T timing (One fourth of DQ rate)
|
||||
//-addr_timing 3.0 // 3T timing (One sixth of DQ rate)
|
||||
|
||||
# Memory Density (Gbit per memory/DRAM die)
|
||||
|
||||
-mem_density 4 Gb //Valid values 2^n Gb
|
||||
|
||||
# IO frequency (MHz) (frequency of the external memory interface).
|
||||
|
||||
-bus_freq 800 MHz //As of current memory standards (2013), valid range 0 to 1.5 GHz for DDR3, 0 to 533 MHz for LPDDR2, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential. However this can change, and the user is free to define valid ranges based on new memory types or extending beyond existing standards for existing dram types.
|
||||
|
||||
# Duty Cycle (fraction of time in the Memory State defined above)
|
||||
|
||||
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||||
|
||||
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
|
||||
|
||||
# Number of DQ pins
|
||||
|
||||
-num_dq 72 //Number of DQ pins. Includes ECC pins.
|
||||
|
||||
# Number of DQS pins. DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typically differential, just like the CLK pin.
|
||||
|
||||
-num_dqs 18 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||||
|
||||
# Number of CA pins
|
||||
|
||||
-num_ca 25 //Valid range 0 to 35 pins.
|
||||
|
||||
# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin.
|
||||
|
||||
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
|
||||
|
||||
# Number of Physical Ranks
|
||||
|
||||
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register.
|
||||
|
||||
# Width of the Memory Data Bus
|
||||
|
||||
-mem_data_width 8 //x4 or x8 or x16 or x32 memories. For WideIO upto x128.
|
||||
|
||||
# RTT Termination Resistance
|
||||
|
||||
-rtt_value 10000
|
||||
|
||||
# RON Termination Resistance
|
||||
|
||||
-ron_value 34
|
||||
|
||||
# Time of flight for DQ
|
||||
|
||||
-tflight_value
|
||||
|
||||
# Parameter related to MemCAD
|
||||
|
||||
# Number of BoBs: 1,2,3,4,5,6,
|
||||
-num_bobs 1
|
||||
|
||||
# Memory System Capacity in GB
|
||||
-capacity 80
|
||||
|
||||
# Number of Channel per BoB: 1,2.
|
||||
-num_channels_per_bob 1
|
||||
|
||||
# First Metric for ordering different design points
|
||||
-first metric "Cost"
|
||||
#-first metric "Bandwidth"
|
||||
#-first metric "Energy"
|
||||
|
||||
# Second Metric for ordering different design points
|
||||
#-second metric "Cost"
|
||||
-second metric "Bandwidth"
|
||||
#-second metric "Energy"
|
||||
|
||||
# Third Metric for ordering different design points
|
||||
#-third metric "Cost"
|
||||
#-third metric "Bandwidth"
|
||||
-third metric "Energy"
|
||||
|
||||
|
||||
# Possible DIMM option to consider
|
||||
#-DIMM model "JUST_UDIMM"
|
||||
#-DIMM model "JUST_RDIMM"
|
||||
#-DIMM model "JUST_LRDIMM"
|
||||
-DIMM model "ALL"
|
||||
|
||||
#if channels of each bob have the same configurations
|
||||
#-mirror_in_bob "T"
|
||||
-mirror_in_bob "F"
|
||||
|
||||
#if we want to see all channels/bobs/memory configurations explored
|
||||
#-verbose "T"
|
||||
#-verbose "F"
|
||||
|
15
T1/TP/TP1/cacti_7/cache.cfg.out
Normal file
15
T1/TP/TP1/cacti_7/cache.cfg.out
Normal file
|
@ -0,0 +1,15 @@
|
|||
Tech node (nm), Capacity (bytes), Number of banks, Associativity, Output width (bits), Access time (ns), Random cycle time (ns), Dynamic search energy (nJ), Dynamic read energy (nJ), Dynamic write energy (nJ), Standby leakage per bank(mW), Area (mm2), Ndwl, Ndbl, Nspd, Ndcm, Ndsam_level_1, Ndsam_level_2, Data arrary area efficiency %, Ntwl, Ntbl, Ntspd, Ntcm, Ntsam_level_1, Ntsam_level_2, Tag arrary area efficiency %,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
||||
90, 131072, 1, 2, 512, 1.47098, 1.86851, N/A, 0.303592, 0.615022, 63.7023, 2.24949, 2, 2, 1, 2, 2, 1, 78.3192, 2, 2, 4, 1, 8, 1, 77.9289,
|
BIN
T1/TP/TP1/cacti_7/cacti
Executable file
BIN
T1/TP/TP1/cacti_7/cacti
Executable file
Binary file not shown.
8
T1/TP/TP1/cacti_7/cacti.i
Normal file
8
T1/TP/TP1/cacti_7/cacti.i
Normal file
|
@ -0,0 +1,8 @@
|
|||
%module cacti
|
||||
%{
|
||||
/* Includes the header in the wrapper code */
|
||||
#include "cacti_interface.h"
|
||||
%}
|
||||
|
||||
/* Parse the header file to generate wrappers */
|
||||
%include "cacti_interface.h"
|
53
T1/TP/TP1/cacti_7/cacti.mk
Normal file
53
T1/TP/TP1/cacti_7/cacti.mk
Normal file
|
@ -0,0 +1,53 @@
|
|||
TARGET = cacti
|
||||
SHELL = /bin/sh
|
||||
.PHONY: all depend clean
|
||||
.SUFFIXES: .cc .o
|
||||
|
||||
ifndef NTHREADS
|
||||
NTHREADS = 8
|
||||
endif
|
||||
|
||||
|
||||
LIBS =
|
||||
INCS = -lm
|
||||
|
||||
ifeq ($(TAG),dbg)
|
||||
DBG = -Wall
|
||||
OPT = -ggdb -g -O0 -DNTHREADS=1 -gstabs+
|
||||
else
|
||||
DBG =
|
||||
OPT = -g -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS)
|
||||
endif
|
||||
|
||||
#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT)
|
||||
CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT)
|
||||
CXX = g++ -m64
|
||||
CC = gcc -m64
|
||||
|
||||
SRCS = area.cc bank.cc mat.cc main.cc Ucache.cc io.cc technology.cc basic_circuit.cc parameter.cc \
|
||||
decoder.cc component.cc uca.cc subarray.cc wire.cc htree2.cc extio.cc extio_technology.cc \
|
||||
cacti_interface.cc router.cc nuca.cc crossbar.cc arbiter.cc powergating.cc TSV.cc memorybus.cc \
|
||||
memcad.cc memcad_parameters.cc
|
||||
|
||||
|
||||
OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS))
|
||||
PYTHONLIB_SRCS = $(patsubst main.cc, ,$(SRCS)) obj_$(TAG)/cacti_wrap.cc
|
||||
PYTHONLIB_OBJS = $(patsubst %.cc,%.o,$(PYTHONLIB_SRCS))
|
||||
INCLUDES = -I /usr/include/python2.4 -I /usr/lib/python2.4/config
|
||||
|
||||
all: obj_$(TAG)/$(TARGET)
|
||||
cp -f obj_$(TAG)/$(TARGET) $(TARGET)
|
||||
|
||||
obj_$(TAG)/$(TARGET) : $(OBJS)
|
||||
$(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread
|
||||
|
||||
#obj_$(TAG)/%.o : %.cc
|
||||
# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $<
|
||||
|
||||
obj_$(TAG)/%.o : %.cc
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
clean:
|
||||
-rm -f *.o _cacti.so cacti.py $(TARGET)
|
||||
|
||||
|
174
T1/TP/TP1/cacti_7/cacti_interface.cc
Normal file
174
T1/TP/TP1/cacti_7/cacti_interface.cc
Normal file
|
@ -0,0 +1,174 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
#include "area.h"
|
||||
#include "basic_circuit.h"
|
||||
#include "component.h"
|
||||
#include "const.h"
|
||||
#include "parameter.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "Ucache.h"
|
||||
|
||||
#include <pthread.h>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
bool mem_array::lt(const mem_array * m1, const mem_array * m2)
|
||||
{
|
||||
if (m1->Nspd < m2->Nspd) return true;
|
||||
else if (m1->Nspd > m2->Nspd) return false;
|
||||
else if (m1->Ndwl < m2->Ndwl) return true;
|
||||
else if (m1->Ndwl > m2->Ndwl) return false;
|
||||
else if (m1->Ndbl < m2->Ndbl) return true;
|
||||
else if (m1->Ndbl > m2->Ndbl) return false;
|
||||
else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
|
||||
else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
|
||||
else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
|
||||
else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
|
||||
else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
|
||||
else return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void uca_org_t::find_delay()
|
||||
{
|
||||
mem_array * data_arr = data_array2;
|
||||
mem_array * tag_arr = tag_array2;
|
||||
|
||||
// check whether it is a regular cache or scratch ram
|
||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
|
||||
{
|
||||
access_time = data_arr->access_time;
|
||||
}
|
||||
// Both tag and data lookup happen in parallel
|
||||
// and the entire set is sent over the data array h-tree without
|
||||
// waiting for the way-select signal --TODO add the corresponding
|
||||
// power overhead Nav
|
||||
else if (g_ip->fast_access == true)
|
||||
{
|
||||
access_time = MAX(tag_arr->access_time, data_arr->access_time);
|
||||
}
|
||||
// Tag is accessed first. On a hit, way-select signal along with the
|
||||
// address is sent to read/write the appropriate block in the data
|
||||
// array
|
||||
else if (g_ip->is_seq_acc == true)
|
||||
{
|
||||
access_time = tag_arr->access_time + data_arr->access_time;
|
||||
}
|
||||
// Normal access: tag array access and data array access happen in parallel.
|
||||
// But, the data array will wait for the way-select and transfer only the
|
||||
// appropriate block over the h-tree.
|
||||
else
|
||||
{
|
||||
access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
|
||||
data_arr->delay_before_subarray_output_driver) +
|
||||
data_arr->delay_from_subarray_output_driver_to_output;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void uca_org_t::find_energy()
|
||||
{
|
||||
if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
|
||||
power = data_array2->power + tag_array2->power;
|
||||
else
|
||||
power = data_array2->power;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void uca_org_t::find_area()
|
||||
{
|
||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
|
||||
{
|
||||
cache_ht = data_array2->height;
|
||||
cache_len = data_array2->width;
|
||||
}
|
||||
else
|
||||
{
|
||||
cache_ht = MAX(tag_array2->height, data_array2->height);
|
||||
cache_len = tag_array2->width + data_array2->width;
|
||||
}
|
||||
area = cache_ht * cache_len;
|
||||
}
|
||||
|
||||
void uca_org_t::adjust_area()
|
||||
{
|
||||
double area_adjust;
|
||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
|
||||
{
|
||||
if (data_array2->area_efficiency/100.0<0.2)
|
||||
{
|
||||
//area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
|
||||
area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0));
|
||||
cache_ht = cache_ht/area_adjust;
|
||||
cache_len = cache_len/area_adjust;
|
||||
}
|
||||
}
|
||||
area = cache_ht * cache_len;
|
||||
}
|
||||
|
||||
void uca_org_t::find_cyc()
|
||||
{
|
||||
if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
|
||||
{
|
||||
cycle_time = data_array2->cycle_time;
|
||||
}
|
||||
else
|
||||
{
|
||||
cycle_time = MAX(tag_array2->cycle_time,
|
||||
data_array2->cycle_time);
|
||||
}
|
||||
}
|
||||
|
||||
// Default constructor: starts with no tag or data array attached.
uca_org_t::uca_org_t() : tag_array2(0), data_array2(0) { }
|
||||
|
||||
void uca_org_t :: cleanup()
|
||||
{
|
||||
if (data_array2!=0)
|
||||
delete data_array2;
|
||||
if (tag_array2!=0)
|
||||
delete tag_array2;
|
||||
}
|
904
T1/TP/TP1/cacti_7/cacti_interface.h
Normal file
904
T1/TP/TP1/cacti_7/cacti_interface.h
Normal file
|
@ -0,0 +1,904 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __CACTI_INTERFACE_H__
|
||||
#define __CACTI_INTERFACE_H__
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include "const.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
class min_values_t;
|
||||
class mem_array;
|
||||
class uca_org_t;
|
||||
|
||||
|
||||
// Breakdown of a power/energy figure into its individual contributions.
// All components start at zero and can be zeroed again with reset().
class powerComponents
{
  public:
    double dynamic;
    double leakage;
    double gate_leakage;
    double short_circuit;
    double longer_channel_leakage;

    // Default construction yields an all-zero breakdown.
    powerComponents()
    {
      reset();
    }

    // Copy construction duplicates every component of obj.
    powerComponents(const powerComponents & obj)
      : dynamic(obj.dynamic),
        leakage(obj.leakage),
        gate_leakage(obj.gate_leakage),
        short_circuit(obj.short_circuit),
        longer_channel_leakage(obj.longer_channel_leakage)
    {
    }

    // Member-wise assignment; returns *this to allow chaining.
    powerComponents & operator=(const powerComponents & rhs)
    {
      dynamic                = rhs.dynamic;
      leakage                = rhs.leakage;
      gate_leakage           = rhs.gate_leakage;
      short_circuit          = rhs.short_circuit;
      longer_channel_leakage = rhs.longer_channel_leakage;
      return *this;
    }

    // Sets every component back to zero.
    void reset()
    {
      dynamic = 0;
      leakage = 0;
      gate_leakage = 0;
      short_circuit = 0;
      longer_channel_leakage = 0;
    }

    // Arithmetic helpers, defined outside this class.
    friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
    friend powerComponents operator*(const powerComponents & x, double const * const y);
};
|
||||
|
||||
|
||||
|
||||
class powerDef
|
||||
{
|
||||
public:
|
||||
powerComponents readOp;
|
||||
powerComponents writeOp;
|
||||
powerComponents searchOp;//: for CAM and FA
|
||||
|
||||
powerDef() : readOp(), writeOp(), searchOp() { }
|
||||
void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
|
||||
|
||||
friend powerDef operator+(const powerDef & x, const powerDef & y);
|
||||
friend powerDef operator*(const powerDef & x, double const * const y);
|
||||
};
|
||||
|
||||
// Wire models that can be selected for interconnect.
// The Global_<n> entries are global wires with the stated delay penalty;
// Invalid_wtype is the last enumerator.
enum Wire_type
|
||||
{
|
||||
Global /* global wires with repeaters */,
|
||||
Global_5 /* 5% delay penalty */,
|
||||
Global_10 /* 10% delay penalty */,
|
||||
Global_20 /* 20% delay penalty */,
|
||||
Global_30 /* 30% delay penalty */,
|
||||
Low_swing /* differential low power wires with high area overhead */,
|
||||
Semi_global /* mid-level wires with repeaters*/,
|
||||
Full_swing /* models all global wires with different latencies (Global_x )*/,
|
||||
Transmission /* transmission lines with high area overhead */,
|
||||
Optical /* optical wires */,
|
||||
Invalid_wtype
|
||||
};
|
||||
|
||||
enum TSV_type
|
||||
{
|
||||
Fine, /*ITRS high density*/
|
||||
Coarse /*Industry reported in 2010*/
|
||||
};
|
||||
|
||||
// ali
|
||||
|
||||
enum Mem_IO_type
|
||||
{
|
||||
DDR3,
|
||||
DDR4,
|
||||
LPDDR2,
|
||||
WideIO,
|
||||
Low_Swing_Diff,
|
||||
Serial
|
||||
};
|
||||
|
||||
enum Mem_DIMM
|
||||
{
|
||||
UDIMM,
|
||||
RDIMM,
|
||||
LRDIMM
|
||||
};
|
||||
|
||||
enum Mem_state
|
||||
{
|
||||
READ,
|
||||
WRITE,
|
||||
IDLE,
|
||||
SLEEP
|
||||
};
|
||||
|
||||
enum Mem_ECC
|
||||
{
|
||||
NO_ECC,
|
||||
SECDED, // single error correction, double error detection
|
||||
CHIP_KILL
|
||||
};
|
||||
|
||||
enum DIMM_Model
|
||||
{
|
||||
JUST_UDIMM,JUST_RDIMM,JUST_LRDIMM,ALL
|
||||
};
|
||||
|
||||
enum MemCad_metrics
|
||||
{
|
||||
Bandwidth, Energy, Cost
|
||||
};
|
||||
|
||||
/**
|
||||
enum BoB_LINK
|
||||
{
|
||||
PARALLEL, // e.g. Intel SMB c104
|
||||
SERIAL // e.g. Intel SMB 7510, IBM AMB
|
||||
};
|
||||
**/
|
||||
// end ali
|
||||
|
||||
|
||||
class InputParameter
|
||||
{
|
||||
public:
|
||||
|
||||
InputParameter();
|
||||
void parse_cfg(const string & infile);
|
||||
|
||||
bool error_checking(); // return false if the input parameters are problematic
|
||||
void display_ip();
|
||||
|
||||
unsigned int cache_sz; // in bytes
|
||||
unsigned int line_sz;
|
||||
unsigned int assoc;
|
||||
unsigned int nbanks;
|
||||
unsigned int out_w;// == nr_bits_out
|
||||
bool specific_tag;
|
||||
unsigned int tag_w;
|
||||
unsigned int access_mode;
|
||||
unsigned int obj_func_dyn_energy;
|
||||
unsigned int obj_func_dyn_power;
|
||||
unsigned int obj_func_leak_power;
|
||||
unsigned int obj_func_cycle_t;
|
||||
|
||||
double F_sz_nm; // feature size in nm
|
||||
double F_sz_um; // feature size in um
|
||||
unsigned int num_rw_ports;
|
||||
unsigned int num_rd_ports;
|
||||
unsigned int num_wr_ports;
|
||||
unsigned int num_se_rd_ports; // number of single ended read ports
|
||||
unsigned int num_search_ports; // : number of search ports for CAM
|
||||
bool is_main_mem;
|
||||
bool is_3d_mem;
|
||||
bool print_detail_debug;
|
||||
bool is_cache;
|
||||
bool pure_ram;
|
||||
bool pure_cam;
|
||||
bool rpters_in_htree; // if there are repeaters in htree segment
|
||||
unsigned int ver_htree_wires_over_array;
|
||||
unsigned int broadcast_addr_din_over_ver_htrees;
|
||||
unsigned int temp;
|
||||
|
||||
unsigned int ram_cell_tech_type;
|
||||
unsigned int peri_global_tech_type;
|
||||
unsigned int data_arr_ram_cell_tech_type;
|
||||
unsigned int data_arr_peri_global_tech_type;
|
||||
unsigned int tag_arr_ram_cell_tech_type;
|
||||
unsigned int tag_arr_peri_global_tech_type;
|
||||
|
||||
unsigned int burst_len;
|
||||
unsigned int int_prefetch_w;
|
||||
unsigned int page_sz_bits;
|
||||
|
||||
unsigned int num_die_3d;
|
||||
unsigned int burst_depth;
|
||||
unsigned int io_width;
|
||||
unsigned int sys_freq_MHz;
|
||||
|
||||
unsigned int tsv_is_subarray_type;
|
||||
unsigned int tsv_os_bank_type;
|
||||
unsigned int TSV_proj_type;
|
||||
|
||||
int partition_gran;
|
||||
unsigned int num_tier_row_sprd;
|
||||
unsigned int num_tier_col_sprd;
|
||||
bool fine_gran_bank_lvl;
|
||||
|
||||
unsigned int ic_proj_type; // interconnect_projection_type
|
||||
unsigned int wire_is_mat_type; // wire_inside_mat_type
|
||||
unsigned int wire_os_mat_type; // wire_outside_mat_type
|
||||
enum Wire_type wt;
|
||||
int force_wiretype;
|
||||
bool print_input_args;
|
||||
unsigned int nuca_cache_sz; // TODO
|
||||
int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm;
|
||||
bool force_cache_config;
|
||||
|
||||
int cache_level;
|
||||
int cores;
|
||||
int nuca_bank_count;
|
||||
int force_nuca_bank;
|
||||
|
||||
int delay_wt, dynamic_power_wt, leakage_power_wt,
|
||||
cycle_time_wt, area_wt;
|
||||
int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
|
||||
cycle_time_wt_nuca, area_wt_nuca;
|
||||
|
||||
int delay_dev, dynamic_power_dev, leakage_power_dev,
|
||||
cycle_time_dev, area_dev;
|
||||
int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
|
||||
cycle_time_dev_nuca, area_dev_nuca;
|
||||
int ed; //ED or ED2 optimization
|
||||
int nuca;
|
||||
|
||||
bool fast_access;
|
||||
unsigned int block_sz; // bytes
|
||||
unsigned int tag_assoc;
|
||||
unsigned int data_assoc;
|
||||
bool is_seq_acc;
|
||||
bool fully_assoc;
|
||||
unsigned int nsets; // == number_of_sets
|
||||
int print_detail;
|
||||
|
||||
|
||||
bool add_ecc_b_;
|
||||
//parameters for design constraint
|
||||
double throughput;
|
||||
double latency;
|
||||
bool pipelinable;
|
||||
int pipeline_stages;
|
||||
int per_stage_vector;
|
||||
bool with_clock_grid;
|
||||
|
||||
bool array_power_gated;
|
||||
bool bitline_floating;
|
||||
bool wl_power_gated;
|
||||
bool cl_power_gated;
|
||||
bool interconect_power_gated;
|
||||
bool power_gating;
|
||||
|
||||
double perfloss;
|
||||
|
||||
bool cl_vertical;
|
||||
|
||||
// Parameters related to off-chip I/O
|
||||
|
||||
double addr_timing, duty_cycle, mem_density, bus_bw, activity_dq, activity_ca, bus_freq;
|
||||
int mem_data_width, num_mem_dq, num_clk, num_ca, num_dqs, num_dq;
|
||||
|
||||
double rtt_value, ron_value, tflight_value; //FIXME
|
||||
|
||||
Mem_state iostate;
|
||||
|
||||
///char iostate, dram_ecc, io_type;
|
||||
|
||||
Mem_ECC dram_ecc;
|
||||
Mem_IO_type io_type;
|
||||
Mem_DIMM dram_dimm;
|
||||
|
||||
int num_bobs; // BoB is buffer-on-board such as Intel SMB c102
|
||||
|
||||
int capacity; // in GB
|
||||
|
||||
int num_channels_per_bob; // 1 means no bob
|
||||
|
||||
MemCad_metrics first_metric;
|
||||
|
||||
MemCad_metrics second_metric;
|
||||
|
||||
MemCad_metrics third_metric;
|
||||
|
||||
DIMM_Model dimm_model;
|
||||
|
||||
bool low_power_permitted; // Not yet implemented. It determines acceptable VDDs.
|
||||
|
||||
double load; // between 0 to 1
|
||||
|
||||
double row_buffer_hit_rate;
|
||||
|
||||
double rd_2_wr_ratio;
|
||||
|
||||
bool same_bw_in_bob; // true if all the channels in the bob have the same bandwidth.
|
||||
|
||||
bool mirror_in_bob;// true if all the channels in the bob have the same configs
|
||||
|
||||
bool total_power; // false means just considering I/O Power
|
||||
|
||||
bool verbose;
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
typedef struct{
|
||||
int Ndwl;
|
||||
int Ndbl;
|
||||
double Nspd;
|
||||
int deg_bl_muxing;
|
||||
int Ndsam_lev_1;
|
||||
int Ndsam_lev_2;
|
||||
int number_activated_mats_horizontal_direction;
|
||||
int number_subbanks;
|
||||
int page_size_in_bits;
|
||||
double delay_route_to_bank;
|
||||
double delay_crossbar;
|
||||
double delay_addr_din_horizontal_htree;
|
||||
double delay_addr_din_vertical_htree;
|
||||
double delay_row_predecode_driver_and_block;
|
||||
double delay_row_decoder;
|
||||
double delay_bitlines;
|
||||
double delay_sense_amp;
|
||||
double delay_subarray_output_driver;
|
||||
double delay_bit_mux_predecode_driver_and_block;
|
||||
double delay_bit_mux_decoder;
|
||||
double delay_senseamp_mux_lev_1_predecode_driver_and_block;
|
||||
double delay_senseamp_mux_lev_1_decoder;
|
||||
double delay_senseamp_mux_lev_2_predecode_driver_and_block;
|
||||
double delay_senseamp_mux_lev_2_decoder;
|
||||
double delay_input_htree;
|
||||
double delay_output_htree;
|
||||
double delay_dout_vertical_htree;
|
||||
double delay_dout_horizontal_htree;
|
||||
double delay_comparator;
|
||||
double access_time;
|
||||
double cycle_time;
|
||||
double multisubbank_interleave_cycle_time;
|
||||
double delay_request_network;
|
||||
double delay_inside_mat;
|
||||
double delay_reply_network;
|
||||
double trcd;
|
||||
double cas_latency;
|
||||
double precharge_delay;
|
||||
powerDef power_routing_to_bank;
|
||||
powerDef power_addr_input_htree;
|
||||
powerDef power_data_input_htree;
|
||||
powerDef power_data_output_htree;
|
||||
powerDef power_addr_horizontal_htree;
|
||||
powerDef power_datain_horizontal_htree;
|
||||
powerDef power_dataout_horizontal_htree;
|
||||
powerDef power_addr_vertical_htree;
|
||||
powerDef power_datain_vertical_htree;
|
||||
powerDef power_row_predecoder_drivers;
|
||||
powerDef power_row_predecoder_blocks;
|
||||
powerDef power_row_decoders;
|
||||
powerDef power_bit_mux_predecoder_drivers;
|
||||
powerDef power_bit_mux_predecoder_blocks;
|
||||
powerDef power_bit_mux_decoders;
|
||||
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
|
||||
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
|
||||
powerDef power_senseamp_mux_lev_1_decoders;
|
||||
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
|
||||
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
|
||||
powerDef power_senseamp_mux_lev_2_decoders;
|
||||
powerDef power_bitlines;
|
||||
powerDef power_sense_amps;
|
||||
powerDef power_prechg_eq_drivers;
|
||||
powerDef power_output_drivers_at_subarray;
|
||||
powerDef power_dataout_vertical_htree;
|
||||
powerDef power_comparators;
|
||||
powerDef power_crossbar;
|
||||
powerDef total_power;
|
||||
double area;
|
||||
double all_banks_height;
|
||||
double all_banks_width;
|
||||
double bank_height;
|
||||
double bank_width;
|
||||
double subarray_memory_cell_area_height;
|
||||
double subarray_memory_cell_area_width;
|
||||
double mat_height;
|
||||
double mat_width;
|
||||
double routing_area_height_within_bank;
|
||||
double routing_area_width_within_bank;
|
||||
double area_efficiency;
|
||||
// double perc_power_dyn_routing_to_bank;
|
||||
// double perc_power_dyn_addr_horizontal_htree;
|
||||
// double perc_power_dyn_datain_horizontal_htree;
|
||||
// double perc_power_dyn_dataout_horizontal_htree;
|
||||
// double perc_power_dyn_addr_vertical_htree;
|
||||
// double perc_power_dyn_datain_vertical_htree;
|
||||
// double perc_power_dyn_row_predecoder_drivers;
|
||||
// double perc_power_dyn_row_predecoder_blocks;
|
||||
// double perc_power_dyn_row_decoders;
|
||||
// double perc_power_dyn_bit_mux_predecoder_drivers;
|
||||
// double perc_power_dyn_bit_mux_predecoder_blocks;
|
||||
// double perc_power_dyn_bit_mux_decoders;
|
||||
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
|
||||
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
|
||||
// double perc_power_dyn_senseamp_mux_lev_1_decoders;
|
||||
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
|
||||
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
|
||||
// double perc_power_dyn_senseamp_mux_lev_2_decoders;
|
||||
// double perc_power_dyn_bitlines;
|
||||
// double perc_power_dyn_sense_amps;
|
||||
// double perc_power_dyn_prechg_eq_drivers;
|
||||
// double perc_power_dyn_subarray_output_drivers;
|
||||
// double perc_power_dyn_dataout_vertical_htree;
|
||||
// double perc_power_dyn_comparators;
|
||||
// double perc_power_dyn_crossbar;
|
||||
// double perc_power_dyn_spent_outside_mats;
|
||||
// double perc_power_leak_routing_to_bank;
|
||||
// double perc_power_leak_addr_horizontal_htree;
|
||||
// double perc_power_leak_datain_horizontal_htree;
|
||||
// double perc_power_leak_dataout_horizontal_htree;
|
||||
// double perc_power_leak_addr_vertical_htree;
|
||||
// double perc_power_leak_datain_vertical_htree;
|
||||
// double perc_power_leak_row_predecoder_drivers;
|
||||
// double perc_power_leak_row_predecoder_blocks;
|
||||
// double perc_power_leak_row_decoders;
|
||||
// double perc_power_leak_bit_mux_predecoder_drivers;
|
||||
// double perc_power_leak_bit_mux_predecoder_blocks;
|
||||
// double perc_power_leak_bit_mux_decoders;
|
||||
// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
|
||||
// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
|
||||
// double perc_power_leak_senseamp_mux_lev_1_decoders;
|
||||
// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
|
||||
// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
|
||||
// double perc_power_leak_senseamp_mux_lev_2_decoders;
|
||||
// double perc_power_leak_bitlines;
|
||||
// double perc_power_leak_sense_amps;
|
||||
// double perc_power_leak_prechg_eq_drivers;
|
||||
// double perc_power_leak_subarray_output_drivers;
|
||||
// double perc_power_leak_dataout_vertical_htree;
|
||||
// double perc_power_leak_comparators;
|
||||
// double perc_power_leak_crossbar;
|
||||
// double perc_leak_mats;
|
||||
// double perc_active_mats;
|
||||
double refresh_power;
|
||||
double dram_refresh_period;
|
||||
double dram_array_availability;
|
||||
double dyn_read_energy_from_closed_page;
|
||||
double dyn_read_energy_from_open_page;
|
||||
double leak_power_subbank_closed_page;
|
||||
double leak_power_subbank_open_page;
|
||||
double leak_power_request_and_reply_networks;
|
||||
double activate_energy;
|
||||
double read_energy;
|
||||
double write_energy;
|
||||
double precharge_energy;
|
||||
} results_mem_array;
|
||||
|
||||
|
||||
class uca_org_t
|
||||
{
|
||||
public:
|
||||
mem_array * tag_array2;
|
||||
mem_array * data_array2;
|
||||
double access_time;
|
||||
double cycle_time;
|
||||
double area;
|
||||
double area_efficiency;
|
||||
powerDef power;
|
||||
double leak_power_with_sleep_transistors_in_mats;
|
||||
double cache_ht;
|
||||
double cache_len;
|
||||
char file_n[100];
|
||||
double vdd_periph_global;
|
||||
bool valid;
|
||||
results_mem_array tag_array;
|
||||
results_mem_array data_array;
|
||||
|
||||
uca_org_t();
|
||||
void find_delay();
|
||||
void find_energy();
|
||||
void find_area();
|
||||
void find_cyc();
|
||||
void adjust_area();//for McPAT only to adjust routing overhead
|
||||
void cleanup();
|
||||
~uca_org_t(){};
|
||||
};
|
||||
|
||||
|
||||
// Plain record of seven I/O-related double fields. The default constructor
// sets every one of them to zero.
class IO_org_t
{
  public:
    double io_area, io_timing_margin, io_voltage_margin, io_dynamic_power,
           io_phy_power, io_wakeup_time, io_termination_power;

    IO_org_t()
        : io_area(0),
          io_timing_margin(0),
          io_voltage_margin(0),
          io_dynamic_power(0),
          io_phy_power(0),
          io_wakeup_time(0),
          io_termination_power(0)
    {
    }
};
|
||||
|
||||
|
||||
// Declaration only. Takes the input parameters and a result record, both by
// pointer, and returns nothing.
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
|
||||
|
||||
// Overload that takes a file name (by const reference) and returns the
// resulting uca_org_t by value.
uca_org_t cacti_interface(const string & infile_name);
|
||||
//McPAT's plain interface, please keep !!!
|
||||
// Overload that takes an InputParameter pointer instead of a file name.
uca_org_t cacti_interface(InputParameter * const local_interface);
|
||||
//McPAT's plain interface, please keep !!!
|
||||
// Same parameter and return types as the overload above.
// NOTE(review): presumably an initialization-only entry point — confirm against the definition.
uca_org_t init_interface(InputParameter * const local_interface);
|
||||
//McPAT's plain interface, please keep !!!
|
||||
// Scalar-argument overload: every setting is passed positionally as an int
// (tech_node is the only double) and the result is returned by value.
// Declaration only.
// NOTE(review): obj_func_cycle_time comes before obj_func_area, whereas
// dev_func_area comes before dev_func_cycle_time. All are plain ints, so a
// swapped argument at a call site will not be caught by the compiler.
uca_org_t cacti_interface(
    int cache_size, int line_size, int associativity,
    int rw_ports, int excl_read_ports, int excl_write_ports,
    int single_ended_read_ports, int search_ports, int banks,
    double tech_node,
    int output_width, int specific_tag, int tag_width,
    int access_mode, int cache, int main_mem,
    int obj_func_delay, int obj_func_dynamic_power, int obj_func_leakage_power,
    int obj_func_cycle_time, int obj_func_area,
    int dev_func_delay, int dev_func_dynamic_power, int dev_func_leakage_power,
    int dev_func_area, int dev_func_cycle_time,
    int ed_ed2_none,  // 0 - ED, 1 - ED^2, 2 - use weight and deviate
    int temp,
    int wt,  //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
    int data_arr_ram_cell_tech_flavor_in, int data_arr_peri_global_tech_flavor_in,
    int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in,
    int interconnect_projection_type_in,
    int wire_inside_mat_type_in, int wire_outside_mat_type_in,
    int REPEATERS_IN_HTREE_SEGMENTS_in,
    int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
    int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
    int PAGE_SIZE_BITS_in, int BURST_LENGTH_in, int INTERNAL_PREFETCH_WIDTH_in,
    int force_wiretype, int wiretype, int force_config,
    int ndwl, int ndbl, int nspd, int ndcm, int ndsam1, int ndsam2,
    int ecc);
|
||||
// int cache_size,
|
||||
// int line_size,
|
||||
// int associativity,
|
||||
// int rw_ports,
|
||||
// int excl_read_ports,
|
||||
// int excl_write_ports,
|
||||
// int single_ended_read_ports,
|
||||
// int banks,
|
||||
// double tech_node,
|
||||
// int output_width,
|
||||
// int specific_tag,
|
||||
// int tag_width,
|
||||
// int access_mode,
|
||||
// int cache,
|
||||
// int main_mem,
|
||||
// int obj_func_delay,
|
||||
// int obj_func_dynamic_power,
|
||||
// int obj_func_leakage_power,
|
||||
// int obj_func_area,
|
||||
// int obj_func_cycle_time,
|
||||
// int dev_func_delay,
|
||||
// int dev_func_dynamic_power,
|
||||
// int dev_func_leakage_power,
|
||||
// int dev_func_area,
|
||||
// int dev_func_cycle_time,
|
||||
// int temp,
|
||||
// int data_arr_ram_cell_tech_flavor_in,
|
||||
// int data_arr_peri_global_tech_flavor_in,
|
||||
// int tag_arr_ram_cell_tech_flavor_in,
|
||||
// int tag_arr_peri_global_tech_flavor_in,
|
||||
// int interconnect_projection_type_in,
|
||||
// int wire_inside_mat_type_in,
|
||||
// int wire_outside_mat_type_in,
|
||||
// int REPEATERS_IN_HTREE_SEGMENTS_in,
|
||||
// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
|
||||
// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
|
||||
//// double MAXAREACONSTRAINT_PERC_in,
|
||||
//// double MAXACCTIMECONSTRAINT_PERC_in,
|
||||
//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
|
||||
// int PAGE_SIZE_BITS_in,
|
||||
// int BURST_LENGTH_in,
|
||||
// int INTERNAL_PREFETCH_WIDTH_in);
|
||||
|
||||
//Naveen's interface
|
||||
// Scalar-argument overload that additionally carries the NUCA settings
// (is_nuca, core_count, cache_level, nuca_*). Everything is passed
// positionally as an int except tech_node (double); the result is returned
// by value. Declaration only.
// In this overload the *_area weight precedes the *_cycle_time weight in all
// four groups (obj_func, dev_func, nuca_obj_func, nuca_dev_func).
uca_org_t cacti_interface(
    int cache_size, int line_size, int associativity,
    int rw_ports, int excl_read_ports, int excl_write_ports,
    int single_ended_read_ports, int banks,
    double tech_node,
    int page_sz, int burst_length, int pre_width,
    int output_width, int specific_tag, int tag_width,
    int access_mode,  //0 normal, 1 seq, 2 fast
    int cache,  //scratch ram or cache
    int main_mem,
    int obj_func_delay, int obj_func_dynamic_power, int obj_func_leakage_power,
    int obj_func_area, int obj_func_cycle_time,
    int dev_func_delay, int dev_func_dynamic_power, int dev_func_leakage_power,
    int dev_func_area, int dev_func_cycle_time,
    int ed_ed2_none,  // 0 - ED, 1 - ED^2, 2 - use weight and deviate
    int temp,
    int wt,  //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
    int data_arr_ram_cell_tech_flavor_in, int data_arr_peri_global_tech_flavor_in,
    int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in,
    int interconnect_projection_type_in,  // 0 - aggressive, 1 - normal
    int wire_inside_mat_type_in, int wire_outside_mat_type_in,
    int is_nuca,  // 0 - UCA, 1 - NUCA
    int core_count,
    int cache_level,  // 0 - L2, 1 - L3
    int nuca_bank_count,
    int nuca_obj_func_delay, int nuca_obj_func_dynamic_power, int nuca_obj_func_leakage_power,
    int nuca_obj_func_area, int nuca_obj_func_cycle_time,
    int nuca_dev_func_delay, int nuca_dev_func_dynamic_power, int nuca_dev_func_leakage_power,
    int nuca_dev_func_area, int nuca_dev_func_cycle_time,
    int REPEATERS_IN_HTREE_SEGMENTS_in,  //TODO for now only wires with repeaters are supported
    int p_input);
|
||||
|
||||
|
||||
//CACTI3DD interface
|
||||
// Scalar-argument overload with the 3D-DRAM settings appended: the first 52
// parameters match the shorter scalar overload, followed by is_3d_dram
// through partition_level. All are ints except tech_node (double); the
// result is returned by value. Declaration only.
// The "para N" markers give the 1-based position of the last parameter on
// that row, mirroring the markers in the original listing.
uca_org_t cacti_interface(
    int cache_size, int line_size, int associativity, int rw_ports,
    int excl_read_ports,  // para5
    int excl_write_ports, int single_ended_read_ports, int search_ports, int banks,
    double tech_node,  // para10
    int output_width, int specific_tag, int tag_width, int access_mode,
    int cache,  // para15
    int main_mem, int obj_func_delay, int obj_func_dynamic_power, int obj_func_leakage_power,
    int obj_func_cycle_time,  // para20
    int obj_func_area, int dev_func_delay, int dev_func_dynamic_power, int dev_func_leakage_power,
    int dev_func_area,  // para25
    int dev_func_cycle_time,
    int ed_ed2_none,  // 0 - ED, 1 - ED^2, 2 - use weight and deviate
    int temp,
    int wt,  //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
    int data_arr_ram_cell_tech_flavor_in,  // para30
    int data_arr_peri_global_tech_flavor_in,
    int tag_arr_ram_cell_tech_flavor_in, int tag_arr_peri_global_tech_flavor_in,
    int interconnect_projection_type_in,
    int wire_inside_mat_type_in,  // para35
    int wire_outside_mat_type_in,
    int REPEATERS_IN_HTREE_SEGMENTS_in,
    int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
    int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
    int PAGE_SIZE_BITS_in,  // para40
    int BURST_LENGTH_in, int INTERNAL_PREFETCH_WIDTH_in, int force_wiretype, int wiretype,
    int force_config,  // para45
    int ndwl, int ndbl, int nspd, int ndcm,
    int ndsam1,  // para50
    int ndsam2, int ecc,
    int is_3d_dram, int burst_depth, int IO_width, int sys_freq, int debug_detail,
    int num_dies, int tsv_gran_is_subarray, int tsv_gran_os_bank,
    int num_tier_row_sprd, int num_tier_col_sprd,
    int partition_level);
|
||||
|
||||
class mem_array
|
||||
{
|
||||
public:
|
||||
int Ndcm;
|
||||
int Ndwl;
|
||||
int Ndbl;
|
||||
double Nspd;
|
||||
int deg_bl_muxing;
|
||||
int Ndsam_lev_1;
|
||||
int Ndsam_lev_2;
|
||||
double access_time;
|
||||
double cycle_time;
|
||||
double multisubbank_interleave_cycle_time;
|
||||
double area_ram_cells;
|
||||
double area;
|
||||
powerDef power;
|
||||
double delay_senseamp_mux_decoder;
|
||||
double delay_before_subarray_output_driver;
|
||||
double delay_from_subarray_output_driver_to_output;
|
||||
double height;
|
||||
double width;
|
||||
|
||||
double mat_height;
|
||||
double mat_length;
|
||||
double subarray_length;
|
||||
double subarray_height;
|
||||
|
||||
double delay_route_to_bank,
|
||||
delay_input_htree,
|
||||
delay_row_predecode_driver_and_block,
|
||||
delay_row_decoder,
|
||||
delay_bitlines,
|
||||
delay_sense_amp,
|
||||
delay_subarray_output_driver,
|
||||
delay_dout_htree,
|
||||
delay_comparator,
|
||||
delay_matchlines;
|
||||
//CACTI3DD 3d stats
|
||||
double delay_row_activate_net,
|
||||
delay_local_wordline,
|
||||
|
||||
delay_column_access_net,
|
||||
delay_column_predecoder,
|
||||
delay_column_decoder,
|
||||
delay_column_selectline,
|
||||
delay_datapath_net,
|
||||
delay_global_data,
|
||||
delay_local_data_and_drv,
|
||||
delay_data_buffer;
|
||||
|
||||
double energy_row_activate_net,
|
||||
energy_row_predecode_driver_and_block,
|
||||
energy_row_decoder,
|
||||
energy_local_wordline,
|
||||
energy_bitlines,
|
||||
energy_sense_amp,
|
||||
energy_column_access_net,
|
||||
energy_column_predecoder,
|
||||
energy_column_decoder,
|
||||
energy_column_selectline,
|
||||
energy_datapath_net,
|
||||
energy_global_data,
|
||||
energy_local_data_and_drv,
|
||||
energy_data_buffer,
|
||||
energy_subarray_output_driver;
|
||||
|
||||
double all_banks_height,
|
||||
all_banks_width,
|
||||
area_efficiency;
|
||||
|
||||
powerDef power_routing_to_bank;
|
||||
powerDef power_addr_input_htree;
|
||||
powerDef power_data_input_htree;
|
||||
powerDef power_data_output_htree;
|
||||
powerDef power_htree_in_search;
|
||||
powerDef power_htree_out_search;
|
||||
powerDef power_row_predecoder_drivers;
|
||||
powerDef power_row_predecoder_blocks;
|
||||
powerDef power_row_decoders;
|
||||
powerDef power_bit_mux_predecoder_drivers;
|
||||
powerDef power_bit_mux_predecoder_blocks;
|
||||
powerDef power_bit_mux_decoders;
|
||||
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
|
||||
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
|
||||
powerDef power_senseamp_mux_lev_1_decoders;
|
||||
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
|
||||
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
|
||||
powerDef power_senseamp_mux_lev_2_decoders;
|
||||
powerDef power_bitlines;
|
||||
powerDef power_sense_amps;
|
||||
powerDef power_prechg_eq_drivers;
|
||||
powerDef power_output_drivers_at_subarray;
|
||||
powerDef power_dataout_vertical_htree;
|
||||
powerDef power_comparators;
|
||||
|
||||
powerDef power_cam_bitline_precharge_eq_drv;
|
||||
powerDef power_searchline;
|
||||
powerDef power_searchline_precharge;
|
||||
powerDef power_matchlines;
|
||||
powerDef power_matchline_precharge;
|
||||
powerDef power_matchline_to_wordline_drv;
|
||||
|
||||
min_values_t *arr_min;
|
||||
enum Wire_type wt;
|
||||
|
||||
// dram stats
|
||||
double activate_energy, read_energy, write_energy, precharge_energy,
|
||||
refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
|
||||
leak_power_request_and_reply_networks;
|
||||
|
||||
double precharge_delay;
|
||||
|
||||
//Power-gating stats
|
||||
double array_leakage;
|
||||
double wl_leakage;
|
||||
double cl_leakage;
|
||||
|
||||
double sram_sleep_tx_width, wl_sleep_tx_width, cl_sleep_tx_width;
|
||||
double sram_sleep_tx_area, wl_sleep_tx_area, cl_sleep_tx_area;
|
||||
double sram_sleep_wakeup_latency, wl_sleep_wakeup_latency, cl_sleep_wakeup_latency, bl_floating_wakeup_latency;
|
||||
double sram_sleep_wakeup_energy, wl_sleep_wakeup_energy, cl_sleep_wakeup_energy, bl_floating_wakeup_energy;
|
||||
|
||||
int num_active_mats;
|
||||
int num_submarray_mats;
|
||||
|
||||
static bool lt(const mem_array * m1, const mem_array * m2);
|
||||
|
||||
//CACTI3DD 3d dram stats
|
||||
double t_RCD, t_RAS, t_RC, t_CAS, t_RP, t_RRD;
|
||||
double activate_power, read_power, write_power, peak_read_power;
|
||||
int num_row_subarray, num_col_subarray;
|
||||
double delay_TSV_tot, area_TSV_tot, dyn_pow_TSV_tot, dyn_pow_TSV_per_access;
|
||||
unsigned int num_TSV_tot;
|
||||
double area_lwl_drv, area_row_predec_dec, area_col_predec_dec,
|
||||
area_subarray, area_bus, area_address_bus, area_data_bus, area_data_drv, area_IOSA, area_sense_amp;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
237
T1/TP/TP1/cacti_7/component.cc
Normal file
237
T1/TP/TP1/cacti_7/component.cc
Normal file
|
@ -0,0 +1,237 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "bank.h"
|
||||
#include "component.h"
|
||||
#include "decoder.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
|
||||
// Default constructor: value-initializes the area and both power records and
// zeroes the two timing fields.
// cycle_time is a plain double member of Component and used to be left out of
// the initializer list, so reading it before an explicit assignment yielded
// an indeterminate value. It is now zeroed alongside delay. The initializers
// follow the member declaration order.
Component::Component()
    : area(), power(), rt_power(), delay(0), cycle_time(0)
{
}
|
||||
|
||||
|
||||
|
||||
// Nothing to release: the destructor body is intentionally empty.
Component::~Component() {}
|
||||
|
||||
|
||||
|
||||
double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr)
|
||||
{
|
||||
double w_poly = g_ip->F_sz_um;
|
||||
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
|
||||
double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
|
||||
num_stacked_in * w_poly +
|
||||
(num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
||||
|
||||
if (num_folded_tr > 1)
|
||||
{
|
||||
total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
|
||||
(num_folded_tr - 1) * num_stacked_in * w_poly +
|
||||
(num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
||||
}
|
||||
|
||||
return total_diff_w;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double Component::compute_gate_area(
|
||||
int gate_type,
|
||||
int num_inputs,
|
||||
double w_pmos,
|
||||
double w_nmos,
|
||||
double h_gate)
|
||||
{
|
||||
if (w_pmos <= 0.0 || w_nmos <= 0.0)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
double w_folded_pmos, w_folded_nmos;
|
||||
int num_folded_pmos, num_folded_nmos;
|
||||
double total_ndiff_w, total_pdiff_w;
|
||||
Area gate;
|
||||
|
||||
double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
|
||||
double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
|
||||
|
||||
if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
|
||||
w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
|
||||
|
||||
assert(w_folded_pmos > 0);
|
||||
|
||||
num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
|
||||
num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
|
||||
|
||||
switch (gate_type)
|
||||
{
|
||||
case INV:
|
||||
total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
|
||||
total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
|
||||
break;
|
||||
|
||||
case NOR:
|
||||
total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
|
||||
total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
|
||||
break;
|
||||
|
||||
case NAND:
|
||||
total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
|
||||
total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
|
||||
break;
|
||||
default:
|
||||
cout << "Unknown gate type: " << gate_type << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
gate.w = MAX(total_ndiff_w, total_pdiff_w);
|
||||
|
||||
if (w_folded_nmos > w_nmos)
|
||||
{
|
||||
//means that the height of the gate can
|
||||
//be made smaller than the input height specified, so calculate the height of the gate.
|
||||
gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
|
||||
}
|
||||
else
|
||||
{
|
||||
gate.h = h_gate;
|
||||
}
|
||||
return gate.get_area();
|
||||
}
|
||||
|
||||
|
||||
|
||||
double Component::compute_tr_width_after_folding(
|
||||
double input_width,
|
||||
double threshold_folding_width)
|
||||
{//This is actually the width of the cell not the width of a device.
|
||||
//The width of a cell and the width of a device is orthogonal.
|
||||
if (input_width <= 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
|
||||
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
|
||||
double width_poly = g_ip->F_sz_um;
|
||||
double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
|
||||
|
||||
return total_diff_width;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double Component::height_sense_amplifier(double pitch_sense_amp)
|
||||
{
|
||||
// compute the height occupied by all PMOS transistors
|
||||
double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
|
||||
compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
|
||||
2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
|
||||
|
||||
// compute the height occupied by all NMOS transistors
|
||||
double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
|
||||
compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
|
||||
2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
|
||||
|
||||
// compute total height by considering gap between the p and n diffusion areas
|
||||
return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int Component::logical_effort(
|
||||
int num_gates_min,
|
||||
double g,
|
||||
double F,
|
||||
double * w_n,
|
||||
double * w_p,
|
||||
double C_load,
|
||||
double p_to_n_sz_ratio,
|
||||
bool is_dram_,
|
||||
bool is_wl_tr_,
|
||||
double max_w_nmos)
|
||||
{
|
||||
int num_gates = (int) (log(F) / log(fopt));
|
||||
|
||||
// check if num_gates is odd. if so, add 1 to make it even
|
||||
num_gates+= (num_gates % 2) ? 1 : 0;
|
||||
num_gates = MAX(num_gates, num_gates_min);
|
||||
|
||||
// recalculate the effective fanout of each stage
|
||||
double f = pow(F, 1.0 / num_gates);
|
||||
int i = num_gates - 1;
|
||||
double C_in = C_load / f;
|
||||
w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
|
||||
w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
|
||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||
|
||||
if (w_n[i] > max_w_nmos) // && !g_ip->is_3d_mem)
|
||||
{
|
||||
double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
|
||||
F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
|
||||
num_gates = (int) (log(F) / log(fopt)) + 1;
|
||||
num_gates+= (num_gates % 2) ? 1 : 0;
|
||||
num_gates = MAX(num_gates, num_gates_min);
|
||||
f = pow(F, 1.0 / (num_gates - 1));
|
||||
i = num_gates - 1;
|
||||
w_n[i] = max_w_nmos;
|
||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||
}
|
||||
|
||||
for (i = num_gates - 2; i >= 1; i--)
|
||||
{
|
||||
w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
|
||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||
}
|
||||
|
||||
assert(num_gates <= MAX_NUMBER_GATES_STAGE);
|
||||
return num_gates;
|
||||
}
|
||||
|
84
T1/TP/TP1/cacti_7/component.h
Normal file
84
T1/TP/TP1/cacti_7/component.h
Normal file
|
@ -0,0 +1,84 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __COMPONENT_H__
|
||||
#define __COMPONENT_H__
|
||||
|
||||
#include "parameter.h"
|
||||
#include "area.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Crossbar;
|
||||
class Bank;
|
||||
|
||||
class Component
|
||||
{
|
||||
public:
|
||||
Component();
|
||||
~Component();
|
||||
|
||||
Area area;
|
||||
powerDef power,rt_power;
|
||||
double delay;
|
||||
double cycle_time;
|
||||
|
||||
double compute_gate_area(
|
||||
int gate_type,
|
||||
int num_inputs,
|
||||
double w_pmos,
|
||||
double w_nmos,
|
||||
double h_gate);
|
||||
|
||||
double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
|
||||
double height_sense_amplifier(double pitch_sense_amp);
|
||||
|
||||
protected:
|
||||
int logical_effort(
|
||||
int num_gates_min,
|
||||
double g,
|
||||
double F,
|
||||
double * w_n,
|
||||
double * w_p,
|
||||
double C_load,
|
||||
double p_to_n_sz_ratio,
|
||||
bool is_dram_,
|
||||
bool is_wl_tr_,
|
||||
double max_w_nmos);
|
||||
|
||||
private:
|
||||
double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
273
T1/TP/TP1/cacti_7/const.h
Normal file
273
T1/TP/TP1/cacti_7/const.h
Normal file
|
@ -0,0 +1,273 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef __CONST_H__
|
||||
#define __CONST_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/* The following are things you might want to change
|
||||
* when compiling
|
||||
*/
|
||||
|
||||
/*
|
||||
* Address bits in a word, and number of output bits from the cache
|
||||
*/
|
||||
|
||||
/*
|
||||
was: #define ADDRESS_BITS 32
|
||||
now: 42 bits as in the Power4
|
||||
This is 36 bits in Pentium 4
|
||||
and 40 bits in Opteron.
|
||||
*/
|
||||
const int ADDRESS_BITS = 42;
|
||||
|
||||
/*dt: In addition to the tag bits, the tags also include 1 valid bit, 1 dirty bit, 2 bits for a 4-state
|
||||
cache coherency protocol (MESI), 1 bit for MRU (change this to log(ways) for full LRU).
|
||||
So in total we have 1 + 1 + 2 + 1 = 5 */
|
||||
const int EXTRA_TAG_BITS = 5;
|
||||
|
||||
/* limits on the various N parameters */
|
||||
|
||||
const unsigned int MAXDATAN = 512; // maximum for Ndwl and Ndbl
|
||||
const unsigned int MAXSUBARRAYS = 1048576; // maximum subarrays for data and tag arrays
|
||||
const unsigned int MAXDATASPD = 256; // maximum for Nspd
|
||||
const unsigned int MAX_COL_MUX = 256;
|
||||
|
||||
|
||||
|
||||
#define ROUTER_TYPES 3
|
||||
#define WIRE_TYPES 6
|
||||
|
||||
const double Cpolywire = 0;
|
||||
|
||||
|
||||
/* Threshold voltages (as a proportion of Vdd)
|
||||
If you don't know them, set all values to 0.5 */
|
||||
#define VTHFA1 0.452
|
||||
#define VTHFA2 0.304
|
||||
#define VTHFA3 0.420
|
||||
#define VTHFA4 0.413
|
||||
#define VTHFA5 0.405
|
||||
#define VTHFA6 0.452
|
||||
#define VSINV 0.452
|
||||
#define VTHCOMPINV 0.437
|
||||
#define VTHMUXNAND 0.548 // TODO : this constant must be revisited
|
||||
#define VTHEVALINV 0.452
|
||||
#define VTHSENSEEXTDRV 0.438
|
||||
|
||||
|
||||
//WmuxdrvNANDn and WmuxdrvNANDp are no longer being used but it's part of the old
|
||||
//delay_comparator function which we are using exactly as it used to be, so just setting these to 0
|
||||
const double WmuxdrvNANDn = 0;
|
||||
const double WmuxdrvNANDp = 0;
|
||||
|
||||
|
||||
/*===================================================================*/
|
||||
/*
|
||||
* The following are things you probably wouldn't want to change.
|
||||
*/
|
||||
|
||||
#define BIGNUM 1e30
|
||||
#define INF 9999999
|
||||
#define MAX(a,b) (((a)>(b))?(a):(b))
|
||||
#define MIN(a,b) (((a)<(b))?(a):(b))
|
||||
|
||||
/* Used to communicate with the horowitz model */
|
||||
#define RISE 1
|
||||
#define FALL 0
|
||||
#define NCH 1
|
||||
#define PCH 0
|
||||
|
||||
|
||||
#define EPSILON 0.5 //v4.1: This constant is being used in order to fix floating point -> integer
|
||||
//conversion problems that were occurring within CACTI. Typical problem that was occurring was
|
||||
//that with different compilers a floating point number like 3.0 would get represented as either
|
||||
//2.9999....or 3.00000001 and then the integer part of the floating point number (3.0) would
|
||||
//be computed differently depending on the compiler. What we are doing now is to replace
|
||||
//int (x) with (int) (x+EPSILON) where EPSILON is 0.5. This would fix such problems. Note that
|
||||
//this works only when x is an integer >= 0.
|
||||
/*
|
||||
* Reviewer note: this is more a solution to the simple truncation problem
|
||||
* (http://www.cs.tut.fi/~jkorpela/round.html) rather than the problem mentioned above.
|
||||
* Unfortunately, this solution causes nasty bugs (different results when using O0 and O3).
|
||||
* Moreover, round is not correct in CACTI since when an extra fraction of bit/line is needed,
|
||||
* we need to provide a complete bit/line even if the fraction is just 0.01.
|
||||
* So, in later version than 6.5 we use (int)ceil() to get double to int conversion.
|
||||
*/
|
||||
|
||||
#define EPSILON2 0.1
|
||||
#define EPSILON3 0.6
|
||||
|
||||
|
||||
#define MINSUBARRAYROWS 16 //For simplicity in modeling, for the row decoding structure, we assume
|
||||
//that each row predecode block is composed of at least one 2-4 decoder. When the outputs from the
|
||||
//row predecode blocks are combined this means that there are at least 4*4=16 row decode outputs
|
||||
#define MAXSUBARRAYROWS 262144 //Each row predecode block produces a max of 2^9 outputs. So
|
||||
//the maximum number of row decode outputs will be 2^9*2^9
|
||||
#define MINSUBARRAYCOLS 2
|
||||
#define MAXSUBARRAYCOLS 262144
|
||||
|
||||
|
||||
#define INV 0
|
||||
#define NOR 1
|
||||
#define NAND 2
|
||||
|
||||
|
||||
#define NUMBER_TECH_FLAVORS 4
|
||||
|
||||
#define NUMBER_INTERCONNECT_PROJECTION_TYPES 2 //aggressive and conservative
|
||||
//0 = Aggressive projections, 1 = Conservative projections
|
||||
#define NUMBER_WIRE_TYPES 4 //local, semi-global and global
|
||||
//1 = 'Semi-global' wire type, 2 = 'Global' wire type
|
||||
#define NUMBER_TSV_TYPES 3
|
||||
//0 = ITRS projected fine TSV type, 1 = Industrial reported large TSV type, 2 = TBD
|
||||
|
||||
const int dram_cell_tech_flavor = 3;
|
||||
|
||||
|
||||
#define VBITSENSEMIN 0.08 //minimum bitline sense voltage is fixed to be 80 mV.
|
||||
|
||||
#define fopt 4.0
|
||||
|
||||
#define INPUT_WIRE_TO_INPUT_GATE_CAP_RATIO 0
|
||||
#define BUFFER_SEPARATION_LENGTH_MULTIPLIER 1
|
||||
#define NUMBER_MATS_PER_REDUNDANT_MAT 8
|
||||
|
||||
#define NUMBER_STACKED_DIE_LAYERS 1
|
||||
|
||||
// this variable can be set to carry out solution optimization for
|
||||
// a maximum area allocation.
|
||||
#define STACKED_DIE_LAYER_ALLOTED_AREA_mm2 0 //6.24 //6.21//71.5
|
||||
|
||||
// this variable can also be employed when solution optimization
|
||||
// with maximum area allocation is carried out.
|
||||
#define MAX_PERCENT_AWAY_FROM_ALLOTED_AREA 50
|
||||
|
||||
// this variable can also be employed when solution optimization
|
||||
// with maximum area allocation is carried out.
|
||||
#define MIN_AREA_EFFICIENCY 20
|
||||
|
||||
// this variable can be employed when solution with a desired
|
||||
// aspect ratio is required.
|
||||
#define STACKED_DIE_LAYER_ASPECT_RATIO 1
|
||||
|
||||
// this variable can be employed when solution with a desired
|
||||
// aspect ratio is required.
|
||||
#define MAX_PERCENT_AWAY_FROM_ASPECT_RATIO 101
|
||||
|
||||
// this variable can be employed to carry out solution optimization
|
||||
// for a certain target random cycle time.
|
||||
#define TARGET_CYCLE_TIME_ns 1000000000
|
||||
|
||||
#define NUMBER_PIPELINE_STAGES 4
|
||||
|
||||
// this can be used to model the length of interconnect
|
||||
// between a bank and a crossbar
|
||||
#define LENGTH_INTERCONNECT_FROM_BANK_TO_CROSSBAR 0 //3791 // 2880//micron
|
||||
|
||||
#define IS_CROSSBAR 0
|
||||
#define NUMBER_INPUT_PORTS_CROSSBAR 8
|
||||
#define NUMBER_OUTPUT_PORTS_CROSSBAR 8
|
||||
#define NUMBER_SIGNALS_PER_PORT_CROSSBAR 256
|
||||
|
||||
|
||||
#define MAT_LEAKAGE_REDUCTION_DUE_TO_SLEEP_TRANSISTORS_FACTOR 1
|
||||
#define LEAKAGE_REDUCTION_DUE_TO_LONG_CHANNEL_HP_TRANSISTORS_FACTOR 1
|
||||
|
||||
#define PAGE_MODE 0
|
||||
|
||||
#define MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA 60
|
||||
// We are actually not using this variable in the CACTI code. We just want to acknowledge that
|
||||
// this current should be multiplied by the DDR(n) system VDD value to compute the standby power
|
||||
// consumed during precharge.
|
||||
|
||||
|
||||
const double VDD_STORAGE_LOSS_FRACTION_WORST = 0.125;
|
||||
const double CU_RESISTIVITY = 0.022; //ohm-micron
|
||||
const double BULK_CU_RESISTIVITY = 0.018; //ohm-micron
|
||||
const double PERMITTIVITY_FREE_SPACE = 8.854e-18; //F/micron
|
||||
|
||||
const static uint32_t sram_num_cells_wl_stitching_ = 16;
|
||||
const static uint32_t dram_num_cells_wl_stitching_ = 64;
|
||||
const static uint32_t comm_dram_num_cells_wl_stitching_ = 256;
|
||||
const static double num_bits_per_ecc_b_ = 8.0;
|
||||
|
||||
const double bit_to_byte = 8.0;
|
||||
|
||||
#define MAX_NUMBER_GATES_STAGE 20
|
||||
#define MAX_NUMBER_HTREE_NODES 20
|
||||
#define NAND2_LEAK_STACK_FACTOR 0.2
|
||||
#define NAND3_LEAK_STACK_FACTOR 0.2
|
||||
#define NOR2_LEAK_STACK_FACTOR 0.2
|
||||
#define INV_LEAK_STACK_FACTOR 0.5
|
||||
#define MAX_NUMBER_ARRAY_PARTITIONS 1000000
|
||||
|
||||
// abbreviations used in this project
|
||||
// ----------------------------------
|
||||
//
|
||||
// num : number
|
||||
// rw : read/write
|
||||
// rd : read
|
||||
// wr : write
|
||||
// se : single-ended
|
||||
// sz : size
|
||||
// F : feature
|
||||
// w : width
|
||||
// h : height or horizontal
|
||||
// v : vertical or velocity
|
||||
|
||||
|
||||
// Numeric identifiers for the RAM cell / device technology flavors.
// Only the first enumerator is given an explicit value; the rest follow by
// implicit increment, so the values are 0 through 4 in declaration order.
enum ram_cell_tech_type_num
{
  itrs_hp = 0,   // 0
  itrs_lstp,     // 1
  itrs_lop,      // 2
  lp_dram,       // 3
  comm_dram      // 4
};
|
||||
|
||||
const double pppm[4] = {1,1,1,1};
|
||||
const double pppm_lkg[4] = {0,1,1,0};
|
||||
const double pppm_dyn[4] = {1,0,0,0};
|
||||
const double pppm_Isub[4] = {0,1,0,0};
|
||||
const double pppm_Ig[4] = {0,0,1,0};
|
||||
const double pppm_sc[4] = {0,0,0,1};
|
||||
|
||||
const double Ilinear_to_Isat_ratio =2.0;
|
||||
|
||||
|
||||
|
||||
#endif
|
126
T1/TP/TP1/cacti_7/contention.dat
Normal file
126
T1/TP/TP1/cacti_7/contention.dat
Normal file
|
@ -0,0 +1,126 @@
|
|||
l34c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l34c64l2b: 9 11 19 29 43 62 81 102
|
||||
l34c64l4b: 6 8 12 17 24 29 39 47
|
||||
l34c64l8b: 7 8 10 14 18 22 25 30
|
||||
l34c64l16b: 7 7 9 12 14 17 20 24
|
||||
l34c64l32b: 7 7 9 12 14 17 20 24 -r
|
||||
l34c64l64b: 7 7 9 12 14 17 20 24 -r
|
||||
l34c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l34c128l2b: 4 10 19 30 44 64 82 103
|
||||
l34c128l4b: 3 6 11 17 24 31 38 47
|
||||
l34c128l8b: 3 5 9 13 17 21 25 29
|
||||
l34c128l16b: 4 5 7 10 13 16 19 22
|
||||
l34c128l32b: 4 5 7 10 13 16 19 22 -r
|
||||
l34c128l64b: 4 5 7 10 13 16 19 22 -r
|
||||
l34c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l34c256l2b: 3 10 19 30 44 63 82 103
|
||||
l34c256l4b: 3 6 11 17 24 31 38 47
|
||||
l34c256l8b: 2 5 8 12 16 20 24 29
|
||||
l34c256l16b: 2 4 7 9 12 15 18 21
|
||||
l34c256l32b: 2 4 7 9 12 15 18 21 -r
|
||||
l34c256l64b: 2 4 7 9 12 15 18 21 -r
|
||||
l38c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l38c64l2b: 57 59 77 90 137 187 219 245
|
||||
l38c64l4b: 35 40 48 56 43 61 80 101
|
||||
l38c64l8b: 18 27 41 45 52 58 58 58 -r
|
||||
l38c64l16b: 16 17 19 35 40 49 53 53 -r
|
||||
l38c64l32b: 15 15 17 19 22 25 30 30 -r
|
||||
l38c64l64b: 15 15 17 19 22 25 30 30 -r
|
||||
l38c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l38c128l2b: 38 50 78 93 139 188 220 245
|
||||
l38c128l4b: 29 37 46 56 43 61 81 102
|
||||
l38c128l8b: 16 30 39 44 50 57 57 57 -r
|
||||
l38c128l16b: 14 16 19 33 40 47 52 52 -r
|
||||
l38c128l32b: 14 15 17 20 23 27 31 31 -r
|
||||
l38c128l64b: 14 15 17 20 23 27 31 31 -r
|
||||
l38c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l38c256l2b: 35 50 78 94 139 188 220 246
|
||||
l38c256l4b: 28 36 45 55 55 61 81 102
|
||||
l38c256l8b: 17 30 38 43 50 57 57 57 -r
|
||||
l38c256l16b: 15 17 21 32 40 47 51 51
|
||||
l38c256l32b: 15 17 19 21 24 29 33 33
|
||||
l38c256l64b: 15 17 19 21 24 29 33 33 -r
|
||||
l316c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l316c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l316c64l4b: 34 35 78 126 178 220 252 274
|
||||
l316c64l8b: 9 11 23 43 62 87 105 130
|
||||
l316c64l16b: 7 9 13 23 33 45 56 67
|
||||
l316c64l32b: 5 6 7 10 13 19 25 30
|
||||
l316c64l64b: 4 5 6 8 10 14 18 21
|
||||
l316c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l316c128l2b: 25 131 243 1000 1000 1000 1000 1000
|
||||
l316c128l4b: 8 28 79 127 179 221 253 274
|
||||
l316c128l8b: 4 9 22 43 62 88 106 131
|
||||
l316c128l16b: 4 6 11 21 32 44 55 67
|
||||
l316c128l32b: 4 6 11 12 12 18 24 29
|
||||
l316c128l64b: 2 3 5 7 9 13 17 21
|
||||
l316c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l316c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l316c256l4b: 5 28 80 128 180 221 253 274
|
||||
l316c256l8b: 3 8 22 43 63 88 107 131
|
||||
l316c256l16b: 2 5 11 21 32 44 55 67
|
||||
l316c256l32b: 2 3 5 8 12 18 24 29
|
||||
l316c256l64b: 2 3 4 6 9 13 17 21
|
||||
l24c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l24c64l2b: 10 12 24 41 60 86 105 122
|
||||
l24c64l4b: 5 7 13 20 29 38 47 56
|
||||
l24c64l8b: 5 6 9 14 18 24 29 35
|
||||
l24c64l16b: 4 5 7 10 12 16 19 22
|
||||
l24c64l32b: 5 5 6 8 10 12 14 17
|
||||
l24c64l64b: 5 5 6 8 10 12 14 16
|
||||
l24c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l24c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l24c128l4b: 3 7 13 20 29 38 47 57
|
||||
l24c128l8b: 3 5 9 13 18 23 29 35
|
||||
l24c128l16b: 3 4 6 9 12 15 19 22
|
||||
l24c128l32b: 3 4 5 7 9 11 14 16
|
||||
l24c128l64b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l24c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l24c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l24c256l4b: 2 6 13 20 29 38 47 57
|
||||
l24c256l8b: 2 4 8 13 18 23 28 35
|
||||
l24c256l16b: 2 3 6 8 11 15 18 22
|
||||
l24c256l32b: 2 3 5 6 8 11 14 16
|
||||
l24c256l64b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l28c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l28c64l2b: 46 52 117 157 188 225 246 261
|
||||
l28c64l4b: 19 25 39 54 96 107 120 150
|
||||
l28c64l8b: 9 12 21 30 39 47 58 79
|
||||
l28c64l16b: 8 9 11 16 25 32 37 42
|
||||
l28c64l32b: 7 8 9 11 14 19 23 28
|
||||
l28c64l64b: 7 7 8 10 12 14 18 22
|
||||
l28c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l28c128l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l28c128l4b: 12 22 39 54 98 108 130 151
|
||||
l28c128l8b: 7 12 21 30 39 48 59 80
|
||||
l28c128l16b: 6 8 11 16 24 31 37 42
|
||||
l28c128l32b: 6 7 9 11 14 19 24 28
|
||||
l28c128l64b: 6 7 9 11 14 19 24 28
|
||||
l28c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l28c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l28c256l4b: 12 22 39 54 100 108 130 152
|
||||
l28c256l8b: 7 12 21 30 39 48 59 81
|
||||
l28c256l16b: 6 8 11 16 24 31 37 42
|
||||
l28c256l32b: 6 7 9 11 14 19 24 28
|
||||
l28c256l64b: 6 7 9 11 14 19 24 28
|
||||
l216c64l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l216c64l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l216c64l4b: 34 35 78 126 178 220 252 274
|
||||
l216c64l8b: 9 11 23 43 62 87 105 130
|
||||
l216c64l16b: 7 9 13 23 33 45 56 67
|
||||
l216c64l32b: 5 6 7 10 13 19 25 30
|
||||
l216c64l64b: 4 5 6 8 10 14 18 21
|
||||
l216c128l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l216c128l2b: 25 131 243 1000 1000 1000 1000 1000
|
||||
l216c128l4b: 8 28 79 127 179 221 253 274
|
||||
l216c128l8b: 4 9 22 43 62 88 106 131
|
||||
l216c128l16b: 4 6 11 21 32 44 55 67
|
||||
l216c128l32b: 4 6 11 12 12 18 24 29
|
||||
l216c128l64b: 2 3 5 7 9 13 17 21
|
||||
l216c256l1b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l216c256l2b: 1000 1000 1000 1000 1000 1000 1000 1000
|
||||
l216c256l4b: 5 28 80 128 180 221 253 274
|
||||
l216c256l8b: 3 8 22 43 63 88 107 131
|
||||
l216c256l16b: 2 5 11 21 32 44 55 67
|
||||
l216c256l32b: 2 3 5 8 12 18 24 29
|
||||
l216c256l64b: 2 3 4 6 9 13 17 21
|
161
T1/TP/TP1/cacti_7/crossbar.cc
Normal file
161
T1/TP/TP1/cacti_7/crossbar.cc
Normal file
|
@ -0,0 +1,161 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "crossbar.h"
|
||||
|
||||
#define ASPECT_THRESHOLD .8
|
||||
#define ADJ 1
|
||||
|
||||
// Constructs a crossbar model.
//   n_inp_     : number of input ports
//   n_out_     : number of output ports
//   flit_size_ : flit size (print_crossbar() reports it in bits)
//   dt         : device parameters; dereferenced here, so it must not be null
// Only the sizing inputs are set up here; the capacitance, area, power and
// delay members are filled in later by output_buffer() / compute_power().
Crossbar::Crossbar(
    double n_inp_,
    double n_out_,
    double flit_size_,
    /*TechnologyParameter::*/DeviceType *dt)
  : n_inp(n_inp_),
    n_out(n_out_),
    flit_size(flit_size_),
    deviceType(dt)
{
  // Cross-point height adjustment starts at 1; compute_power() may raise it.
  CB_ADJ = 1;
  Vdd = deviceType->Vdd;
  // Minimum PMOS width is the minimum NMOS width scaled by the n-to-p drive ratio.
  min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
}

// Nothing to release.
Crossbar::~Crossbar()
{
}
|
||||
|
||||
double Crossbar::output_buffer()
|
||||
{
|
||||
|
||||
//Wire winit(4, 4);
|
||||
double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
|
||||
Wire w1(g_ip->wt, l_eff);
|
||||
//double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
|
||||
double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
|
||||
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||
// the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
|
||||
TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
|
||||
TriS2 = s1; //driver transistor
|
||||
|
||||
if (TriS1 < 1)
|
||||
TriS1 = 1;
|
||||
|
||||
double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
|
||||
gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
|
||||
// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||
// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
|
||||
// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
// gate_C(TriS2*min_w_pmos, 0);
|
||||
tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||
gate_C(TriS2*g_tp.min_w_nmos_, 0)+
|
||||
drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||
drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(TriS2*min_w_pmos, 0);
|
||||
double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
|
||||
double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
|
||||
|
||||
tri_inp_cap = input_cap;
|
||||
tri_out_cap = output_cap;
|
||||
tri_ctr_cap = ctr_cap;
|
||||
return input_cap + output_cap + ctr_cap;
|
||||
}
|
||||
|
||||
// Computes the crossbar footprint (area.w, area.h), its dynamic, leakage and
// gate-leakage figures (power.readOp.*) and its delay.
//
// The cross-point cell is stretched by CB_ADJ. If the resulting crossbar aspect
// ratio is below ASPECT_THRESHOLD and both port counts exceed 2, CB_ADJ is
// raised by 0.2 and the function calls itself again, as long as CB_ADJ < 4.
//
// NOTE(review): after the recursive call returns, execution continues below in
// the calling frame. power.readOp.* and delay are therefore recomputed with
// this frame's w1/w2 (built with the smaller CB_ADJ) while area.w/area.h hold
// the values written by the innermost call. Confirm whether an early return
// after the recursive call was intended.
void Crossbar::compute_power()
|
||||
{
|
||||
|
||||
// winit is not referenced again in this function; presumably its construction
// initializes shared Wire state -- TODO confirm against wire.h.
Wire winit(4, 4);
|
||||
// output_buffer() also sets TriS1/TriS2 and the tri_*_cap members used below.
double tri_cap = output_buffer();
|
||||
assert(tri_cap > 0);
|
||||
//area of a tristate logic
|
||||
double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
|
||||
g_area *= 2; // to model area of output transistors
|
||||
g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
|
||||
g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
|
||||
// A larger CB_ADJ makes each cell taller, hence narrower for the same area.
double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
|
||||
// effective no. of tristate buffers that need to be laid side by side
|
||||
int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
|
||||
// Crossbar width is the larger of the buffer-limited and the wire-pitch-limited extent.
double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
|
||||
Wire w1(g_ip->wt, wire_len);
|
||||
|
||||
area.w = wire_len;
|
||||
area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
|
||||
Wire w2(g_ip->wt, area.h);
|
||||
|
||||
double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
|
||||
// Fold the ratio into (0, 1] so that 1 means "square" in either orientation.
if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
|
||||
|
||||
if (aspect_ratio_cb < ASPECT_THRESHOLD) {
|
||||
if (n_out > 2 && n_inp > 2) {
|
||||
CB_ADJ+=0.2;
|
||||
//cout << "CB ADJ " << CB_ADJ << endl;
|
||||
// Bounded retry: stop recursing once CB_ADJ reaches 4.
if (CB_ADJ < 4) {
|
||||
this->compute_power();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Dynamic term: both wires plus the switched buffer capacitances at Vdd^2, scaled by flit_size.
power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
|
||||
// Leakage terms are scaled by n_inp * n_out * flit_size.
power.readOp.leakage = n_inp * n_out * flit_size * (
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
|
||||
w1.power.readOp.leakage + w2.power.readOp.leakage);
|
||||
power.readOp.gate_leakage = n_inp * n_out * flit_size * (
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
|
||||
w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
|
||||
|
||||
// delay calculation
|
||||
double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
|
||||
Wire wdriver(g_ip->wt, l_eff);
|
||||
// Lumped RC: wire resistance over width+height plus the driver on-resistance,
// against wire capacitance plus the buffer input/output loads.
double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
|
||||
double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
|
||||
delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
|
||||
|
||||
// NOTE(review): with empty parentheses this line declares a function named
// wreset that returns Wire; it does not construct a Wire object, so it has no
// runtime effect. If the intent was to construct a Wire here (e.g. to undo
// whatever winit(4, 4) set up), the parentheses would have to be dropped --
// confirm against wire.h before changing, since that would alter results.
Wire wreset();
|
||||
}
|
||||
|
||||
void Crossbar::print_crossbar()
|
||||
{
|
||||
cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
|
||||
cout << "Flit size : " << flit_size << " bits" << endl;
|
||||
cout << "Width : " << area.w << " u" << endl;
|
||||
cout << "Height : " << area.h << " u" << endl;
|
||||
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
|
||||
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
|
||||
cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
|
||||
cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
|
||||
}
|
||||
|
||||
|
83
T1/TP/TP1/cacti_7/crossbar.h
Normal file
83
T1/TP/TP1/cacti_7/crossbar.h
Normal file
|
@ -0,0 +1,83 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#ifndef __CROSSBAR__
|
||||
#define __CROSSBAR__
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include "basic_circuit.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
#include "mat.h"
|
||||
#include "wire.h"
|
||||
|
||||
class Crossbar : public Component
|
||||
{
|
||||
public:
|
||||
Crossbar(
|
||||
double in,
|
||||
double out,
|
||||
double flit_sz,
|
||||
/*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global));
|
||||
~Crossbar();
|
||||
|
||||
void print_crossbar();
|
||||
double output_buffer();
|
||||
void compute_power();
|
||||
|
||||
double n_inp, n_out;
|
||||
double flit_size;
|
||||
double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
|
||||
|
||||
private:
|
||||
double CB_ADJ;
|
||||
/*
|
||||
* Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar
|
||||
* buffer is adjusted to get an aspect ratio of whole cross bar close to one;
|
||||
* when adjust the ratio, the number of wires route over the tri-state buffers does not change,
|
||||
* however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase
|
||||
* during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch
|
||||
* will increase. As a result, the height of the crossbar (area.h) will increase.
|
||||
*/
|
||||
|
||||
/*TechnologyParameter::*/DeviceType *deviceType;
|
||||
double TriS1, TriS2;
|
||||
double min_w_pmos, Vdd;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
254
T1/TP/TP1/cacti_7/ddr3.cfg
Normal file
254
T1/TP/TP1/cacti_7/ddr3.cfg
Normal file
|
@ -0,0 +1,254 @@
|
|||
# Cache size
|
||||
//-size (bytes) 2048
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 32768
|
||||
//-size (bytes) 131072
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
# power gating
|
||||
-Array Power Gating - "false"
|
||||
-WL Power Gating - "false"
|
||||
-CL Power Gating - "false"
|
||||
-Bitline floating - "false"
|
||||
-Interconnect Power Gating - "false"
|
||||
-Power Gating Performance Loss 0.01
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 64
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
//-associativity 8
|
||||
-associativity 8
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 1
|
||||
-technology (u) 0.022
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.090
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
//-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
//-Tag array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
//-Tag array peripheral type - "itrs-lop"
|
||||
|
||||
# Bus width include data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
-output/input bus width 512
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 360
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 22
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcast in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer to the CACTI-6 technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
//-Wire outside mat - "global"
|
||||
-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
|
||||
|
||||
#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report (), especially Chapters 2 and 3.
|
||||
|
||||
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO). Additional memory types can be defined by the user in extio_technology.cc, along with their technology and configuration parameters.
|
||||
|
||||
-dram_type "D"
|
||||
//-dram_type "L"
|
||||
//-dram_type "W"
|
||||
//-dram_type "S"
|
||||
|
||||
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||||
|
||||
//-iostate "R"
|
||||
-iostate "W"
|
||||
//-iostate "I"
|
||||
//-iostate "S"
|
||||
|
||||
#Address bus timing. To alleviate the timing on the command and address bus due to high loading (shared across all memories on the channel), the interface allows for multi-cycle timing options.
|
||||
|
||||
-addr_timing 0.5 //DDR
|
||||
//-addr_timing 1.0 //SDR (half of DQ rate)
|
||||
//-addr_timing 2.0 //2T timing (One fourth of DQ rate)
|
||||
//-addr_timing 3.0 // 3T timing (One sixth of DQ rate)
|
||||
|
||||
# Memory Density (Gbit per memory/DRAM die)
|
||||
|
||||
-mem_density 8 Gb //Valid values 2^n Gb
|
||||
|
||||
# IO frequency (MHz) (frequency of the external memory interface).
|
||||
|
||||
-bus_freq 800 MHz //As of current memory standards (2013), valid range 0 to 1.5 GHz for DDR3, 0 to 533 MHz for LPDDR2, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential. However this can change, and the user is free to define valid ranges based on new memory types or extending beyond existing standards for existing dram types.
|
||||
|
||||
# Duty Cycle (fraction of time in the Memory State defined above)
|
||||
|
||||
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||||
|
||||
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
#-activity_dq .50 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_ca 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
|
||||
#-activity_ca 0.25 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
|
||||
|
||||
# Number of DQ pins
|
||||
|
||||
-num_dq 72 //Number of DQ pins. Includes ECC pins.
|
||||
|
||||
# Number of DQS pins. DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typically differential, just like the CLK pin.
|
||||
|
||||
-num_dqs 36 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||||
|
||||
# Number of CA pins
|
||||
|
||||
-num_ca 35 //Valid range 0 to 35 pins.
|
||||
#-num_ca 25 //Valid range 0 to 35 pins.
|
||||
|
||||
# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin.
|
||||
|
||||
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
|
||||
|
||||
# Number of Physical Ranks
|
||||
|
||||
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register.
|
||||
|
||||
# Width of the Memory Data Bus
|
||||
|
||||
-mem_data_width 4 //x4 or x8 or x16 or x32 memories. For WideIO up to x128.
|
1673
T1/TP/TP1/cacti_7/decoder.cc
Normal file
1673
T1/TP/TP1/cacti_7/decoder.cc
Normal file
File diff suppressed because it is too large
Load diff
272
T1/TP/TP1/cacti_7/decoder.h
Normal file
272
T1/TP/TP1/cacti_7/decoder.h
Normal file
|
@ -0,0 +1,272 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#ifndef __DECODER_H__
|
||||
#define __DECODER_H__
|
||||
|
||||
#include "area.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
#include "powergating.h"
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
class Decoder : public Component
|
||||
{
|
||||
public:
|
||||
Decoder(
|
||||
int _num_dec_signals,
|
||||
bool flag_way_select,
|
||||
double _C_ld_dec_out,
|
||||
double _R_wire_dec_out,
|
||||
bool fully_assoc_,
|
||||
bool is_dram_,
|
||||
bool is_wl_tr_,
|
||||
const Area & cell_);
|
||||
|
||||
bool exist;
|
||||
int num_in_signals;
|
||||
double C_ld_dec_out;
|
||||
double R_wire_dec_out;
|
||||
int num_gates;
|
||||
int num_gates_min;
|
||||
double w_dec_n[MAX_NUMBER_GATES_STAGE];
|
||||
double w_dec_p[MAX_NUMBER_GATES_STAGE];
|
||||
double delay;
|
||||
//powerDef power;
|
||||
bool fully_assoc;
|
||||
bool is_dram;
|
||||
bool is_wl_tr;
|
||||
|
||||
double total_driver_nwidth;
|
||||
double total_driver_pwidth;
|
||||
Sleep_tx * sleeptx;
|
||||
|
||||
const Area & cell;
|
||||
int nodes_DSTN;
|
||||
|
||||
void compute_widths();
|
||||
void compute_area();
|
||||
double compute_delays(double inrisetime); // return outrisetime
|
||||
void compute_power_gating();
|
||||
|
||||
void leakage_feedback(double temperature);
|
||||
|
||||
~Decoder()
|
||||
{
|
||||
if (!sleeptx)
|
||||
delete sleeptx;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
class PredecBlk : public Component
|
||||
{
|
||||
public:
|
||||
PredecBlk(
|
||||
int num_dec_signals,
|
||||
Decoder * dec,
|
||||
double C_wire_predec_blk_out,
|
||||
double R_wire_predec_blk_out,
|
||||
int num_dec_per_predec,
|
||||
bool is_dram_,
|
||||
bool is_blk1);
|
||||
|
||||
Decoder * dec;
|
||||
bool exist;
|
||||
int number_input_addr_bits;
|
||||
double C_ld_predec_blk_out;
|
||||
double R_wire_predec_blk_out;
|
||||
int branch_effort_nand2_gate_output;
|
||||
int branch_effort_nand3_gate_output;
|
||||
bool flag_two_unique_paths;
|
||||
int flag_L2_gate;
|
||||
int number_inputs_L1_gate;
|
||||
int number_gates_L1_nand2_path;
|
||||
int number_gates_L1_nand3_path;
|
||||
int number_gates_L2;
|
||||
int min_number_gates_L1;
|
||||
int min_number_gates_L2;
|
||||
int num_L1_active_nand2_path;
|
||||
int num_L1_active_nand3_path;
|
||||
double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
|
||||
double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
|
||||
double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
|
||||
double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
|
||||
double w_L2_n[MAX_NUMBER_GATES_STAGE];
|
||||
double w_L2_p[MAX_NUMBER_GATES_STAGE];
|
||||
double delay_nand2_path;
|
||||
double delay_nand3_path;
|
||||
powerDef power_nand2_path;
|
||||
powerDef power_nand3_path;
|
||||
powerDef power_L2;
|
||||
|
||||
bool is_dram_;
|
||||
|
||||
void compute_widths();
|
||||
void compute_area();
|
||||
|
||||
void leakage_feedback(double temperature);
|
||||
|
||||
pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
|
||||
// return <outrise_nand2, outrise_nand3>
|
||||
};
|
||||
|
||||
|
||||
class PredecBlkDrv : public Component
|
||||
{
|
||||
public:
|
||||
PredecBlkDrv(
|
||||
int way_select,
|
||||
PredecBlk * blk_,
|
||||
bool is_dram);
|
||||
|
||||
int flag_driver_exists;
|
||||
int number_input_addr_bits;
|
||||
int number_gates_nand2_path;
|
||||
int number_gates_nand3_path;
|
||||
int min_number_gates;
|
||||
int num_buffers_driving_1_nand2_load;
|
||||
int num_buffers_driving_2_nand2_load;
|
||||
int num_buffers_driving_4_nand2_load;
|
||||
int num_buffers_driving_2_nand3_load;
|
||||
int num_buffers_driving_8_nand3_load;
|
||||
int num_buffers_nand3_path;
|
||||
double c_load_nand2_path_out;
|
||||
double c_load_nand3_path_out;
|
||||
double r_load_nand2_path_out;
|
||||
double r_load_nand3_path_out;
|
||||
double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
|
||||
double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
|
||||
double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
|
||||
double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
|
||||
double delay_nand2_path;
|
||||
double delay_nand3_path;
|
||||
powerDef power_nand2_path;
|
||||
powerDef power_nand3_path;
|
||||
|
||||
PredecBlk * blk;
|
||||
Decoder * dec;
|
||||
bool is_dram_;
|
||||
int way_select;
|
||||
|
||||
void compute_widths();
|
||||
void compute_area();
|
||||
|
||||
void leakage_feedback(double temperature);
|
||||
|
||||
|
||||
pair<double, double> compute_delays(
|
||||
double inrisetime_nand2_path,
|
||||
double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
|
||||
|
||||
inline int num_addr_bits_nand2_path()
|
||||
{
|
||||
return num_buffers_driving_1_nand2_load +
|
||||
num_buffers_driving_2_nand2_load +
|
||||
num_buffers_driving_4_nand2_load;
|
||||
}
|
||||
inline int num_addr_bits_nand3_path()
|
||||
{
|
||||
return num_buffers_driving_2_nand3_load +
|
||||
num_buffers_driving_8_nand3_load;
|
||||
}
|
||||
double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
|
||||
};
|
||||
|
||||
|
||||
|
||||
class Predec : public Component
|
||||
{
|
||||
public:
|
||||
Predec(
|
||||
PredecBlkDrv * drv1,
|
||||
PredecBlkDrv * drv2);
|
||||
|
||||
double compute_delays(double inrisetime); // return outrisetime
|
||||
|
||||
void leakage_feedback(double temperature);
|
||||
PredecBlk * blk1;
|
||||
PredecBlk * blk2;
|
||||
PredecBlkDrv * drv1;
|
||||
PredecBlkDrv * drv2;
|
||||
|
||||
powerDef block_power;
|
||||
powerDef driver_power;
|
||||
|
||||
private:
|
||||
// returns <delay, risetime>
|
||||
pair<double, double> get_max_delay_before_decoder(
|
||||
pair<double, double> input_pair1,
|
||||
pair<double, double> input_pair2);
|
||||
};
|
||||
|
||||
|
||||
|
||||
class Driver : public Component
|
||||
{
|
||||
public:
|
||||
Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
|
||||
|
||||
int number_gates;
|
||||
int min_number_gates;
|
||||
double width_n[MAX_NUMBER_GATES_STAGE];
|
||||
double width_p[MAX_NUMBER_GATES_STAGE];
|
||||
double c_gate_load;
|
||||
double c_wire_load;
|
||||
double r_wire_load;
|
||||
double delay;
|
||||
// powerDef power;
|
||||
bool is_dram_;
|
||||
|
||||
double total_driver_nwidth;
|
||||
double total_driver_pwidth;
|
||||
Sleep_tx * sleeptx;
|
||||
|
||||
void compute_widths();
|
||||
void compute_area();
|
||||
double compute_delay(double inrisetime);
|
||||
|
||||
void compute_power_gating();
|
||||
|
||||
~Driver()
|
||||
{
|
||||
if (!sleeptx)
|
||||
delete sleeptx;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
#endif
|
114
T1/TP/TP1/cacti_7/dram.cfg
Normal file
114
T1/TP/TP1/cacti_7/dram.cfg
Normal file
|
@ -0,0 +1,114 @@
|
|||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
-block size (bytes) 64
|
||||
-associativity 1
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
-UCA bank count 1
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.045
|
||||
-technology (u) 0.068
|
||||
//-technology (u) 0.078
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "comm-dram"
|
||||
|
||||
# following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
|
||||
# following parameter can have one of the five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
|
||||
# following parameter can have one of the three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
|
||||
# Bus width include data bits and address bits required by the decoder
|
||||
//-output/input bus width 512
|
||||
-output/input bus width 64
|
||||
|
||||
-operating temperature (K) 350
|
||||
|
||||
-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 45
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
//-design objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:0
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:1000000
|
||||
//-deviate (delay, dynamic power, leakage power, cycle time, area) 200:100000:100000:100000:20
|
||||
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_10"
|
||||
|
||||
-Wire inside mat - "global"
|
||||
//-Wire inside mat - "semi-global"
|
||||
-Wire outside mat - "global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
-Print input parameters - "true"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
########### NUCA Params ############
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
234
T1/TP/TP1/cacti_7/etude.txt
Normal file
234
T1/TP/TP1/cacti_7/etude.txt
Normal file
|
@ -0,0 +1,234 @@
|
|||
Cache size : 131072
|
||||
Block size : 64
|
||||
Associativity : 2
|
||||
Read only ports : 0
|
||||
Write only ports : 0
|
||||
Read write ports : 1
|
||||
Single ended read ports : 0
|
||||
Cache banks (UCA) : 1
|
||||
Technology : 0.09
|
||||
Temperature : 360
|
||||
Tag size : 42
|
||||
array type : Cache
|
||||
Model as memory : 0
|
||||
Model as 3D memory : 0
|
||||
Access mode : 0
|
||||
Data array cell type : 0
|
||||
Data array peripheral type : 0
|
||||
Tag array cell type : 0
|
||||
Tag array peripheral type : 0
|
||||
Optimization target : 2
|
||||
Design objective (UCA wt) : 0 0 0 100 0
|
||||
Design objective (UCA dev) : 20 100000 100000 100000 100000
|
||||
Cache model : 0
|
||||
Nuca bank : 0
|
||||
Wire inside mat : 1
|
||||
Wire outside mat : 1
|
||||
Interconnect projection : 1
|
||||
Wire signaling : 1
|
||||
Print level : 1
|
||||
ECC overhead : 1
|
||||
Page size : 8192
|
||||
Burst length : 8
|
||||
Internal prefetch width : 8
|
||||
Force cache config : 0
|
||||
Subarray Driver direction : 1
|
||||
iostate : WRITE
|
||||
dram_ecc : NO_ECC
|
||||
io_type : DDR3
|
||||
dram_dimm : UDIMM
|
||||
IO Area (sq.mm) = inf
|
||||
IO Timing Margin (ps) = -14.1667
|
||||
IO Votlage Margin (V) = 0.155
|
||||
IO Dynamic Power (mW) = 1506.36 PHY Power (mW) = 232.752 PHY Wakeup Time (us) = 27.503
|
||||
IO Termination and Bias Power (mW) = 2505.96
|
||||
|
||||
---------- CACTI (version 7.0.3DD Prerelease of Aug, 2012), Uniform Cache Access SRAM Model ----------
|
||||
|
||||
Cache Parameters:
|
||||
Total cache size (bytes): 131072
|
||||
Number of banks: 1
|
||||
Associativity: 2
|
||||
Block size (bytes): 64
|
||||
Read/write Ports: 1
|
||||
Read ports: 0
|
||||
Write ports: 0
|
||||
Technology size (nm): 90
|
||||
|
||||
Access time (ns): 1.47098
|
||||
Cycle time (ns): 1.86851
|
||||
Total dynamic read energy per access (nJ): 0.303592
|
||||
Total dynamic write energy per access (nJ): 0.615022
|
||||
Total leakage power of a bank (mW): 59.1454
|
||||
Total gate leakage power of a bank (mW): 4.55691
|
||||
Cache height x width (mm): 1.57965 x 1.42405
|
||||
|
||||
Best Ndwl : 2
|
||||
Best Ndbl : 2
|
||||
Best Nspd : 1
|
||||
Best Ndcm : 2
|
||||
Best Ndsam L1 : 2
|
||||
Best Ndsam L2 : 1
|
||||
|
||||
Best Ntwl : 2
|
||||
Best Ntbl : 2
|
||||
Best Ntspd : 4
|
||||
Best Ntcm : 1
|
||||
Best Ntsam L1 : 8
|
||||
Best Ntsam L2 : 1
|
||||
Data array, H-tree wire type: Global wires with 30% delay penalty
|
||||
Tag array, H-tree wire type: Global wires with 30% delay penalty
|
||||
|
||||
Time Components:
|
||||
|
||||
Data side (with Output driver) (ns): 1.47098
|
||||
H-tree input delay (ns): 0
|
||||
Decoder + wordline delay (ns): 0.752867
|
||||
Bitline delay (ns): 0.546781
|
||||
Sense Amplifier delay (ns): 0.0107354
|
||||
H-tree output delay (ns): 0.160596
|
||||
|
||||
Tag side (with Output driver) (ns): 0.71334
|
||||
H-tree input delay (ns): 0
|
||||
Decoder + wordline delay (ns): 0.466679
|
||||
Bitline delay (ns): 0.147706
|
||||
Sense Amplifier delay (ns): 0.0107949
|
||||
Comparator delay (ns): 0.131234
|
||||
H-tree output delay (ns): 0.08816
|
||||
|
||||
|
||||
Power Components:
|
||||
|
||||
Data array: Total dynamic read energy/access (nJ): 0.286158
|
||||
Total energy in H-tree (that includes both address and data transfer) (nJ): 0
|
||||
Output Htree inside bank Energy (nJ): 0
|
||||
Decoder (nJ): 0.00164907
|
||||
Wordline (nJ): 0.00212735
|
||||
Bitline mux & associated drivers (nJ): 0.00335251
|
||||
Sense amp mux & associated drivers (nJ): 0
|
||||
Bitlines precharge and equalization circuit (nJ): 0.0161369
|
||||
Bitlines (nJ): 0.116857
|
||||
Sense amplifier energy (nJ): 0.00726078
|
||||
Sub-array output driver (nJ): 0.137516
|
||||
Total leakage power of a bank (mW): 55.1285
|
||||
Total leakage power in H-tree (that includes both address and data network) ((mW)): 0
|
||||
Total leakage power in cells (mW): 0
|
||||
Total leakage power in row logic(mW): 0
|
||||
Total leakage power in column logic(mW): 0
|
||||
Total gate leakage power in H-tree (that includes both address and data network) ((mW)): 0
|
||||
|
||||
Tag array: Total dynamic read energy/access (nJ): 0.0174337
|
||||
Total leakage read/write power of a bank (mW): 4.01688
|
||||
Total energy in H-tree (that includes both address and data transfer) (nJ): 0
|
||||
Output Htree inside a bank Energy (nJ): 0
|
||||
Decoder (nJ): 0.000340468
|
||||
Wordline (nJ): 0.000710492
|
||||
Bitline mux & associated drivers (nJ): 0
|
||||
Sense amp mux & associated drivers (nJ): 0.000330669
|
||||
Bitlines precharge and equalization circuit (nJ): 0.00425803
|
||||
Bitlines (nJ): 0.00759182
|
||||
Sense amplifier energy (nJ): 0.00354912
|
||||
Sub-array output driver (nJ): 0.000194898
|
||||
Total leakage power of a bank (mW): 4.01688
|
||||
Total leakage power in H-tree (that includes both address and data network) ((mW)): 0
|
||||
Total leakage power in cells (mW): 0
|
||||
Total leakage power in row logic(mW): 0
|
||||
Total leakage power in column logic(mW): 0
|
||||
Total gate leakage power in H-tree (that includes both address and data network) ((mW)): 0
|
||||
|
||||
|
||||
Area Components:
|
||||
|
||||
Data array: Area (mm2): 1.78124
|
||||
Height (mm): 1.57965
|
||||
Width (mm): 1.12762
|
||||
Area efficiency (Memory cell area/Total area) - 78.3192 %
|
||||
MAT Height (mm): 1.57965
|
||||
MAT Length (mm): 1.12762
|
||||
Subarray Height (mm): 0.672768
|
||||
Subarray Length (mm): 0.5427
|
||||
|
||||
Tag array: Area (mm2): 0.108777
|
||||
Height (mm): 0.366956
|
||||
Width (mm): 0.296431
|
||||
Area efficiency (Memory cell area/Total area) - 77.9289 %
|
||||
MAT Height (mm): 0.366956
|
||||
MAT Length (mm): 0.296431
|
||||
Subarray Height (mm): 0.168192
|
||||
Subarray Length (mm): 0.1314
|
||||
|
||||
Wire Properties:
|
||||
|
||||
Delay Optimal
|
||||
Repeater size - 61.5792
|
||||
Repeater spacing - 0.321831 (mm)
|
||||
Delay - 0.137938 (ns/mm)
|
||||
PowerD - 0.000766371 (nJ/mm)
|
||||
PowerL - 0.00525075 (mW/mm)
|
||||
PowerLgate - 0.000882254 (mW/mm)
|
||||
Wire width - 0.09 microns
|
||||
Wire spacing - 0.09 microns
|
||||
|
||||
5% Overhead
|
||||
Repeater size - 34.5792
|
||||
Repeater spacing - 0.421831 (mm)
|
||||
Delay - 0.144333 (ns/mm)
|
||||
PowerD - 0.000519963 (nJ/mm)
|
||||
PowerL - 0.00224953 (mW/mm)
|
||||
PowerLgate - 0.000377976 (mW/mm)
|
||||
Wire width - 0.09 microns
|
||||
Wire spacing - 0.09 microns
|
||||
|
||||
10% Overhead
|
||||
Repeater size - 32.5792
|
||||
Repeater spacing - 0.521831 (mm)
|
||||
Delay - 0.151515 (ns/mm)
|
||||
PowerD - 0.000485471 (nJ/mm)
|
||||
PowerL - 0.00171327 (mW/mm)
|
||||
PowerLgate - 0.000287871 (mW/mm)
|
||||
Wire width - 0.09 microns
|
||||
Wire spacing - 0.09 microns
|
||||
|
||||
20% Overhead
|
||||
Repeater size - 27.5792
|
||||
Repeater spacing - 0.621831 (mm)
|
||||
Delay - 0.164984 (ns/mm)
|
||||
PowerD - 0.000447956 (nJ/mm)
|
||||
PowerL - 0.00121709 (mW/mm)
|
||||
PowerLgate - 0.000204502 (mW/mm)
|
||||
Wire width - 0.09 microns
|
||||
Wire spacing - 0.09 microns
|
||||
|
||||
30% Overhead
|
||||
Repeater size - 21.5792
|
||||
Repeater spacing - 0.621831 (mm)
|
||||
Delay - 0.179014 (ns/mm)
|
||||
PowerD - 0.000419905 (nJ/mm)
|
||||
PowerL - 0.000952309 (mW/mm)
|
||||
PowerLgate - 0.000160011 (mW/mm)
|
||||
Wire width - 0.09 microns
|
||||
Wire spacing - 0.09 microns
|
||||
|
||||
Low-swing wire (1 mm) - Note: Unlike repeated wires,
|
||||
delay and power values of low-swing wires do not
|
||||
have a linear relationship with length.
|
||||
delay - 0.611231 (ns)
|
||||
powerD - 2.52036e-05 (nJ)
|
||||
PowerL - 2.71875e-07 (mW)
|
||||
PowerLgate - 8.41995e-08 (mW)
|
||||
Wire width - 1.8e-07 microns
|
||||
Wire spacing - 1.8e-07 microns
|
||||
|
||||
|
||||
top 3 best memory configurations are:
|
||||
Memory cap: 80 GB num_bobs: 1 bw: 533 (MHz) cost: $731.2 energy: 32.6101 (nJ)
|
||||
{
|
||||
(0) BoB cap: 80 GB num_channels: 1 bw: 533 (MHz) cost: $731.2 energy: 32.6101 (nJ)
|
||||
==============
|
||||
(0) cap: 80 GB bw: 533 (MHz) cost: $731.2 dpc: 3 energy: 32.6101 (nJ) DIMM: RDIMM low power: F [ 0(4GB) 0(8GB) 1(16GB) 2(32GB) 0(64GB) ]
|
||||
==============
|
||||
|
||||
}
|
||||
|
||||
=============================================
|
||||
|
506
T1/TP/TP1/cacti_7/extio.cc
Normal file
506
T1/TP/TP1/cacti_7/extio.cc
Normal file
|
@ -0,0 +1,506 @@
|
|||
#include "extio.h"
|
||||
#include <cassert>
|
||||
|
||||
|
||||
// Keeps the supplied IO technology parameter set in io_param; the extio_*
// member functions read their coefficients through that pointer.
Extio::Extio(IOTechParam *iot)
  : io_param(iot)
{
}
|
||||
|
||||
|
||||
//External IO AREA. Does not include PHY or decap, includes only IO active circuit. More details can be found in the CACTI-IO technical report (), Chapter 2.3.
|
||||
|
||||
void Extio::extio_area()
|
||||
{
|
||||
|
||||
//Area per IO, assuming drive stage and ODT are shared
|
||||
double single_io_area = io_param->ioarea_c +
|
||||
(io_param->ioarea_k0/io_param->r_on)+(1/io_param->r_on)*
|
||||
(io_param->ioarea_k1*io_param->frequency +
|
||||
io_param->ioarea_k2*io_param->frequency*io_param->frequency +
|
||||
io_param->ioarea_k3*io_param->frequency*
|
||||
io_param->frequency*io_param->frequency); // IO Area in sq.mm.
|
||||
|
||||
//Area per IO if ODT requirements are more stringent than the Ron
|
||||
//requirements in determining size of driver
|
||||
if (2*io_param->rtt1_dq_read < io_param->r_on) {
|
||||
single_io_area = io_param->ioarea_c +
|
||||
(io_param->ioarea_k0/(2*io_param->rtt1_dq_read))+
|
||||
(1/io_param->r_on)*(io_param->ioarea_k1*io_param->frequency +
|
||||
io_param->ioarea_k2*io_param->frequency*io_param->frequency +
|
||||
io_param->ioarea_k3*io_param->frequency*io_param->frequency*io_param->frequency);
|
||||
}
|
||||
|
||||
//Total IO area
|
||||
io_area = (g_ip->num_dq + g_ip->num_dqs + g_ip->num_ca + g_ip->num_clk) *
|
||||
single_io_area;
|
||||
|
||||
printf("IO Area (sq.mm) = ");
|
||||
cout << io_area << endl;
|
||||
|
||||
}
|
||||
|
||||
//External IO Termination Power. More details can be found in the CACTI-IO technical report (), Chapter 2.1.
|
||||
|
||||
void Extio::extio_power_term()
|
||||
{
|
||||
|
||||
//IO Termination and Bias Power
|
||||
|
||||
//Bias and Leakage Power
|
||||
power_bias = io_param->i_bias * io_param->vdd_io +
|
||||
io_param->i_leak * (g_ip->num_dq +
|
||||
g_ip->num_dqs +
|
||||
g_ip->num_clk +
|
||||
g_ip->num_ca) * io_param->vdd_io/1000000;
|
||||
|
||||
|
||||
//Termination Power
|
||||
power_termination_read = 1000 * (g_ip->num_dq + g_ip->num_dqs) *
|
||||
io_param->vdd_io * io_param->vdd_io * 0.25 *
|
||||
(1/(io_param->r_on + io_param->rpar_read + io_param->rs1_dq) +
|
||||
1/(io_param->rtt1_dq_read) + 1/(io_param->rtt2_dq_read)) +
|
||||
1000 * g_ip->num_ca * io_param->vdd_io * io_param->vdd_io *
|
||||
(0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca)));
|
||||
|
||||
power_termination_write = 1000 * (g_ip->num_dq + g_ip->num_dqs) *
|
||||
io_param->vdd_io * io_param->vdd_io * 0.25 *
|
||||
(1/(io_param->r_on + io_param->rpar_write) +
|
||||
1/(io_param->rtt1_dq_write) + 1/(io_param->rtt2_dq_write)) +
|
||||
1000 * g_ip->num_ca * io_param->vdd_io * io_param->vdd_io *
|
||||
(0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca)));
|
||||
|
||||
power_clk_bias = io_param->vdd_io * io_param->v_sw_clk / io_param->r_diff_term * 1000;
|
||||
|
||||
|
||||
if (io_param->io_type == Serial)
|
||||
{ power_termination_read= 1000*(g_ip->num_dq)*io_param->vdd_io*io_param->v_sw_clk/io_param->r_diff_term;
|
||||
power_termination_write= 1000*(g_ip->num_dq)*io_param->vdd_io*io_param->v_sw_clk/io_param->r_diff_term;
|
||||
power_clk_bias=0;
|
||||
}
|
||||
|
||||
if (io_param->io_type == DDR4)
|
||||
{
|
||||
power_termination_read=1000 * (g_ip->num_dq + g_ip->num_dqs) *
|
||||
io_param->vdd_io * io_param->vdd_io *0.5 * (1/(io_param->r_on + io_param->rpar_read + io_param->rs1_dq))
|
||||
+ 1000 * g_ip->num_ca * io_param->vdd_io * io_param->vdd_io *
|
||||
(0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca)));
|
||||
|
||||
|
||||
|
||||
power_termination_write = 1000 * (g_ip->num_dq + g_ip->num_dqs) *
|
||||
io_param->vdd_io * io_param->vdd_io * 0.5 *
|
||||
(1/(io_param->r_on + io_param->rpar_write)) +
|
||||
1000 * g_ip->num_ca * io_param->vdd_io * io_param->vdd_io *
|
||||
(0.5 / (2 * (io_param->r_on_ca + io_param->rtt_ca)));
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
//Combining the power terms based on STATE (READ/WRITE/IDLE/SLEEP)
|
||||
if (g_ip->iostate == READ)
|
||||
{
|
||||
io_power_term = g_ip->duty_cycle *
|
||||
(power_termination_read + power_bias + power_clk_bias);
|
||||
}
|
||||
else if (g_ip->iostate == WRITE)
|
||||
{
|
||||
io_power_term = g_ip->duty_cycle *
|
||||
(power_termination_write + power_bias + power_clk_bias);
|
||||
}
|
||||
else if (g_ip->iostate == IDLE)
|
||||
{
|
||||
io_power_term = g_ip->duty_cycle *
|
||||
(power_termination_write + power_bias + power_clk_bias);
|
||||
if (io_param->io_type == DDR4)
|
||||
{ io_power_term = 1e-6*io_param->i_leak*io_param->vdd_io; // IDLE IO power for DDR4 is leakage since bus can be parked at VDDQ
|
||||
}
|
||||
}
|
||||
else if (g_ip->iostate == SLEEP)
|
||||
{
|
||||
io_power_term = 1e-6*io_param->i_leak*io_param->vdd_io; //nA to mW
|
||||
}
|
||||
else
|
||||
{
|
||||
io_power_term = 0;
|
||||
}
|
||||
|
||||
|
||||
printf("IO Termination and Bias Power (mW) = ");
|
||||
cout << io_power_term << endl;
|
||||
}
|
||||
|
||||
|
||||
//External PHY Power and Wakeup Times. More details can be found in the CACTI-IO technical report (), Chapter 2.1.
|
||||
|
||||
void Extio::extio_power_phy ()
|
||||
{
|
||||
|
||||
|
||||
phy_static_power = io_param->phy_datapath_s + io_param->phy_phase_rotator_s +
|
||||
io_param->phy_clock_tree_s + io_param->phy_rx_s + io_param->phy_dcc_s +
|
||||
io_param->phy_deskew_s + io_param->phy_leveling_s + io_param->phy_pll_s; // in mW
|
||||
|
||||
phy_dynamic_power = io_param->phy_datapath_d + io_param->phy_phase_rotator_d +
|
||||
io_param->phy_clock_tree_d + io_param->phy_rx_d + io_param->phy_dcc_d +
|
||||
io_param->phy_deskew_d + io_param->phy_leveling_d +
|
||||
io_param->phy_pll_d; // in mW/Gbps
|
||||
|
||||
|
||||
|
||||
//Combining the power terms based on STATE (READ/WRITE/IDLE/SLEEP)
|
||||
if (g_ip->iostate == READ)
|
||||
{
|
||||
phy_power = phy_static_power + 2 * io_param->frequency * g_ip->num_dq * phy_dynamic_power / 1000; // Total PHY power in mW
|
||||
}
|
||||
else if (g_ip->iostate == WRITE)
|
||||
{
|
||||
phy_power = phy_static_power + 2 * io_param->frequency * g_ip->num_dq * phy_dynamic_power / 1000; // Total PHY power in mW
|
||||
}
|
||||
else if (g_ip->iostate == IDLE)
|
||||
{
|
||||
phy_power = phy_static_power; // Total PHY power in mW
|
||||
|
||||
}
|
||||
else if (g_ip->iostate == SLEEP)
|
||||
{
|
||||
phy_power = 0; // Total PHY power in mW;
|
||||
}
|
||||
else
|
||||
{
|
||||
phy_power = 0; // Total PHY power in mW;
|
||||
}
|
||||
|
||||
|
||||
phy_wtime = io_param->phy_pll_wtime + io_param->phy_phase_rotator_wtime + io_param->phy_rx_wtime + io_param->phy_bandgap_wtime + io_param->phy_deskew_wtime + io_param->phy_vrefgen_wtime; // Total Wakeup time from SLEEP to ACTIVE. Some of the Wakeup time can be hidden if all components do not need to be serially brought out of SLEEP. This depends on the implementation and user can modify the Wakeup times accordingly.
|
||||
|
||||
|
||||
printf("PHY Power (mW) = ");
|
||||
cout << phy_power << " ";
|
||||
printf("PHY Wakeup Time (us) = ");
|
||||
cout << phy_wtime << endl;
|
||||
|
||||
}
|
||||
|
||||
|
||||
//External IO Dynamic Power. Does not include termination or PHY. More details can be found in the CACTI-IO technical report (), Chapter 2.1.
|
||||
|
||||
void Extio::extio_power_dynamic()
|
||||
{
|
||||
|
||||
if (io_param->io_type == Serial)
|
||||
{
|
||||
power_dq_write = 0;
|
||||
|
||||
power_dqs_write = 0;
|
||||
|
||||
power_ca_write = 0;
|
||||
|
||||
power_dq_read = 0;
|
||||
|
||||
power_dqs_read = 0;
|
||||
|
||||
power_ca_read = 0;
|
||||
|
||||
power_clk = 0;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
|
||||
//Line capacitance calculations for effective c_line
|
||||
|
||||
double c_line =1e6/(io_param->z0*2*io_param->frequency); //For DDR signals: DQ, DQS, CLK
|
||||
double c_line_ca=c_line; //For DDR CA
|
||||
double c_line_sdr=1e6/(io_param->z0*io_param->frequency); //For SDR CA
|
||||
double c_line_2T=1e6*2/(io_param->z0*io_param->frequency); //For 2T timing
|
||||
double c_line_3T=1e6*3/(io_param->z0*io_param->frequency); //For 3T timing
|
||||
|
||||
//Line capacitance if flight time is less than half the bit period
|
||||
|
||||
if (io_param->t_flight < 1e3/(4*io_param->frequency)){
|
||||
c_line = 1e3*io_param->t_flight/io_param->z0;
|
||||
}
|
||||
|
||||
if (io_param->t_flight_ca < 1e3/(4*io_param->frequency)){
|
||||
c_line_ca = 1e3*io_param->t_flight/io_param->z0;
|
||||
}
|
||||
|
||||
if (io_param->t_flight_ca < 1e3/(2*io_param->frequency)){
|
||||
c_line_sdr = 1e3*io_param->t_flight/io_param->z0;
|
||||
}
|
||||
|
||||
if (io_param->t_flight_ca < 1e3*2/(2*io_param->frequency)){
|
||||
c_line_2T = 1e3*io_param->t_flight/io_param->z0;
|
||||
}
|
||||
|
||||
if (io_param->t_flight_ca < 1e3*3/(2*io_param->frequency)){
|
||||
c_line_3T = 1e3*io_param->t_flight/io_param->z0;
|
||||
}
|
||||
|
||||
//Line capacitance calculation for the address bus, depending on what address timing is chosen (DDR/SDR/2T/3T)
|
||||
|
||||
if (g_ip->addr_timing==1.0) {
|
||||
c_line_ca = c_line_sdr;
|
||||
}
|
||||
else if (g_ip->addr_timing==2.0){
|
||||
c_line_ca = c_line_2T;
|
||||
}
|
||||
else if (g_ip->addr_timing==3.0){
|
||||
c_line_ca = c_line_3T;
|
||||
}
|
||||
|
||||
//Dynamic power per signal group for WRITE and READ modes
|
||||
|
||||
power_dq_write = g_ip->num_dq * g_ip->activity_dq *
|
||||
(io_param->c_tx + c_line) * io_param->vdd_io *
|
||||
io_param->v_sw_data_write_line * io_param->frequency / 1000 +
|
||||
g_ip->num_dq * g_ip->activity_dq * io_param->c_data *
|
||||
io_param->vdd_io * io_param->v_sw_data_write_load1 *
|
||||
io_param->frequency / 1000 +
|
||||
g_ip->num_dq * g_ip->activity_dq * ((g_ip->num_mem_dq-1) *
|
||||
io_param->c_data) * io_param->vdd_io *
|
||||
io_param->v_sw_data_write_load2 * io_param->frequency / 1000 +
|
||||
g_ip->num_dq * g_ip->activity_dq * io_param->c_int *
|
||||
io_param->vdd_io * io_param->vdd_io * io_param->frequency / 1000;
|
||||
|
||||
power_dqs_write = g_ip->num_dqs * (io_param->c_tx + c_line) *
|
||||
io_param->vdd_io * io_param->v_sw_data_write_line *
|
||||
io_param->frequency / 1000 +
|
||||
g_ip->num_dqs * io_param->c_data * io_param->vdd_io *
|
||||
io_param->v_sw_data_write_load1 * io_param->frequency / 1000 +
|
||||
g_ip->num_dqs * ((g_ip->num_mem_dq-1) * io_param->c_data) *
|
||||
io_param->vdd_io * io_param->v_sw_data_write_load2 *
|
||||
io_param->frequency / 1000 +
|
||||
g_ip->num_dqs * io_param->c_int * io_param->vdd_io *
|
||||
io_param->vdd_io * io_param->frequency / 1000;
|
||||
|
||||
power_ca_write = g_ip->num_ca * g_ip->activity_ca *
|
||||
(io_param->c_tx + io_param->num_mem_ca * io_param->c_addr +
|
||||
c_line_ca) *
|
||||
io_param->vdd_io * io_param->v_sw_addr * io_param->frequency / 1000 +
|
||||
g_ip->num_ca * g_ip->activity_ca * io_param->c_int *
|
||||
io_param->vdd_io * io_param->vdd_io * io_param->frequency / 1000;
|
||||
|
||||
power_dq_read = g_ip->num_dq * g_ip->activity_dq *
|
||||
(io_param->c_tx + c_line) * io_param->vdd_io *
|
||||
io_param->v_sw_data_read_line * io_param->frequency / 1000.0 +
|
||||
g_ip->num_dq * g_ip->activity_dq * io_param->c_data *
|
||||
io_param->vdd_io * io_param->v_sw_data_read_load1 * io_param->frequency / 1000.0 +
|
||||
g_ip->num_dq *g_ip->activity_dq * ((g_ip->num_mem_dq-1) * io_param->c_data) *
|
||||
io_param->vdd_io * io_param->v_sw_data_read_load2 * io_param->frequency / 1000.0 +
|
||||
g_ip->num_dq * g_ip->activity_dq * io_param->c_int * io_param->vdd_io *
|
||||
io_param->vdd_io * io_param->frequency / 1000.0;
|
||||
|
||||
power_dqs_read = g_ip->num_dqs * (io_param->c_tx + c_line) *
|
||||
io_param->vdd_io * io_param->v_sw_data_read_line *
|
||||
io_param->frequency / 1000.0 +
|
||||
g_ip->num_dqs * io_param->c_data * io_param->vdd_io *
|
||||
io_param->v_sw_data_read_load1 * io_param->frequency / 1000.0 +
|
||||
g_ip->num_dqs * ((g_ip->num_mem_dq-1) * io_param->c_data) *
|
||||
io_param->vdd_io * io_param->v_sw_data_read_load2 * io_param->frequency / 1000.0 +
|
||||
g_ip->num_dqs * io_param->c_int * io_param->vdd_io * io_param->vdd_io *
|
||||
io_param->frequency / 1000.0;
|
||||
|
||||
power_ca_read = g_ip->num_ca * g_ip->activity_ca *
|
||||
(io_param->c_tx + io_param->num_mem_ca *
|
||||
io_param->c_addr + c_line_ca) *
|
||||
io_param->vdd_io * io_param->v_sw_addr * io_param->frequency / 1000 +
|
||||
g_ip->num_ca * g_ip->activity_ca * io_param->c_int *
|
||||
io_param->vdd_io * io_param->vdd_io * io_param->frequency / 1000;
|
||||
|
||||
power_clk = g_ip->num_clk *
|
||||
(io_param->c_tx + io_param->num_mem_clk *
|
||||
io_param->c_data + c_line) *
|
||||
io_param->vdd_io * io_param->v_sw_clk *io_param->frequency / 1000 +
|
||||
g_ip->num_clk * io_param->c_int * io_param->vdd_io *
|
||||
io_param->vdd_io * io_param->frequency / 1000;
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
//Combining the power terms based on STATE (READ/WRITE/IDLE/SLEEP)
|
||||
|
||||
if (g_ip->iostate == READ) {
|
||||
io_power_dynamic = g_ip->duty_cycle * (power_dq_read +
|
||||
power_ca_read + power_dqs_read + power_clk);
|
||||
|
||||
}
|
||||
else if (g_ip->iostate == WRITE) {
|
||||
io_power_dynamic = g_ip->duty_cycle *
|
||||
(power_dq_write + power_ca_write + power_dqs_write + power_clk);
|
||||
}
|
||||
else if (g_ip->iostate == IDLE) {
|
||||
io_power_dynamic = g_ip->duty_cycle * (power_clk);
|
||||
}
|
||||
else if (g_ip->iostate == SLEEP) {
|
||||
io_power_dynamic = 0;
|
||||
}
|
||||
else {
|
||||
io_power_dynamic = 0;
|
||||
}
|
||||
|
||||
|
||||
printf("IO Dynamic Power (mW) = ");
|
||||
cout << io_power_dynamic << " ";
|
||||
}
|
||||
|
||||
|
||||
//External IO Timing and Voltage Margins. More details can be found in the CACTI-IO technical report (), Chapter 2.2.
|
||||
|
||||
void Extio::extio_eye()
|
||||
{
|
||||
|
||||
if (io_param->io_type == Serial)
|
||||
{io_vmargin=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
//VOLTAGE MARGINS
|
||||
//Voltage noise calculations based on proportional and independent noise
|
||||
//sources for WRITE, READ and CA
|
||||
double v_noise_write = io_param->k_noise_write_sen * io_param->v_sw_data_write_line +
|
||||
io_param->v_noise_independent_write;
|
||||
double v_noise_read = io_param->k_noise_read_sen * io_param->v_sw_data_read_line +
|
||||
io_param->v_noise_independent_read;
|
||||
double v_noise_addr = io_param->k_noise_addr_sen * io_param->v_sw_addr +
|
||||
io_param->v_noise_independent_addr;
|
||||
|
||||
|
||||
//Worst-case voltage margin (Swing/2 - Voltage noise) calculations per state
|
||||
//depending on DQ voltage margin and CA voltage margin (lesser or the two is
|
||||
//reported)
|
||||
if (g_ip->iostate == READ)
|
||||
{
|
||||
if ((io_param->v_sw_data_read_line/2 - v_noise_read) <
|
||||
(io_param->v_sw_addr/2 - v_noise_addr)) {
|
||||
io_vmargin = io_param->v_sw_data_read_line/2 - v_noise_read;
|
||||
}
|
||||
else {
|
||||
io_vmargin = io_param->v_sw_addr/2 - v_noise_addr;
|
||||
}
|
||||
}
|
||||
else if (g_ip->iostate == WRITE) {
|
||||
if ((io_param->v_sw_data_write_line/2 - v_noise_write) <
|
||||
(io_param->v_sw_addr/2 - v_noise_addr)) {
|
||||
io_vmargin = io_param->v_sw_data_write_line/2 - v_noise_write;
|
||||
}
|
||||
else {
|
||||
io_vmargin = io_param->v_sw_addr/2 - v_noise_addr;
|
||||
}
|
||||
}
|
||||
else {
|
||||
io_vmargin = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//TIMING MARGINS
|
||||
|
||||
double t_margin_write_setup,t_margin_write_hold,t_margin_read_setup
|
||||
,t_margin_read_hold,t_margin_addr_setup,t_margin_addr_hold;
|
||||
|
||||
if (io_param->io_type == Serial)
|
||||
{
|
||||
|
||||
t_margin_write_setup = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_ds -
|
||||
io_param->t_jitter_setup_sen;
|
||||
|
||||
t_margin_write_hold = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_dh - io_param->t_dcd_soc -
|
||||
io_param->t_jitter_hold_sen;
|
||||
|
||||
t_margin_read_setup = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_soc_setup -
|
||||
io_param->t_jitter_setup_sen;
|
||||
|
||||
t_margin_read_hold = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_soc_hold - io_param->t_dcd_dram -
|
||||
io_param->t_dcd_soc -
|
||||
io_param->t_jitter_hold_sen;
|
||||
|
||||
|
||||
|
||||
t_margin_addr_setup = (1e6*g_ip->addr_timing/(2*io_param->frequency));
|
||||
|
||||
|
||||
t_margin_addr_hold = (1e6*g_ip->addr_timing/(2*io_param->frequency));
|
||||
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
|
||||
|
||||
//Setup and Hold timing margins for DQ WRITE, DQ READ and CA based on timing
|
||||
//budget
|
||||
t_margin_write_setup = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_ds - io_param->t_error_soc -
|
||||
io_param->t_jitter_setup_sen - io_param->t_skew_setup + io_param->t_cor_margin;
|
||||
|
||||
t_margin_write_hold = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_dh - io_param->t_dcd_soc - io_param->t_error_soc -
|
||||
io_param->t_jitter_hold_sen - io_param->t_skew_hold + io_param->t_cor_margin;
|
||||
|
||||
t_margin_read_setup = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_soc_setup - io_param->t_error_soc -
|
||||
io_param->t_jitter_setup_sen - io_param->t_skew_setup -
|
||||
io_param->t_dqsq + io_param->t_cor_margin;
|
||||
|
||||
t_margin_read_hold = (1e6/(4*io_param->frequency)) -
|
||||
io_param->t_soc_hold - io_param->t_dcd_dram -
|
||||
io_param->t_dcd_soc - io_param->t_error_soc -
|
||||
io_param->t_jitter_hold_sen - io_param->t_skew_hold + io_param->t_cor_margin;
|
||||
|
||||
|
||||
|
||||
t_margin_addr_setup = (1e6*g_ip->addr_timing/(2*io_param->frequency)) -
|
||||
io_param->t_is - io_param->t_error_soc -
|
||||
io_param->t_jitter_addr_setup_sen - io_param->t_skew_setup + io_param->t_cor_margin;
|
||||
|
||||
|
||||
t_margin_addr_hold = (1e6*g_ip->addr_timing/(2*io_param->frequency)) -
|
||||
io_param->t_ih - io_param->t_dcd_soc - io_param->t_error_soc -
|
||||
io_param->t_jitter_addr_hold_sen - io_param->t_skew_hold + io_param->t_cor_margin;
|
||||
}
|
||||
|
||||
//Worst-case timing margin per state depending on DQ and CA timing margins
|
||||
if (g_ip->iostate == READ) {
|
||||
io_tmargin = t_margin_read_setup < t_margin_read_hold ?
|
||||
t_margin_read_setup : t_margin_read_hold;
|
||||
io_tmargin = io_tmargin < t_margin_addr_setup ?
|
||||
io_tmargin : t_margin_addr_setup;
|
||||
io_tmargin = io_tmargin < t_margin_addr_hold ?
|
||||
io_tmargin : t_margin_addr_hold;
|
||||
}
|
||||
else if (g_ip->iostate == WRITE) {
|
||||
io_tmargin = t_margin_write_setup < t_margin_write_hold ?
|
||||
t_margin_write_setup : t_margin_write_hold;
|
||||
io_tmargin = io_tmargin < t_margin_addr_setup ?
|
||||
io_tmargin : t_margin_addr_setup;
|
||||
io_tmargin = io_tmargin < t_margin_addr_hold ?
|
||||
io_tmargin : t_margin_addr_hold;
|
||||
}
|
||||
else {
|
||||
io_tmargin = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//OUTPUTS
|
||||
|
||||
|
||||
printf("IO Timing Margin (ps) = ");
|
||||
cout << io_tmargin <<endl;
|
||||
printf("IO Votlage Margin (V) = ");
|
||||
cout << io_vmargin << endl;
|
||||
|
||||
}
|
46
T1/TP/TP1/cacti_7/extio.h
Normal file
46
T1/TP/TP1/cacti_7/extio.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
#ifndef _extio_H_
|
||||
#define _extio_H_
|
||||
#include "parameter.h"
|
||||
#include "component.h"
|
||||
#include "extio_technology.h"
|
||||
|
||||
class Extio : public Component
|
||||
{
|
||||
public:
|
||||
|
||||
Extio(IOTechParam *);
|
||||
|
||||
void extio_area();
|
||||
void extio_eye();
|
||||
void extio_power_dynamic();
|
||||
void extio_power_phy();
|
||||
void extio_power_term();
|
||||
|
||||
private:
|
||||
IOTechParam *io_param;
|
||||
|
||||
double io_area;
|
||||
|
||||
double io_power_term;
|
||||
double power_termination_write;
|
||||
double power_termination_read;
|
||||
double power_bias;
|
||||
double power_clk_bias;
|
||||
|
||||
double phy_power;
|
||||
double phy_wtime;
|
||||
double phy_static_power;
|
||||
double phy_dynamic_power;
|
||||
|
||||
double io_power_dynamic;
|
||||
|
||||
double power_dq_write, power_dqs_write, power_ca_write,
|
||||
power_dq_read, power_dqs_read, power_ca_read,
|
||||
power_clk;
|
||||
|
||||
double io_tmargin, io_vmargin;
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif // _extio_H_
|
1617
T1/TP/TP1/cacti_7/extio_technology.cc
Normal file
1617
T1/TP/TP1/cacti_7/extio_technology.cc
Normal file
File diff suppressed because it is too large
Load diff
225
T1/TP/TP1/cacti_7/extio_technology.h
Normal file
225
T1/TP/TP1/cacti_7/extio_technology.h
Normal file
|
@ -0,0 +1,225 @@
|
|||
#ifndef __EXTIO_TECH__
|
||||
#define __EXTIO_TECH__
|
||||
|
||||
#include <iostream>
|
||||
#include "parameter.h"
|
||||
#include "const.h"
|
||||
|
||||
#define NUM_DIMM 1
|
||||
|
||||
|
||||
extern const double rtt1_wr_lrdimm_ddr3[8][4];
|
||||
extern const double rtt2_wr_lrdimm_ddr3[8][4];
|
||||
extern const double rtt1_rd_lrdimm_ddr3[8][4];
|
||||
extern const double rtt2_rd_lrdimm_ddr3[8][4];
|
||||
|
||||
extern const double rtt1_wr_host_dimm_ddr3[3][4];
|
||||
extern const double rtt2_wr_host_dimm_ddr3[3][4];
|
||||
extern const double rtt1_rd_host_dimm_ddr3[3][4];
|
||||
extern const double rtt2_rd_host_dimm_ddr3[3][4];
|
||||
|
||||
extern const double rtt1_wr_bob_dimm_ddr3[3][4];
|
||||
extern const double rtt2_wr_bob_dimm_ddr3[3][4];
|
||||
extern const double rtt1_rd_bob_dimm_ddr3[3][4];
|
||||
extern const double rtt2_rd_bob_dimm_ddr3[3][4];
|
||||
|
||||
|
||||
extern const double rtt1_wr_lrdimm_ddr4[8][4];
|
||||
extern const double rtt2_wr_lrdimm_ddr4[8][4];
|
||||
extern const double rtt1_rd_lrdimm_ddr4[8][4];
|
||||
extern const double rtt2_rd_lrdimm_ddr4[8][4];
|
||||
|
||||
extern const double rtt1_wr_host_dimm_ddr4[3][4];
|
||||
extern const double rtt2_wr_host_dimm_ddr4[3][4];
|
||||
extern const double rtt1_rd_host_dimm_ddr4[3][4];
|
||||
extern const double rtt2_rd_host_dimm_ddr4[3][4];
|
||||
|
||||
extern const double rtt1_wr_bob_dimm_ddr4[3][4];
|
||||
extern const double rtt2_wr_bob_dimm_ddr4[3][4];
|
||||
extern const double rtt1_rd_bob_dimm_ddr4[3][4];
|
||||
extern const double rtt2_rd_bob_dimm_ddr4[3][4];
|
||||
|
||||
class IOTechParam
|
||||
{
|
||||
public:
|
||||
IOTechParam(InputParameter *);
|
||||
// connection : 0(bob-dimm), 1(host-dimm), 2(on-dimm)
|
||||
IOTechParam(InputParameter *, Mem_IO_type io_type, int num_mem_dq, int mem_data_width, int num_dq, int connection, int num_loads, double freq) ;
|
||||
~IOTechParam();
|
||||
double num_mem_ca; /* Number of loads on the address bus
|
||||
based on total number of memories in the channel.For
|
||||
registered or buffered configurations, the num_mem_dq and num_mem_ca is per buffer. */
|
||||
|
||||
double num_mem_clk; /* Number of loads on the clock as total
|
||||
memories in the channel / number of clock lines available */
|
||||
|
||||
//Technology Parameters
|
||||
// IO Supply voltage (V)
|
||||
double vdd_io; /* Voltage swing on CLK/CLKB (V) (swing on the CLK pin if it
|
||||
is differentially terminated) */
|
||||
double v_sw_clk;
|
||||
|
||||
// Loading parameters
|
||||
|
||||
double c_int; /*Internal IO loading (pF) (loading within the IO, due to
|
||||
predriver nets) */
|
||||
double c_tx; /* IO TX self-load including package (pF) (loading at the
|
||||
CPU TX pin) */
|
||||
double c_data; /* Device loading per memory data pin (pF) (DRAM device
|
||||
load for DQ per die) */
|
||||
double c_addr; /* Device loading per memory address pin (pF) (DRAM
|
||||
device load for CA per die) */
|
||||
double i_bias; /* Bias current (mA) (includes bias current for the whole memory
|
||||
bus due to RX Vref based receivers */
|
||||
double i_leak; // Active leakage current per pin (nA)
|
||||
|
||||
|
||||
|
||||
// IO Area coefficients
|
||||
|
||||
double ioarea_c; /* sq.mm. (IO Area baseline coeeficient for control
|
||||
circuitry and overhead) */
|
||||
double ioarea_k0; /* sq.mm * ohms (IO Area coefficient for the driver, for
|
||||
unit drive strength or output impedance) */
|
||||
double ioarea_k1; /* sq.mm * ohms / MHz (IO Area coefficient for the
|
||||
predriver final stage, based on fanout needed) */
|
||||
double ioarea_k2; /* sq.mm * ohms / MHz^2 (IO Area coefficient for
|
||||
predriver middle stage, based on fanout needed) */
|
||||
double ioarea_k3; /* sq.mm * ohms / MHz^3 (IO Area coefficient for
|
||||
predriver first stage, based on fanout needed) */
|
||||
|
||||
|
||||
// Timing parameters (ps)
|
||||
|
||||
double t_ds; //DQ setup time at DRAM
|
||||
double t_is; //CA setup time at DRAM
|
||||
double t_dh; //DQ hold time at DRAM
|
||||
double t_ih; //CA hold time at DRAM
|
||||
double t_dcd_soc; //Duty-cycle distortion at the CPU/SOC
|
||||
double t_dcd_dram; //Duty-cycle distortion at the DRAM
|
||||
double t_error_soc; //Timing error due to edge placement uncertainty of the DLL
|
||||
double t_skew_setup;//Setup skew between DQ/DQS or CA/CLK after deskewing the lines
|
||||
double t_skew_hold; //Hold skew between DQ/DQS or CA/CLK after deskewing the lines
|
||||
double t_dqsq; //DQ-DQS skew at the DRAM output during Read
|
||||
//double t_qhs; //DQ-DQS hold factor at the DRAM output during Read FIXME: I am commenting it as the variable is never used.
|
||||
double t_soc_setup; //Setup time at SOC input dueing Read
|
||||
double t_soc_hold; //Hold time at SOC input during Read
|
||||
double t_jitter_setup; /* Half-cycle jitter on the DQS at DRAM input
|
||||
affecting setup time */
|
||||
double t_jitter_hold; /* Half-cycle jitter on the DQS at the DRAM input
|
||||
affecting hold time */
|
||||
double t_jitter_addr_setup; /* Half-cycle jitter on the CLK at DRAM input
|
||||
affecting setup time */
|
||||
double t_jitter_addr_hold; /* Half-cycle jitter on the CLK at the DRAM
|
||||
input affecting hold time */
|
||||
double t_cor_margin; // Statistical correlation margin
|
||||
|
||||
|
||||
//Termination Parameters
|
||||
|
||||
double r_diff_term; /* Differential termination resister if
|
||||
used for CLK (Ohm) */
|
||||
|
||||
|
||||
// ODT related termination resistor values (Ohm)
|
||||
|
||||
double rtt1_dq_read; //DQ Read termination at CPU
|
||||
double rtt2_dq_read; //DQ Read termination at inactive DRAM
|
||||
double rtt1_dq_write; //DQ Write termination at active DRAM
|
||||
double rtt2_dq_write; //DQ Write termination at inactive DRAM
|
||||
double rtt_ca; //CA fly-by termination
|
||||
double rs1_dq; //Series resistor at active DRAM
|
||||
double rs2_dq; //Series resistor at inactive DRAM
|
||||
double r_stub_ca; //Series resistor for the fly-by channel
|
||||
double r_on; //Driver impedance
|
||||
double r_on_ca; //CA driver impedance
|
||||
|
||||
double z0; //Line impedance (ohms): Characteristic impedance of the route.
|
||||
double t_flight; /* Flight time of the interconnect (ns) (approximately
|
||||
180ps/inch for FR4) */
|
||||
double t_flight_ca; /* Flight time of the Control/Address (CA)
|
||||
interconnect (ns) (approximately 180ps/inch for FR4) */
|
||||
|
||||
// Voltage noise coeffecients
|
||||
|
||||
double k_noise_write; //Proportional noise coefficient for Write mode
|
||||
double k_noise_read; //Proportional noise coefficient for Read mode
|
||||
double k_noise_addr; //Proportional noise coefficient for Address bus
|
||||
double v_noise_independent_write; //Independent noise voltage for Write mode
|
||||
double v_noise_independent_read; //Independent noise voltage for Read mode
|
||||
double v_noise_independent_addr; //Independent noise voltage for Address bus
|
||||
|
||||
|
||||
//SENSITIVITY INPUTS FOR TIMING AND VOLTAGE NOISE
|
||||
|
||||
/* This is a user-defined section that depends on the channel sensitivity
|
||||
* to IO and DRAM parameters. The t_jitter_* and k_noise_* are the
|
||||
* parameters that are impacted based on the channel analysis. The user
|
||||
* can define any relationship between the termination, loading and
|
||||
* configuration parameters AND the t_jitter/k_noise parameters. */
|
||||
|
||||
double k_noise_write_sen;
|
||||
double k_noise_read_sen;
|
||||
double k_noise_addr_sen;
|
||||
double t_jitter_setup_sen;
|
||||
double t_jitter_hold_sen;
|
||||
double t_jitter_addr_setup_sen;
|
||||
double t_jitter_addr_hold_sen;
|
||||
|
||||
//SWING AND TERMINATION CALCULATIONS
|
||||
//R|| calculation
|
||||
|
||||
double rpar_write;
|
||||
double rpar_read;
|
||||
|
||||
//Swing calculation
|
||||
|
||||
double v_sw_data_read_load1; //Swing for DQ at dram1 during READ
|
||||
double v_sw_data_read_load2; //Swing for DQ at dram2 during READ
|
||||
double v_sw_data_read_line; //Swing for DQ on the line during READ
|
||||
double v_sw_addr; //Swing for the address bus
|
||||
double v_sw_data_write_load1; //Swing for DQ at dram1 during WRITE
|
||||
double v_sw_data_write_load2; //Swing for DQ at dram2 during WRITE
|
||||
double v_sw_data_write_line; //Swing for DQ on the line during WRITE
|
||||
|
||||
// PHY Static Power Coefficients (mW)
|
||||
|
||||
double phy_datapath_s; // Datapath Static Power
|
||||
double phy_phase_rotator_s; // Phase Rotator Static Power
|
||||
double phy_clock_tree_s; // Clock Tree Static Power
|
||||
double phy_rx_s; // Receiver Static Power
|
||||
double phy_dcc_s; // Duty Cycle Correction Static Power
|
||||
double phy_deskew_s; // Deskewing Static Power
|
||||
double phy_leveling_s; // Write and Read Leveling Static Power
|
||||
double phy_pll_s; // PHY PLL Static Power
|
||||
|
||||
|
||||
// PHY Dynamic Power Coefficients (mW/Gbps)
|
||||
|
||||
double phy_datapath_d; // Datapath Dynamic Power
|
||||
double phy_phase_rotator_d; // Phase Rotator Dynamic Power
|
||||
double phy_clock_tree_d; // Clock Tree Dynamic Power
|
||||
double phy_rx_d; // Receiver Dynamic Power
|
||||
double phy_dcc_d; // Duty Cycle Correction Dynamic Power
|
||||
double phy_deskew_d; // Deskewing Dynamic Power
|
||||
double phy_leveling_d; // Write and Read Leveling Dynamic Power
|
||||
double phy_pll_d; // PHY PLL Dynamic Power
|
||||
|
||||
|
||||
//PHY Wakeup Times (Sleep to Active) (microseconds)
|
||||
|
||||
double phy_pll_wtime; // PHY PLL Wakeup Time
|
||||
double phy_phase_rotator_wtime; // Phase Rotator Wakeup Time
|
||||
double phy_rx_wtime; // Receiver Wakeup Time
|
||||
double phy_bandgap_wtime; // Bandgap Wakeup Time
|
||||
double phy_deskew_wtime; // Deskewing Wakeup Time
|
||||
double phy_vrefgen_wtime; // VREF Generator Wakeup Time
|
||||
|
||||
|
||||
// RTT values depends on the number of loads, frequency, and link_type
|
||||
double frequency;
|
||||
Mem_IO_type io_type;
|
||||
int frequnecy_index(Mem_IO_type type);
|
||||
};
|
||||
|
||||
#endif
|
640
T1/TP/TP1/cacti_7/htree2.cc
Normal file
640
T1/TP/TP1/cacti_7/htree2.cc
Normal file
|
@ -0,0 +1,640 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "htree2.h"
|
||||
#include "wire.h"
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
|
||||
// Builds one H-tree of the requested kind.
//
// Stores the geometry and bit-width arguments, picks the wire bandwidth that
// matches htree_type, runs in_htree() for the address / data-in / search-in
// trees or out_htree() for the data-out / search-out trees, then keeps a
// per-bit copy of the power in power_bit and scales the dynamic read power by
// the initial wire bandwidth.
//
// Requires ndbl >= 2 and ndwl >= 2 (asserted). A former special case for
// ndbl == 1 && ndwl == 1 (zero delay/power, area equal to one mat) and the
// bumping of a lone 1 up to 2 are disabled, so callers must satisfy the
// assertion themselves.
Htree2::Htree2(
    enum Wire_type wire_model, double mat_w, double mat_h,
    int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
    bool uca_tree_, bool search_tree_, /*TechnologyParameter::*/DeviceType *dt)
 :in_rise_time(0), out_rise_time(0),
  tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
  add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
  search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
  uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
{
  assert(ndbl >= 2 && ndwl >= 2);

  max_unpipelined_link_delay = 0; //TODO
  min_w_nmos = g_tp.min_w_nmos_;
  min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;

  const bool input_tree = (htree_type == Add_htree) ||
                          (htree_type == Data_in_htree) ||
                          (htree_type == Search_in_htree);
  const bool output_tree = (htree_type == Data_out_htree) ||
                           (htree_type == Search_out_htree);

  if (input_tree)
  {
    if (htree_type == Add_htree)
    {
      init_wire_bw = add_bits;
    }
    else if (htree_type == Data_in_htree)
    {
      init_wire_bw = data_in_bits;
    }
    else
    {
      //in_search_tree is broad cast, out_htree is not.
      init_wire_bw = search_data_in_bits;
    }
    wire_bw = init_wire_bw;
    in_htree();
  }
  else if (output_tree)
  {
    if (htree_type == Data_out_htree)
    {
      init_wire_bw = data_out_bits;
    }
    else
    {
      init_wire_bw = search_data_out_bits;
    }
    wire_bw = init_wire_bw;
    out_htree();
  }
  else
  {
    // Unknown tree type.
    assert(0);
  }

  // Keep the per-bit figures before scaling the dynamic power to the full
  // bus width.
  power_bit = power;
  power.readOp.dynamic *= init_wire_bw;

  assert(power.readOp.dynamic >= 0);
  assert(power.readOp.leakage >= 0);
}
|
||||
|
||||
|
||||
|
||||
// nand gate sizing calculation
|
||||
void Htree2::input_nand(double s1, double s2, double l_eff)
|
||||
{
|
||||
Wire w1(wt, l_eff);
|
||||
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||
// input capacitance of a repeater = input capacitance of nand.
|
||||
double nsize = s1*(1 + pton_size)/(2 + pton_size);
|
||||
nsize = (nsize < 1) ? 1 : nsize;
|
||||
|
||||
double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) *
|
||||
(drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||
2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0));
|
||||
delay+= horowitz (w1.out_rise_time, tc,
|
||||
deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
|
||||
power.readOp.dynamic += 0.5 *
|
||||
(2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+ drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
+ 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd;
|
||||
|
||||
power.searchOp.dynamic += 0.5 *
|
||||
(2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+ drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
+ 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd * wire_bw ;
|
||||
power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
|
||||
power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// tristate buffer model consisting of not, nand, nor, and driver transistors
|
||||
// Adds the delay and power of one tristate output buffer (nand + not + nor +
// output transistor, see the sections below) to this object's running totals.
//   s1    - sizing input: scales the nand/nor gates (`size`) and the output
//           transistor (`tr_size`)
//   s2    - sizing input used only for the gate load in the stage-effort term
//           (`s_eff`)
//   l_eff - wire length handed to Wire and to wire_cap(l_eff*1e-6, ...)
//           (the 1e-6 factor suggests microns -> metres; TODO confirm units)
// Side effects: adds to delay, power.readOp.dynamic, power.searchOp.dynamic,
// power.readOp.leakage and power.readOp.gate_leakage.
void Htree2::output_buffer(double s1, double s2, double l_eff)
|
||||
{
|
||||
Wire w1(wt, l_eff);
|
||||
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||
// input capacitance of repeater = input capacitance of nand + nor.
|
||||
double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
|
||||
double s_eff = //stage eff of a repeater in a wire
|
||||
(gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
|
||||
gate_C(s2*(min_w_nmos + min_w_pmos), 0);
|
||||
double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
|
||||
// never size the nand/nor gates below 1x
|
||||
size = (size < 1) ? 1 : size;
|
||||
|
||||
|
||||
double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1);
|
||||
double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1);
|
||||
double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||
gate_C(tr_size*min_w_pmos, 0);
|
||||
double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
|
||||
gate_C(s1*(min_w_nmos + min_w_pmos), 0);
|
||||
|
||||
|
||||
// two-stage RC time constant: gate output node, then output-transistor node
|
||||
double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out;
|
||||
|
||||
|
||||
|
||||
|
||||
delay += horowitz (w1.out_rise_time, tc,
|
||||
deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
|
||||
|
||||
|
||||
// For each stage below, the readOp term is 0.5*C*Vdd^2 and the searchOp term
// is the same expression multiplied by init_wire_bw.
//nand
|
||||
power.readOp.dynamic += 0.5 *
|
||||
(2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(tr_size*(min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd;
|
||||
|
||||
|
||||
power.searchOp.dynamic += 0.5 *
|
||||
(2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(tr_size*(min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd*init_wire_bw;
|
||||
|
||||
|
||||
//not
|
||||
power.readOp.dynamic += 0.5 *
|
||||
(drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
+gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd;
|
||||
|
||||
|
||||
power.searchOp.dynamic += 0.5 *
|
||||
(drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
+gate_C(size*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd*init_wire_bw;
|
||||
|
||||
|
||||
//nor
|
||||
power.readOp.dynamic += 0.5 *
|
||||
(drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+ 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
+gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd;
|
||||
|
||||
|
||||
power.searchOp.dynamic += 0.5 *
|
||||
(drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+ 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
+gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd*init_wire_bw;
|
||||
|
||||
|
||||
//output transistor
|
||||
power.readOp.dynamic += 0.5 *
|
||||
((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
|
||||
+ gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd;
|
||||
|
||||
|
||||
power.searchOp.dynamic += 0.5 *
|
||||
((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2
|
||||
+ gate_C(s1*(min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd*init_wire_bw;
|
||||
|
||||
|
||||
// NOTE(review): the uca_tree and non-uca_tree branches below contain the same
// six statements (they differ only in a commented-out line), so the test
// currently has no effect on the result.
if(uca_tree) {
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
|
||||
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
//power.readOp.gate_leakage *=;
|
||||
}
|
||||
else {
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
|
||||
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
//power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* calculates the input h-tree delay/power
|
||||
* A nand gate is used at each node to
|
||||
* limit the signal
|
||||
* The area of an unbalanced htree (rows != columns)
|
||||
* depends on how data is traversed.
|
||||
* In the following function, if ( no. of rows < no. of columns),
|
||||
* then data first traverse in excess hor. links until vertical
|
||||
* and horizontal nodes are same.
|
||||
* If no. of rows is bigger, then data traverse in
|
||||
* a hor. link followed by a ver. link in a repeated
|
||||
* fashion (similar to a balanced tree) until there are no
|
||||
* hor. links left. After this it goes through the remaining vertical
|
||||
* links.
|
||||
*/
|
||||
// Computes area (area.h / area.w), delay and power of the input H-tree.
// First derives the half-height/half-width of the tree from the mat size,
// ndwl/ndbl and the bit counts, then walks the tree from the root. Each loop
// iteration consumes one horizontal link (option 0), one horizontal plus one
// vertical link (option 1), or one vertical link (option 2), halving the
// remaining len/ht as it goes. For non-UCA trees input_nand() is called per link.
// Side effects: overwrites area, delay and power totals; multiplies wire_bw.
void
|
||||
Htree2::in_htree()
|
||||
{
|
||||
//temp var
|
||||
double s1 = 0, s2 = 0, s3 = 0;
|
||||
double l_eff = 0;
|
||||
Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
|
||||
double len = 0, ht = 0;
|
||||
int option = 0;
|
||||
|
||||
|
||||
int h = (int) _log2(ndwl/2); // horizontal nodes
|
||||
int v = (int) _log2(ndbl/2); // vertical nodes
|
||||
double len_temp;
|
||||
double ht_temp;
|
||||
if (uca_tree)
|
||||
{// NOTE: this computation does not consider the wires that route from edge to middle.
|
||||
ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
2 * (1-pow(0.5,h))))/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
2 * (1-pow(0.5,v))))/2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ndwl == ndbl) {
|
||||
ht_temp = ((mat_height*ndbl/2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
)/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
}
|
||||
else if (ndwl > ndbl) {
|
||||
double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
|
||||
ht_temp = ((mat_height*ndbl/2) +
|
||||
((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
|
||||
(2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
}
|
||||
else {
|
||||
double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
|
||||
ht_temp = ((mat_height*ndbl/2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
)/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ht_temp / len_temp are half-dimensions; the reported area is the full extent
area.h = ht_temp * 2;
|
||||
area.w = len_temp * 2;
|
||||
// NOTE(review): power.readOp.gate_leakage is accumulated in the loop below but
// is not reset here with the other totals; presumably it is expected to start
// at zero - confirm against the constructor.
delay = 0;
|
||||
power.readOp.dynamic = 0;
|
||||
power.readOp.leakage = 0;
|
||||
power.searchOp.dynamic =0;
|
||||
len = len_temp;
|
||||
ht = ht_temp/2;
|
||||
|
||||
|
||||
while (v > 0 || h > 0)
|
||||
{
|
||||
// release the wires built in the previous iteration
if (wtemp1) delete wtemp1;
|
||||
if (wtemp2) delete wtemp2;
|
||||
if (wtemp3) delete wtemp3;
|
||||
|
||||
|
||||
if (h > v)
|
||||
{
|
||||
//the iteration considers only one horizontal link
|
||||
wtemp1 = new Wire(wt, len); // hor
|
||||
wtemp2 = new Wire(wt, len/2); // ver
|
||||
len_temp = len;
|
||||
len /= 2;
|
||||
wtemp3 = 0;
|
||||
h--;
|
||||
option = 0;
|
||||
}
|
||||
else if (v>0 && h>0)
|
||||
{
|
||||
//considers one horizontal link and one vertical link
|
||||
wtemp1 = new Wire(wt, len); // hor
|
||||
wtemp2 = new Wire(wt, ht); // ver
|
||||
wtemp3 = new Wire(wt, len/2); // next hor
|
||||
len_temp = len;
|
||||
ht_temp = ht;
|
||||
len /= 2;
|
||||
ht /= 2;
|
||||
v--;
|
||||
h--;
|
||||
option = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// considers only one vertical link
|
||||
assert(h == 0);
|
||||
wtemp1 = new Wire(wt, ht); // ver
|
||||
wtemp2 = new Wire(wt, ht/2); // hor
|
||||
ht_temp = ht;
|
||||
ht /= 2;
|
||||
wtemp3 = 0;
|
||||
v--;
|
||||
option = 2;
|
||||
}
|
||||
|
||||
|
||||
// first link of this iteration: leakage and search energy scale with wire_bw
delay += wtemp1->delay;
|
||||
power.readOp.dynamic += wtemp1->power.readOp.dynamic;
|
||||
power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw;
|
||||
power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
|
||||
power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
|
||||
if ((uca_tree == false && option == 2) || search_tree==true)
|
||||
{
|
||||
wire_bw*=2; // wire bandwidth doubles only for vertical branches
|
||||
}
|
||||
|
||||
|
||||
if (uca_tree == false)
|
||||
{
|
||||
// pick the NAND input sizing from the repeater size, scaled down when the
// link is shorter than one repeater spacing
if (len_temp > wtemp1->repeater_spacing)
|
||||
{
|
||||
s1 = wtemp1->repeater_size;
|
||||
l_eff = wtemp1->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
|
||||
l_eff = len_temp;
|
||||
}
|
||||
|
||||
|
||||
if (ht_temp > wtemp2->repeater_spacing)
|
||||
{
|
||||
s2 = wtemp2->repeater_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
// NOTE(review): the condition tests ht_temp but the scaling uses len_temp - confirm which is intended
s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
|
||||
}
|
||||
// first level
|
||||
input_nand(s1, s2, l_eff);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// only option 1 has a second (vertical) link to account for
if (option != 1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// second level
|
||||
delay += wtemp2->delay;
|
||||
power.readOp.dynamic += wtemp2->power.readOp.dynamic;
|
||||
power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw;
|
||||
power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
|
||||
|
||||
// NOTE(review): both branches add wtemp2's leakage a second time on top of the
// two statements above - confirm this is intended rather than double counting.
if (uca_tree)
|
||||
{
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
}
|
||||
else
|
||||
{
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
wire_bw*=2;
|
||||
|
||||
|
||||
if (ht_temp > wtemp3->repeater_spacing)
|
||||
{
|
||||
s3 = wtemp3->repeater_size;
|
||||
l_eff = wtemp3->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
// NOTE(review): as above, ht_temp is tested but len_temp is used for scaling - confirm
s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
|
||||
l_eff = ht_temp;
|
||||
}
|
||||
|
||||
|
||||
input_nand(s2, s3, l_eff);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// release the wires left over from the final iteration
if (wtemp1) delete wtemp1;
|
||||
if (wtemp2) delete wtemp2;
|
||||
if (wtemp3) delete wtemp3;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* a tristate buffer is used to handle fan-ins
|
||||
* The area of an unbalanced htree (rows != columns)
|
||||
* depends on how data is traversed.
|
||||
* In the following function, if ( no. of rows < no. of columns),
|
||||
* then data first traverse in excess hor. links until vertical
|
||||
* and horizontal nodes are same.
|
||||
* If no. of rows is bigger, then data traverse in
|
||||
* a hor. link followed by a ver. link in a repeated
|
||||
* fashion (similar to a balanced tree) until there are no
|
||||
* hor. links left. After this it goes through the remaining vertical
|
||||
* links.
|
||||
*/
|
||||
// Computes area (area.h / area.w), delay and power of the output H-tree.
// Same traversal as the input tree: the half-dimensions are derived from the
// mat size, ndwl/ndbl and the bit counts, then each loop iteration consumes one
// horizontal link (option 0), one horizontal plus one vertical link (option 1),
// or one vertical link (option 2). For non-UCA trees output_buffer() is called
// per link. Search energy here is scaled by init_wire_bw rather than wire_bw.
// Side effects: overwrites area, delay and power totals; multiplies wire_bw.
void Htree2::out_htree()
|
||||
{
|
||||
//temp var
|
||||
double s1 = 0, s2 = 0, s3 = 0;
|
||||
double l_eff = 0;
|
||||
Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0;
|
||||
double len = 0, ht = 0;
|
||||
int option = 0;
|
||||
|
||||
|
||||
int h = (int) _log2(ndwl/2);
|
||||
int v = (int) _log2(ndbl/2);
|
||||
double len_temp;
|
||||
double ht_temp;
|
||||
if (uca_tree)
|
||||
{
|
||||
ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
2 * (1-pow(0.5,h))))/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
2 * (1-pow(0.5,v))))/2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ndwl == ndbl) {
|
||||
ht_temp = ((mat_height*ndbl/2) +
|
||||
((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
)/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
|
||||
|
||||
}
|
||||
else if (ndwl > ndbl) {
|
||||
double excess_part = (_log2(ndwl/2) - _log2(ndbl/2));
|
||||
ht_temp = ((mat_height*ndbl/2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
|
||||
(2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
}
|
||||
else {
|
||||
double excess_part = (_log2(ndbl/2) - _log2(ndwl/2));
|
||||
ht_temp = ((mat_height*ndbl/2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
)/2;
|
||||
len_temp = (mat_width*ndwl/2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
|
||||
}
|
||||
}
|
||||
// ht_temp / len_temp are half-dimensions; the reported area is the full extent
area.h = ht_temp * 2;
|
||||
area.w = len_temp * 2;
|
||||
// NOTE(review): power.searchOp.dynamic is accumulated in the loop below but is
// not reset here with the other totals; presumably it is expected to start at
// zero - confirm against the constructor.
delay = 0;
|
||||
power.readOp.dynamic = 0;
|
||||
power.readOp.leakage = 0;
|
||||
power.readOp.gate_leakage = 0;
|
||||
//cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
|
||||
len = len_temp;
|
||||
ht = ht_temp/2;
|
||||
|
||||
|
||||
while (v > 0 || h > 0)
|
||||
{ //finds delay/power of each link in the tree
|
||||
// release the wires built in the previous iteration
if (wtemp1) delete wtemp1;
|
||||
if (wtemp2) delete wtemp2;
|
||||
if (wtemp3) delete wtemp3;
|
||||
|
||||
|
||||
if(h > v) {
|
||||
//the iteration considers only one horizontal link
|
||||
wtemp1 = new Wire(wt, len); // hor
|
||||
wtemp2 = new Wire(wt, len/2); // ver
|
||||
len_temp = len;
|
||||
len /= 2;
|
||||
wtemp3 = 0;
|
||||
h--;
|
||||
option = 0;
|
||||
}
|
||||
else if (v>0 && h>0) {
|
||||
//considers one horizontal link and one vertical link
|
||||
wtemp1 = new Wire(wt, len); // hor
|
||||
wtemp2 = new Wire(wt, ht); // ver
|
||||
wtemp3 = new Wire(wt, len/2); // next hor
|
||||
len_temp = len;
|
||||
ht_temp = ht;
|
||||
len /= 2;
|
||||
ht /= 2;
|
||||
v--;
|
||||
h--;
|
||||
option = 1;
|
||||
}
|
||||
else {
|
||||
// considers only one vertical link
|
||||
assert(h == 0);
|
||||
wtemp1 = new Wire(wt, ht); // ver
|
||||
wtemp2 = new Wire(wt, ht/2); // ver
|
||||
ht_temp = ht;
|
||||
ht /= 2;
|
||||
wtemp3 = 0;
|
||||
v--;
|
||||
option = 2;
|
||||
}
|
||||
// first link of this iteration: leakage scales with wire_bw, search energy with init_wire_bw
delay += wtemp1->delay;
|
||||
power.readOp.dynamic += wtemp1->power.readOp.dynamic;
|
||||
power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw;
|
||||
power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw;
|
||||
power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw;
|
||||
//cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
|
||||
if ((uca_tree == false && option == 2) || search_tree==true)
|
||||
{
|
||||
wire_bw*=2;
|
||||
}
|
||||
|
||||
|
||||
if (uca_tree == false)
|
||||
{
|
||||
// pick the buffer sizing from the repeater size, scaled down when the link
// is shorter than one repeater spacing
if (len_temp > wtemp1->repeater_spacing)
|
||||
{
|
||||
s1 = wtemp1->repeater_size;
|
||||
l_eff = wtemp1->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
|
||||
l_eff = len_temp;
|
||||
}
|
||||
if (ht_temp > wtemp2->repeater_spacing)
|
||||
{
|
||||
s2 = wtemp2->repeater_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
// NOTE(review): the condition tests ht_temp but the scaling uses len_temp - confirm which is intended
s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
|
||||
}
|
||||
// first level
|
||||
output_buffer(s1, s2, l_eff);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// only option 1 has a second (vertical) link to account for
if (option != 1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// second level
|
||||
delay += wtemp2->delay;
|
||||
power.readOp.dynamic += wtemp2->power.readOp.dynamic;
|
||||
power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw;
|
||||
power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw;
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
//cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
|
||||
// NOTE(review): both branches add wtemp2's leakage a second time on top of the
// two statements above - confirm this is intended rather than double counting.
if (uca_tree)
|
||||
{
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
}
|
||||
else
|
||||
{
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
wire_bw*=2;
|
||||
|
||||
|
||||
if (ht_temp > wtemp3->repeater_spacing)
|
||||
{
|
||||
s3 = wtemp3->repeater_size;
|
||||
l_eff = wtemp3->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
// NOTE(review): as above, ht_temp is tested but len_temp is used for scaling - confirm
s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
|
||||
l_eff = ht_temp;
|
||||
}
|
||||
|
||||
|
||||
output_buffer(s2, s3, l_eff);
|
||||
}
|
||||
//cout<<"power.readOp.leakage"<<power.readOp.leakage<<endl;
|
||||
//cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
|
||||
//cout<<"wtemp2->power.readOp.gate_leakage"<<wtemp2->power.readOp.gate_leakage<<endl;
|
||||
}
|
||||
|
||||
|
||||
// release the wires left over from the final iteration
if (wtemp1) delete wtemp1;
|
||||
if (wtemp2) delete wtemp2;
|
||||
if (wtemp3) delete wtemp3;
|
||||
}
|
||||
|
97
T1/TP/TP1/cacti_7/htree2.h
Normal file
97
T1/TP/TP1/cacti_7/htree2.h
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#ifndef __HTREE2_H__
|
||||
#define __HTREE2_H__
|
||||
|
||||
#include "basic_circuit.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
#include "assert.h"
|
||||
#include "subarray.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "wire.h"
|
||||
|
||||
// leakage power includes entire htree in a bank (when uca_tree == false)
|
||||
// leakage power includes only part to one bank when uca_tree == true
|
||||
|
||||
// Component modelling an H-tree wiring network. in_htree() / out_htree()
// compute its area, delay and power from the mat dimensions, the address/data
// bit counts and the ndwl/ndbl partitioning supplied to the constructor.
class Htree2 : public Component
|
||||
{
|
||||
public:
|
||||
// Parameter names suggest: wire type, mat width/height, address / data-in /
// search-data-in / data-out / search-data-out bit counts, ndbl (bl), ndwl (wl),
// tree type, UCA and search-tree flags, and the device type (defaults to
// g_tp.peri_global). The constructor body is not visible here - confirm.
Htree2(enum Wire_type wire_model,
|
||||
double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
|
||||
enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
|
||||
/*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global));
|
||||
~Htree2() {};
|
||||
|
||||
|
||||
// full input / output tree models (wire links plus a gate stage per link for non-UCA trees)
void in_htree();
|
||||
void out_htree();
|
||||
|
||||
|
||||
// repeaters only at h-tree nodes
|
||||
void limited_in_htree();
|
||||
void limited_out_htree();
|
||||
// per-node gate models; each adds its delay and power to this object's totals
void input_nand(double s1, double s2, double l);
|
||||
void output_buffer(double s1, double s2, double l);
|
||||
|
||||
|
||||
double in_rise_time, out_rise_time;
|
||||
|
||||
|
||||
void set_in_rise_time(double rt)
|
||||
{
|
||||
in_rise_time = rt;
|
||||
}
|
||||
|
||||
|
||||
double max_unpipelined_link_delay;
|
||||
powerDef power_bit;
|
||||
|
||||
|
||||
|
||||
|
||||
private:
|
||||
// current bus width; multiplied by 2 at branches while walking the tree
double wire_bw;
|
||||
double init_wire_bw; // bus width at root
|
||||
enum Htree_type tree_type;
|
||||
double htree_hnodes;
|
||||
double htree_vnodes;
|
||||
double mat_width;
|
||||
double mat_height;
|
||||
int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
|
||||
int ndbl, ndwl;
|
||||
bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
|
||||
bool search_tree;
|
||||
|
||||
|
||||
enum Wire_type wt;
|
||||
// minimum NMOS / PMOS widths used to scale every gate in the model
double min_w_nmos;
|
||||
double min_w_pmos;
|
||||
|
||||
|
||||
/*TechnologyParameter::*/DeviceType *deviceType;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
3790
T1/TP/TP1/cacti_7/io.cc
Normal file
3790
T1/TP/TP1/cacti_7/io.cc
Normal file
File diff suppressed because it is too large
Load diff
45
T1/TP/TP1/cacti_7/io.h
Normal file
45
T1/TP/TP1/cacti_7/io.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#ifndef __IO_H__
|
||||
#define __IO_H__
|
||||
|
||||
|
||||
#include "const.h"
|
||||
#include "cacti_interface.h"
|
||||
|
||||
|
||||
// Result-reporting entry points; definitions are not in this header.
// Presumably writes the result in fin_res as CSV to the file named fn
// (default "out.csv") - confirm against io.cc.
void output_data_csv(const uca_org_t & fin_res, string fn="out.csv");
|
||||
// Presumably prints a report for the UCA result pointed to by fin_res - confirm against io.cc.
void output_UCA(uca_org_t * fin_res);
|
||||
// CSV output variant for the 3D DRAM model, judging by the _3dd suffix; takes no
// file-name argument - confirm the destination against io.cc.
void output_data_csv_3dd(const uca_org_t & fin_res);
|
||||
|
||||
#endif
|
254
T1/TP/TP1/cacti_7/lpddr.cfg
Normal file
254
T1/TP/TP1/cacti_7/lpddr.cfg
Normal file
|
@ -0,0 +1,254 @@
|
|||
# Cache size
|
||||
//-size (bytes) 2048
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 32768
|
||||
//-size (bytes) 131072
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
# power gating
|
||||
-Array Power Gating - "false"
|
||||
-WL Power Gating - "false"
|
||||
-CL Power Gating - "false"
|
||||
-Bitline floating - "false"
|
||||
-Interconnect Power Gating - "false"
|
||||
-Power Gating Performance Loss 0.01
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 64
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
//-associativity 8
|
||||
-associativity 8
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 1
|
||||
-technology (u) 0.022
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.090
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
//-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
//-Tag array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
//-Tag array peripheral type - "itrs-lop"
|
||||
|
||||
# Bus width include data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
-output/input bus width 512
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 360
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 22
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
//-Wire outside mat - "global"
|
||||
-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
|
||||
|
||||
#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report, especially Chapters 2 and 3.
|
||||
|
||||
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO). Additional memory types can be defined by the user in extio_technology.cc, along with their technology and configuration parameters.
|
||||
|
||||
//-dram_type "D"
|
||||
-dram_type "L"
|
||||
//-dram_type "W"
|
||||
//-dram_type "S"
|
||||
|
||||
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||||
|
||||
//-iostate "R"
|
||||
-iostate "W"
|
||||
//-iostate "I"
|
||||
//-iostate "S"
|
||||
|
||||
#Address bus timing. To alleviate the timing on the command and address bus due to high loading (shared across all memories on the channel), the interface allows for multi-cycle timing options.
|
||||
|
||||
-addr_timing 0.5 //DDR
|
||||
//-addr_timing 1.0 //SDR (half of DQ rate)
|
||||
//-addr_timing 2.0 //2T timing (One fourth of DQ rate)
|
||||
//-addr_timing 3.0 // 3T timing (One sixth of DQ rate)
|
||||
|
||||
# Memory Density (Gbit per memory/DRAM die)
|
||||
|
||||
-mem_density 8 Gb //Valid values 2^n Gb
|
||||
|
||||
# IO frequency (MHz) (frequency of the external memory interface).
|
||||
|
||||
-bus_freq 533 MHz //As of current memory standards (2013), valid range 0 to 1.5 GHz for DDR3, 0 to 533 MHz for LPDDR2, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential. However this can change, and the user is free to define valid ranges based on new memory types or extending beyond existing standards for existing dram types.
|
||||
|
||||
# Duty Cycle (fraction of time in the Memory State defined above)
|
||||
|
||||
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||||
|
||||
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
#-activity_dq .50 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_ca 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
|
||||
#-activity_ca 0.25 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
|
||||
|
||||
# Number of DQ pins
|
||||
|
||||
-num_dq 72 //Number of DQ pins. Includes ECC pins.
|
||||
|
||||
# Number of DQS pins. DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typically differential, just like the CLK pin.
|
||||
|
||||
-num_dqs 36 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||||
|
||||
# Number of CA pins
|
||||
|
||||
-num_ca 35 //Valid range 0 to 35 pins.
|
||||
#-num_ca 25 //Valid range 0 to 35 pins.
|
||||
|
||||
# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin.
|
||||
|
||||
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
|
||||
|
||||
# Number of Physical Ranks
|
||||
|
||||
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register.
|
||||
|
||||
# Width of the Memory Data Bus
|
||||
|
||||
-mem_data_width 32 //x4 or x8 or x16 or x32 memories. For WideIO upto x128.
|
270
T1/TP/TP1/cacti_7/main.cc
Normal file
270
T1/TP/TP1/cacti_7/main.cc
Normal file
|
@ -0,0 +1,270 @@
|
|||
/*------------------------------------------------------------
|
||||
* CACTI 6.5
|
||||
* Copyright 2008 Hewlett-Packard Development Corporation
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Permission to use, copy, and modify this software and its documentation is
|
||||
* hereby granted only under the following terms and conditions. Both the
|
||||
* above copyright notice and this permission notice must appear in all copies
|
||||
* of the software, derivative works or modified versions, and any portions
|
||||
* thereof, and both notices must appear in supporting documentation.
|
||||
*
|
||||
* Users of this software agree to the terms and conditions set forth herein, and
|
||||
* hereby grant back to Hewlett-Packard Company and its affiliated companies ("HP")
|
||||
* a non-exclusive, unrestricted, royalty-free right and license under any changes,
|
||||
* enhancements or extensions made to the core functions of the software, including
|
||||
* but not limited to those affording compatibility with other hardware or software
|
||||
* environments, but excluding applications which incorporate this software.
|
||||
* Users further agree to use their best efforts to return to HP any such changes,
|
||||
* enhancements or extensions that they make and inform HP of noteworthy uses of
|
||||
* this software. Correspondence should be provided to HP at:
|
||||
*
|
||||
* Director of Intellectual Property Licensing
|
||||
* Office of Strategy and Technology
|
||||
* Hewlett-Packard Company
|
||||
* 1501 Page Mill Road
|
||||
* Palo Alto, California 94304
|
||||
*
|
||||
* This software may be distributed (but not offered for sale or transferred
|
||||
* for compensation) to third parties, provided such third parties agree to
|
||||
* abide by the terms and conditions of this notice.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND HP DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL HP
|
||||
* CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
|
||||
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
||||
* SOFTWARE.
|
||||
*------------------------------------------------------------*/
|
||||
|
||||
#include "io.h"
|
||||
#include <iostream>
|
||||
|
||||
#include "Ucache.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
int main(int argc,char *argv[])
|
||||
{
|
||||
|
||||
uca_org_t result;
|
||||
if (argc != 53 && argc != 55 && argc !=64)
|
||||
{
|
||||
bool infile_specified = false;
|
||||
string infile_name("");
|
||||
|
||||
for (int32_t i = 0; i < argc; i++)
|
||||
{
|
||||
if (argv[i] == string("-infile"))
|
||||
{
|
||||
infile_specified = true;
|
||||
i++;
|
||||
infile_name = argv[i];
|
||||
}
|
||||
}
|
||||
if (infile_specified == false)
|
||||
{
|
||||
cerr << " Invalid arguments -- how to use CACTI:" << endl;
|
||||
cerr << " 1) cacti -infile <input file name>" << endl;
|
||||
cerr << " 2) cacti arg1 ... arg52 -- please refer to the README file" << endl;
|
||||
cerr << " No. of arguments input - " << argc << endl;
|
||||
exit(1);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = cacti_interface(infile_name);
|
||||
}
|
||||
}
|
||||
else if (argc == 53)
|
||||
{
|
||||
result = cacti_interface(atoi(argv[ 1]),
|
||||
atoi(argv[ 2]),
|
||||
atoi(argv[ 3]),
|
||||
atoi(argv[ 4]),
|
||||
atoi(argv[ 5]),
|
||||
atoi(argv[ 6]),
|
||||
atoi(argv[ 7]),
|
||||
atoi(argv[ 8]),
|
||||
atoi(argv[ 9]),
|
||||
atof(argv[10]),
|
||||
atoi(argv[11]),
|
||||
atoi(argv[12]),
|
||||
atoi(argv[13]),
|
||||
atoi(argv[14]),
|
||||
atoi(argv[15]),
|
||||
atoi(argv[16]),
|
||||
atoi(argv[17]),
|
||||
atoi(argv[18]),
|
||||
atoi(argv[19]),
|
||||
atoi(argv[20]),
|
||||
atoi(argv[21]),
|
||||
atoi(argv[22]),
|
||||
atoi(argv[23]),
|
||||
atoi(argv[24]),
|
||||
atoi(argv[25]),
|
||||
atoi(argv[26]),
|
||||
atoi(argv[27]),
|
||||
atoi(argv[28]),
|
||||
atoi(argv[29]),
|
||||
atoi(argv[30]),
|
||||
atoi(argv[31]),
|
||||
atoi(argv[32]),
|
||||
atoi(argv[33]),
|
||||
atoi(argv[34]),
|
||||
atoi(argv[35]),
|
||||
atoi(argv[36]),
|
||||
atoi(argv[37]),
|
||||
atoi(argv[38]),
|
||||
atoi(argv[39]),
|
||||
atoi(argv[40]),
|
||||
atoi(argv[41]),
|
||||
atoi(argv[42]),
|
||||
atoi(argv[43]),
|
||||
atoi(argv[44]),
|
||||
atoi(argv[45]),
|
||||
atoi(argv[46]),
|
||||
atoi(argv[47]),
|
||||
atoi(argv[48]),
|
||||
atoi(argv[49]),
|
||||
atoi(argv[50]),
|
||||
atoi(argv[51]),
|
||||
atoi(argv[52]));
|
||||
}
|
||||
else if (argc == 55)
|
||||
{
|
||||
result = cacti_interface(atoi(argv[ 1]),
|
||||
atoi(argv[ 2]),
|
||||
atoi(argv[ 3]),
|
||||
atoi(argv[ 4]),
|
||||
atoi(argv[ 5]),
|
||||
atoi(argv[ 6]),
|
||||
atoi(argv[ 7]),
|
||||
atoi(argv[ 8]),
|
||||
atof(argv[ 9]),
|
||||
atoi(argv[10]),
|
||||
atoi(argv[11]),
|
||||
atoi(argv[12]),
|
||||
atoi(argv[13]),
|
||||
atoi(argv[14]),
|
||||
atoi(argv[15]),
|
||||
atoi(argv[16]),
|
||||
atoi(argv[17]),
|
||||
atoi(argv[18]),
|
||||
atoi(argv[19]),
|
||||
atoi(argv[20]),
|
||||
atoi(argv[21]),
|
||||
atoi(argv[22]),
|
||||
atoi(argv[23]),
|
||||
atoi(argv[24]),
|
||||
atoi(argv[25]),
|
||||
atoi(argv[26]),
|
||||
atoi(argv[27]),
|
||||
atoi(argv[28]),
|
||||
atoi(argv[29]),
|
||||
atoi(argv[30]),
|
||||
atoi(argv[31]),
|
||||
atoi(argv[32]),
|
||||
atoi(argv[33]),
|
||||
atoi(argv[34]),
|
||||
atoi(argv[35]),
|
||||
atoi(argv[36]),
|
||||
atoi(argv[37]),
|
||||
atoi(argv[38]),
|
||||
atoi(argv[39]),
|
||||
atoi(argv[40]),
|
||||
atoi(argv[41]),
|
||||
atoi(argv[42]),
|
||||
atoi(argv[43]),
|
||||
atoi(argv[44]),
|
||||
atoi(argv[45]),
|
||||
atoi(argv[46]),
|
||||
atoi(argv[47]),
|
||||
atoi(argv[48]),
|
||||
atoi(argv[49]),
|
||||
atoi(argv[50]),
|
||||
atoi(argv[51]),
|
||||
atoi(argv[52]),
|
||||
atoi(argv[53]),
|
||||
atoi(argv[54]));
|
||||
}
|
||||
else if (argc == 64)
|
||||
{
|
||||
result = cacti_interface(atoi(argv[ 1]),
|
||||
atoi(argv[ 2]),
|
||||
atoi(argv[ 3]),
|
||||
atoi(argv[ 4]),
|
||||
atoi(argv[ 5]),
|
||||
atoi(argv[ 6]),
|
||||
atoi(argv[ 7]),
|
||||
atoi(argv[ 8]),
|
||||
atof(argv[ 9]),
|
||||
atoi(argv[10]),
|
||||
atoi(argv[11]),
|
||||
atoi(argv[12]),
|
||||
atoi(argv[13]),
|
||||
atoi(argv[14]),
|
||||
atoi(argv[15]),
|
||||
atoi(argv[16]),
|
||||
atoi(argv[17]),
|
||||
atoi(argv[18]),
|
||||
atoi(argv[19]),
|
||||
atoi(argv[20]),
|
||||
atoi(argv[21]),
|
||||
atoi(argv[22]),
|
||||
atoi(argv[23]),
|
||||
atoi(argv[24]),
|
||||
atoi(argv[25]),
|
||||
atoi(argv[26]),
|
||||
atoi(argv[27]),
|
||||
atoi(argv[28]),
|
||||
atoi(argv[29]),
|
||||
atoi(argv[30]),
|
||||
atoi(argv[31]),
|
||||
atoi(argv[32]),
|
||||
atoi(argv[33]),
|
||||
atoi(argv[34]),
|
||||
atoi(argv[35]),
|
||||
atoi(argv[36]),
|
||||
atoi(argv[37]),
|
||||
atoi(argv[38]),
|
||||
atoi(argv[39]),
|
||||
atoi(argv[40]),
|
||||
atoi(argv[41]),
|
||||
atoi(argv[42]),
|
||||
atoi(argv[43]),
|
||||
atoi(argv[44]),
|
||||
atoi(argv[45]),
|
||||
atoi(argv[46]),
|
||||
atoi(argv[47]),
|
||||
atoi(argv[48]),
|
||||
atoi(argv[49]),
|
||||
atoi(argv[50]),
|
||||
atoi(argv[51]),
|
||||
atoi(argv[52]),
|
||||
atoi(argv[53]),
|
||||
atoi(argv[54]),
|
||||
atoi(argv[55]),
|
||||
atoi(argv[56]),
|
||||
atoi(argv[57]),
|
||||
atoi(argv[58]),
|
||||
atoi(argv[59]),
|
||||
atoi(argv[60]),
|
||||
atoi(argv[61]),
|
||||
atoi(argv[62]),
|
||||
atoi(argv[63]));
|
||||
}
|
||||
|
||||
cout << "=============================================\n\n";
|
||||
// print_g_tp(); //function to test technology paramters.
|
||||
// g_tp.display();
|
||||
result.cleanup();
|
||||
// delete result.data_array2;
|
||||
// if (result.tag_array2!=NULL)
|
||||
// delete result.tag_array2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
28
T1/TP/TP1/cacti_7/makefile
Normal file
28
T1/TP/TP1/cacti_7/makefile
Normal file
|
@ -0,0 +1,28 @@
|
|||
# Top-level driver: delegates the real build to $(TAR).mk, selecting the
# debug or optimized flavour through the TAG variable.
TAR = cacti

# `all` is listed as well so that a stray file named "all" cannot mask it.
.PHONY: all dbg opt depend clean clean_dbg clean_opt

all: dbg

dbg: $(TAR).mk obj_dbg
	@$(MAKE) TAG=dbg -C . -f $(TAR).mk

opt: $(TAR).mk obj_opt
	@$(MAKE) TAG=opt -C . -f $(TAR).mk

# Object directories; -p keeps the recipe harmless if the directory appears
# between the dependency check and the recipe (e.g. under parallel make).
obj_dbg:
	mkdir -p $@

obj_opt:
	mkdir -p $@

clean: clean_dbg clean_opt

# The object directory is a prerequisite so the sub-make's clean target has
# a directory to work in; it is removed afterwards.
clean_dbg: obj_dbg
	@$(MAKE) TAG=dbg -C . -f $(TAR).mk clean
	rm -rf $<

clean_opt: obj_opt
	@$(MAKE) TAG=opt -C . -f $(TAR).mk clean
	rm -rf $<
|
||||
|
1940
T1/TP/TP1/cacti_7/mat.cc
Normal file
1940
T1/TP/TP1/cacti_7/mat.cc
Normal file
File diff suppressed because it is too large
Load diff
176
T1/TP/TP1/cacti_7/mat.h
Normal file
176
T1/TP/TP1/cacti_7/mat.h
Normal file
|
@ -0,0 +1,176 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __MAT_H__
|
||||
#define __MAT_H__
|
||||
|
||||
#include "component.h"
|
||||
#include "decoder.h"
|
||||
#include "wire.h"
|
||||
#include "subarray.h"
|
||||
#include "powergating.h"
|
||||
|
||||
// Mat: a Component that aggregates the decoders, predecoders, drivers, output
// wire, sleep transistors and one Subarray declared below, together with the
// per-structure delay, power, leakage and wake-up bookkeeping fields.
// Almost all state is public; only a few helpers and the four port-count
// fields at the end are private.
class Mat : public Component
|
||||
{
|
||||
public:
|
||||
Mat(const DynamicParameter & dyn_p);
|
||||
~Mat();
|
||||
double compute_delays(double inrisetime); // takes the input rise time, returns the output rise time
|
||||
void compute_power_energy();
|
||||
|
||||
const DynamicParameter & dp; // reference member: the referenced DynamicParameter must outlive this Mat
|
||||
|
||||
// TODO: clean up pointers and powerDefs below
|
||||
Decoder * row_dec;
|
||||
Decoder * bit_mux_dec;
|
||||
Decoder * sa_mux_lev_1_dec;
|
||||
Decoder * sa_mux_lev_2_dec;
|
||||
PredecBlk * dummy_way_sel_predec_blk1;
|
||||
PredecBlk * dummy_way_sel_predec_blk2;
|
||||
PredecBlkDrv * way_sel_drv1;
|
||||
PredecBlkDrv * dummy_way_sel_predec_blk_drv2;
|
||||
|
||||
Predec * r_predec;
|
||||
Predec * b_mux_predec;
|
||||
Predec * sa_mux_lev_1_predec;
|
||||
Predec * sa_mux_lev_2_predec;
|
||||
|
||||
Wire * subarray_out_wire;
|
||||
Driver * bl_precharge_eq_drv;
|
||||
Driver * cam_bl_precharge_eq_drv;// bitline pre-charge circuit is separate for CAM and RAM arrays.
|
||||
Driver * ml_precharge_drv;// matchline precharge driver
|
||||
Driver * sl_precharge_eq_drv;// searchline precharge driver
|
||||
Driver * sl_data_drv;// search line data driver
|
||||
Driver * ml_to_ram_wl_drv;// NOTE(review): presumably the matchline-to-RAM-wordline driver, going by the name -- confirm in mat.cc
|
||||
|
||||
|
||||
powerDef power_row_decoders;
|
||||
powerDef power_bit_mux_decoders;
|
||||
powerDef power_sa_mux_lev_1_decoders;
|
||||
powerDef power_sa_mux_lev_2_decoders;
|
||||
powerDef power_fa_cam; // TODO: leakage power is not computed yet
|
||||
powerDef power_bl_precharge_eq_drv;
|
||||
powerDef power_subarray_out_drv;
|
||||
powerDef power_cam_all_active;
|
||||
powerDef power_searchline_precharge;
|
||||
powerDef power_matchline_precharge;
|
||||
powerDef power_ml_to_ram_wl_drv;
|
||||
|
||||
double delay_fa_tag, delay_cam;
|
||||
double delay_before_decoder;
|
||||
double delay_bitline;
|
||||
double delay_wl_reset;
|
||||
double delay_bl_restore;
|
||||
|
||||
double delay_searchline;
|
||||
double delay_matchchline; // NOTE(review): spelling "matchchline" is part of the public member name; left unchanged
|
||||
double delay_cam_sl_restore;
|
||||
double delay_cam_ml_reset;
|
||||
double delay_fa_ram_wl;
|
||||
|
||||
double delay_hit_miss_reset;
|
||||
double delay_hit_miss;
|
||||
|
||||
Subarray subarray;
|
||||
powerDef power_bitline, power_searchline, power_matchline, power_bitline_gated;
|
||||
double per_bitline_read_energy;
|
||||
int deg_bl_muxing;
|
||||
int num_act_mats_hor_dir;
|
||||
double delay_writeback;
|
||||
Area cell,cam_cell;
|
||||
bool is_dram,is_fa, pure_cam, camFlag;
|
||||
int num_mats;
|
||||
powerDef power_sa;
|
||||
double delay_sa;
|
||||
double leak_power_sense_amps_closed_page_state;
|
||||
double leak_power_sense_amps_open_page_state;
|
||||
double delay_subarray_out_drv;
|
||||
double delay_subarray_out_drv_htree;
|
||||
double delay_comparator;
|
||||
powerDef power_comparator;
|
||||
int num_do_b_mat;
|
||||
int num_so_b_mat;
|
||||
int num_sa_subarray;
|
||||
int num_sa_subarray_search;
|
||||
double C_bl;
|
||||
|
||||
uint32_t num_subarrays_per_mat; // the number of subarrays in a mat
|
||||
uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
|
||||
|
||||
double array_leakage;
|
||||
double wl_leakage;
|
||||
double cl_leakage;
|
||||
|
||||
Sleep_tx * sram_sleep_tx;
|
||||
Sleep_tx * wl_sleep_tx;
|
||||
Sleep_tx * cl_sleep_tx;
|
||||
|
||||
powerDef array_wakeup_e;
|
||||
double array_wakeup_t;
|
||||
double array_sleep_tx_area;
|
||||
|
||||
powerDef blfloating_wakeup_e;
|
||||
double blfloating_wakeup_t;
|
||||
double blfloating_sleep_tx_area;
|
||||
|
||||
powerDef wl_wakeup_e;
|
||||
double wl_wakeup_t;
|
||||
double wl_sleep_tx_area;
|
||||
|
||||
powerDef cl_wakeup_e;
|
||||
double cl_wakeup_t;
|
||||
double cl_sleep_tx_area;
|
||||
|
||||
// These three helpers are declared public here; their former private
// declarations are left commented out in the private section below.
double compute_bitline_delay(double inrisetime);
|
||||
double compute_sa_delay(double inrisetime);
|
||||
double compute_subarray_out_drv(double inrisetime);
|
||||
|
||||
private:
|
||||
double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
|
||||
double width_write_driver_or_write_mux();
|
||||
double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);
|
||||
double compute_cam_delay(double inrisetime);
|
||||
//double compute_bitline_delay(double inrisetime);
|
||||
//double compute_sa_delay(double inrisetime);
|
||||
//double compute_subarray_out_drv(double inrisetime);
|
||||
double compute_comparator_delay(double inrisetime);
|
||||
|
||||
// NOTE(review): presumably port counts (read-write, exclusive read,
// exclusive write, search) -- inferred from the names only, confirm in mat.cc
int RWP;
|
||||
int ERP;
|
||||
int EWP;
|
||||
int SCHP;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
599
T1/TP/TP1/cacti_7/memcad.cc
Normal file
599
T1/TP/TP1/cacti_7/memcad.cc
Normal file
|
@ -0,0 +1,599 @@
|
|||
#include "memcad.h"
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <cassert>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
vector<channel_conf*> *memcad_all_channels;
|
||||
|
||||
vector<bob_conf*> *memcad_all_bobs;
|
||||
|
||||
vector<memory_conf*> *memcad_all_memories;
|
||||
|
||||
vector<memory_conf*> *memcad_best_results;
|
||||
|
||||
/*
 * Ordering predicate for sorting channel configurations.
 *
 * Channels are ordered by ascending capacity first. Ties are broken by the
 * three metrics configured in first->memcad_params, in priority order:
 *   Cost      - lower cost sorts first
 *   Bandwidth - higher bandwidth sorts first
 *   Energy    - lower energy per access sorts first; differences not
 *               exceeding EPS are treated as a tie
 *
 * Returns true when `first` must be placed before `second`. When every
 * criterion ties the function returns false: std::sort requires a strict
 * weak ordering, so comp(a, a) must be false. The previous `return true`
 * made the behaviour of std::sort undefined.
 */
bool compare_channels(channel_conf* first, channel_conf* second)
{
  if(first->capacity != second->capacity)
    return (first->capacity < second->capacity);

  // Tie-break metrics in priority order (all taken from `first`, as before).
  const MemCad_metrics priority[3] =
  {
    first->memcad_params->first_metric,
    first->memcad_params->second_metric,
    first->memcad_params->third_metric
  };

  for(int level=0; level<3; ++level)
  {
    switch(priority[level])
    {
      case(Cost):
        if(first->cost != second->cost)
          return (first->cost < second->cost);
        break;
      case(Bandwidth):
        if(first->bandwidth != second->bandwidth)
          return (first->bandwidth > second->bandwidth);
        break;
      case(Energy):
        if( fabs(first->energy_per_access - second->energy_per_access)>EPS)
          return (first->energy_per_access < second->energy_per_access);
        break;
      default:
        assert(false);
    }
  }

  // Equivalent under every criterion: neither element precedes the other.
  return false;
}
|
||||
|
||||
|
||||
void prune_channels()
|
||||
{
|
||||
vector<channel_conf*> * temp = new vector<channel_conf*>();
|
||||
int last_added = -1;
|
||||
for(unsigned int i=0;i< memcad_all_channels->size();i++)
|
||||
{
|
||||
if(last_added != (*memcad_all_channels)[i]->capacity)
|
||||
{
|
||||
temp->push_back(clone((*memcad_all_channels)[i]));
|
||||
last_added = (*memcad_all_channels)[i]->capacity;
|
||||
}
|
||||
}
|
||||
|
||||
for(unsigned int i=0;i< memcad_all_channels->size();i++)
|
||||
{
|
||||
delete (*memcad_all_channels)[i];
|
||||
}
|
||||
memcad_all_channels->clear();
|
||||
delete memcad_all_channels;
|
||||
memcad_all_channels = temp;
|
||||
}
|
||||
|
||||
void find_all_channels(MemCadParameters * memcad_params)
|
||||
{
|
||||
|
||||
int DIMM_size[]={0,4,8,16,32,64};
|
||||
Mem_IO_type current_io_type = memcad_params->io_type;
|
||||
DIMM_Model current_dimm_model = memcad_params->dimm_model;
|
||||
|
||||
|
||||
memcad_all_channels= new vector<channel_conf*>();
|
||||
|
||||
// channels can have up to 3 DIMMs per channel
|
||||
// di is the capacity if i-th dimm in the channel
|
||||
for(int d1=0; d1<6;d1++)
|
||||
{
|
||||
for(int d2=d1;d2<6;d2++)
|
||||
{
|
||||
for(int d3=d2;d3<6;d3++)
|
||||
{
|
||||
// channel capacity should not exceed the entire memory capacity.
|
||||
if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])>memcad_params->capacity)
|
||||
continue;
|
||||
|
||||
if( ((current_dimm_model== JUST_LRDIMM) || (current_dimm_model== ALL))
|
||||
&& ((d1==0) || (MemoryParameters::cost[current_io_type][2][d1-1]<INF))
|
||||
&& ((d2==0) || (MemoryParameters::cost[current_io_type][2][d2-1]<INF))
|
||||
&& ((d3==0) || (MemoryParameters::cost[current_io_type][2][d3-1]<INF)) )
|
||||
{
|
||||
int num_dimm_per_channel =0;
|
||||
vector<int> dimm_cap;
|
||||
dimm_cap.push_back(DIMM_size[d1]); if(d1>0) num_dimm_per_channel++;
|
||||
dimm_cap.push_back(DIMM_size[d2]); if(d2>0) num_dimm_per_channel++;
|
||||
dimm_cap.push_back(DIMM_size[d3]); if(d3>0) num_dimm_per_channel++;
|
||||
|
||||
int max_index = bw_index(current_io_type, MemoryParameters::bandwidth_load[current_io_type][4-num_dimm_per_channel]);
|
||||
for(int bw_id=0;bw_id<=max_index; ++bw_id)
|
||||
{
|
||||
int bandwidth = MemoryParameters::bandwidth_load[current_io_type][bw_id];
|
||||
channel_conf * new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, LRDIMM, false);
|
||||
if(new_channel->cost <INF)
|
||||
{
|
||||
memcad_all_channels->push_back(new_channel);
|
||||
}
|
||||
|
||||
if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])==0)
|
||||
continue;
|
||||
|
||||
if(memcad_params->low_power_permitted)
|
||||
{
|
||||
new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, LRDIMM, true);
|
||||
if(new_channel->cost <INF)
|
||||
{
|
||||
memcad_all_channels->push_back(new_channel);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if( (current_dimm_model== JUST_RDIMM) || (current_dimm_model== ALL)
|
||||
&& ((d1==0) || (MemoryParameters::cost[current_io_type][1][d1-1]<INF))
|
||||
&& ((d2==0) || (MemoryParameters::cost[current_io_type][1][d2-1]<INF))
|
||||
&& ((d3==0) || (MemoryParameters::cost[current_io_type][1][d3-1]<INF)) )
|
||||
{
|
||||
|
||||
int num_dimm_per_channel =0;
|
||||
vector<int> dimm_cap;
|
||||
dimm_cap.push_back(DIMM_size[d1]); if(d1>0) num_dimm_per_channel++;
|
||||
dimm_cap.push_back(DIMM_size[d2]); if(d2>0) num_dimm_per_channel++;
|
||||
dimm_cap.push_back(DIMM_size[d3]); if(d3>0) num_dimm_per_channel++;
|
||||
|
||||
if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])==0)
|
||||
continue;
|
||||
|
||||
int max_index = bw_index(current_io_type, MemoryParameters::bandwidth_load[current_io_type][4-num_dimm_per_channel]);
|
||||
|
||||
for(int bw_id=0;bw_id<=max_index; ++bw_id)
|
||||
{
|
||||
int bandwidth = MemoryParameters::bandwidth_load[current_io_type][bw_id];
|
||||
channel_conf * new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, RDIMM, false);
|
||||
if(new_channel->cost <INF)
|
||||
{
|
||||
memcad_all_channels->push_back(new_channel);
|
||||
}
|
||||
|
||||
if(memcad_params->low_power_permitted)
|
||||
{
|
||||
new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, RDIMM, true);
|
||||
if(new_channel->cost <INF)
|
||||
{
|
||||
memcad_all_channels->push_back(new_channel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( (current_dimm_model== JUST_UDIMM) || (current_dimm_model== ALL)
|
||||
&& ((d1==0) || (MemoryParameters::cost[current_io_type][0][d1-1]<INF))
|
||||
&& ((d2==0) || (MemoryParameters::cost[current_io_type][0][d2-1]<INF))
|
||||
&& ((d3==0) || (MemoryParameters::cost[current_io_type][0][d3-1]<INF)) )
|
||||
{
|
||||
int num_dimm_per_channel =0;
|
||||
vector<int> dimm_cap;
|
||||
dimm_cap.push_back(DIMM_size[d1]); if(d1>0) num_dimm_per_channel++;
|
||||
dimm_cap.push_back(DIMM_size[d2]); if(d2>0) num_dimm_per_channel++;
|
||||
dimm_cap.push_back(DIMM_size[d3]); if(d3>0) num_dimm_per_channel++;
|
||||
|
||||
if((DIMM_size[d1]+DIMM_size[d2]+DIMM_size[d3])==0)
|
||||
continue;
|
||||
int max_index = bw_index(current_io_type, MemoryParameters::bandwidth_load[current_io_type][4-num_dimm_per_channel]);
|
||||
for(int bw_id=0;bw_id<=max_index; ++bw_id)
|
||||
{
|
||||
int bandwidth = MemoryParameters::bandwidth_load[current_io_type][bw_id];
|
||||
channel_conf * new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, UDIMM, false);
|
||||
if(new_channel->cost <INF)
|
||||
{
|
||||
memcad_all_channels->push_back(new_channel);
|
||||
}
|
||||
|
||||
if(memcad_params->low_power_permitted)
|
||||
{
|
||||
new_channel = new channel_conf(memcad_params, dimm_cap, bandwidth, UDIMM, true);
|
||||
if(new_channel->cost <INF)
|
||||
{
|
||||
memcad_all_channels->push_back(new_channel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort(memcad_all_channels->begin(), memcad_all_channels->end(), compare_channels);
|
||||
|
||||
|
||||
prune_channels();
|
||||
|
||||
if(memcad_params->verbose)
|
||||
{
|
||||
for(unsigned int i=0;i<memcad_all_channels->size();i++)
|
||||
{
|
||||
cout << *(*memcad_all_channels)[i] << endl;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Ordering predicate: true when `first` has strictly lower bandwidth than
// `second`, i.e. sorts channels by ascending bandwidth.
bool compare_channels_bw(channel_conf* first, channel_conf* second)
{
  const bool first_has_less_bandwidth = (first->bandwidth < second->bandwidth);
  return first_has_less_bandwidth;
}
|
||||
|
||||
/*
 * Ordering predicate for sorting bob_conf configurations.
 *
 * Entries are ordered by ascending capacity first. Ties are broken by the
 * three metrics configured in first->memcad_params, in priority order:
 *   Cost      - lower cost sorts first
 *   Bandwidth - higher bandwidth sorts first
 *   Energy    - lower energy per access sorts first; differences not
 *               exceeding EPS are treated as a tie
 *
 * Returns true when `first` must be placed before `second`. When every
 * criterion ties the function returns false: std::sort requires a strict
 * weak ordering, so comp(a, a) must be false. The previous `return true`
 * made the behaviour of std::sort undefined.
 */
bool compare_bobs(bob_conf* first, bob_conf* second)
{
  if(first->capacity != second->capacity)
    return (first->capacity < second->capacity);

  // Tie-break metrics in priority order (all taken from `first`, as before).
  const MemCad_metrics priority[3] =
  {
    first->memcad_params->first_metric,
    first->memcad_params->second_metric,
    first->memcad_params->third_metric
  };

  for(int level=0; level<3; ++level)
  {
    switch(priority[level])
    {
      case(Cost):
        if(first->cost != second->cost)
          return (first->cost < second->cost);
        break;
      case(Bandwidth):
        if(first->bandwidth != second->bandwidth)
          return (first->bandwidth > second->bandwidth);
        break;
      case(Energy):
        if( fabs(first->energy_per_access - second->energy_per_access)>EPS)
          return (first->energy_per_access < second->energy_per_access);
        break;
      default:
        assert(false);
    }
  }

  // Equivalent under every criterion: neither element precedes the other.
  return false;
}
|
||||
|
||||
void prune_bobs()
|
||||
{
|
||||
vector<bob_conf*> * temp = new vector<bob_conf*>();
|
||||
int last_added = -1;
|
||||
for(unsigned int i=0;i< memcad_all_bobs->size();i++)
|
||||
{
|
||||
if(last_added != (*memcad_all_bobs)[i]->capacity)
|
||||
{
|
||||
temp->push_back(clone((*memcad_all_bobs)[i]));
|
||||
last_added = (*memcad_all_bobs)[i]->capacity;
|
||||
}
|
||||
}
|
||||
|
||||
for(unsigned int i=0;i< memcad_all_bobs->size();i++)
|
||||
{
|
||||
delete (*memcad_all_bobs)[i];
|
||||
}
|
||||
memcad_all_bobs->clear();
|
||||
delete memcad_all_bobs;
|
||||
memcad_all_bobs = temp;
|
||||
}
|
||||
|
||||
// Appends to memcad_all_bobs one bob_conf for every combination (repetition
// allowed, indices non-decreasing) of `nb` channels taken from
// memcad_all_channels[start..end].
//
//   memcad_params  passed through to each bob_conf that is created
//   start, end     inclusive index range into memcad_all_channels
//   nb             number of channels still to pick for the current bob
//   channel_index  indices picked so far; restored to its entry state on return
void find_bobs_recursive(MemCadParameters * memcad_params,int start,int end,int nb, list<int> *channel_index)
{
    // A non-positive count can never reach the nb == 1 base case below; without
    // this guard the recursion would not terminate.
    if(nb < 1)
        return;

    if(nb==1)
    {
        for(int i=start; i<=end;++i)
        {
            channel_index->push_back(i);

            // Materialise the channels selected on the current path.
            vector<channel_conf*> picked;
            for(list<int>::iterator it= channel_index->begin(); it!= channel_index->end(); it++)
            {
                picked.push_back((*memcad_all_channels)[*it]);
            }
            memcad_all_bobs->push_back(new bob_conf(memcad_params, &picked));

            channel_index->pop_back();
        }
        return;
    }

    for(int i=start;i<=end;++i)
    {
        channel_index->push_back(i);
        // Restarting at i (not i+1) lets the same channel be chosen again.
        find_bobs_recursive(memcad_params,i,end,nb-1,channel_index);
        channel_index->pop_back();
    }
}
|
||||
|
||||
// Builds memcad_all_bobs from memcad_all_channels, then sorts it with
// compare_bobs and prunes it with prune_bobs().
//
//  - mirror_in_bob:  each bob is num_channels_per_bob copies of one channel.
//  - same_bw_in_bob: channels are sorted with compare_channels_bw, split into
//                    runs of equal bandwidth, and every combination of
//                    num_channels_per_bob channels inside a run becomes a bob.
//  - otherwise:      unsupported; prints a message and asserts.
//
// When memcad_params->verbose is set the resulting bobs are printed.
void find_all_bobs(MemCadParameters * memcad_params)
{
    memcad_all_bobs = new vector<bob_conf*>();
    if(memcad_params->mirror_in_bob)
    {
        for(unsigned int i=0;i<memcad_all_channels->size();++i)
        {
            vector<channel_conf*> channels;
            for(int j=0;j<memcad_params->num_channels_per_bob;j++)
                channels.push_back((*memcad_all_channels)[i]);
            memcad_all_bobs->push_back(new bob_conf(memcad_params, &channels));
            channels.clear();
        }
    }
    else if(memcad_params->same_bw_in_bob)
    {
        // With no channels there is nothing to group; reading element 0 or
        // computing size()-1 on an empty vector would be invalid.
        if(!memcad_all_channels->empty())
        {
            sort(memcad_all_channels->begin(), memcad_all_channels->end(), compare_channels_bw);

            // start_index[k]..end_index[k] delimit the k-th run of equal bandwidth.
            vector<int> start_index; start_index.push_back(0);
            vector<int> end_index;
            int last_bw =(*memcad_all_channels)[0]->bandwidth;
            for(unsigned int i=0;i< memcad_all_channels->size();i++)
            {
                if(last_bw!=(*memcad_all_channels)[i]->bandwidth)
                {
                    end_index.push_back(i-1);
                    start_index.push_back(i);
                    last_bw = (*memcad_all_channels)[i]->bandwidth;
                }
            }
            end_index.push_back((int)memcad_all_channels->size()-1);

            list<int> channel_index;

            for(unsigned int i=0;i< start_index.size();++i)
            {
                find_bobs_recursive(memcad_params,start_index[i],end_index[i],memcad_params->num_channels_per_bob, &channel_index);
            }
        }
    }
    else
    {
        cout << "We do not support different frequencies per in a BoB!" << endl;
        assert(false);
    }

    sort(memcad_all_bobs->begin(), memcad_all_bobs->end(), compare_bobs);
    prune_bobs();
    if(memcad_params->verbose)
    {
        for(unsigned int i=0;i<memcad_all_bobs->size();i++)
        {
            cout << *(*memcad_all_bobs)[i] << endl;
        }
    }
}
|
||||
|
||||
// Appends to memcad_all_memories one memory_conf for every combination
// (repetition allowed, indices non-decreasing) of `nb` bobs from
// memcad_all_bobs[start..] whose capacities add up to the requested total.
//
//   memcad_params       passed through to each memory_conf that is created
//   remaining_capacity  capacity still to be covered by the bobs not yet picked
//   start               first index of memcad_all_bobs that may be picked
//   nb                  number of bobs still to pick
//   bobs_index          indices picked so far; restored to its entry state on return
void find_mems_recursive(MemCadParameters * memcad_params, int remaining_capacity, int start, int nb, list<int>* bobs_index)
{
    // A non-positive count can never reach the nb == 1 base case, so no result
    // can come out of such a call; stop instead of exploring the tree for nothing.
    if(nb < 1)
        return;

    if(nb==1)
    {
        // Last bob: it must cover exactly what is left.
        for(unsigned int i=start; i< memcad_all_bobs->size();++i)
        {
            if((*memcad_all_bobs)[i]->capacity != remaining_capacity)
                continue;

            bobs_index->push_back(i);
            vector<bob_conf*> picked;
            for(list<int>::iterator it= bobs_index->begin(); it!= bobs_index->end(); it++)
            {
                picked.push_back((*memcad_all_bobs)[*it]);
            }
            memcad_all_memories->push_back(new memory_conf(memcad_params, &picked));
            bobs_index->pop_back();
        }
        return;
    }

    for(unsigned int i=start; i<memcad_all_bobs->size();i++)
    {
        // A bob larger than what is left cannot be part of a valid memory.
        if((*memcad_all_bobs)[i]->capacity > remaining_capacity)
            continue;

        int new_remaining_capacity = remaining_capacity-(*memcad_all_bobs)[i]->capacity;
        bobs_index->push_back(i);
        // Restarting at i (not i+1) lets the same bob be chosen again.
        find_mems_recursive(memcad_params, new_remaining_capacity, i, nb-1, bobs_index);
        bobs_index->pop_back();
    }
}
|
||||
|
||||
//void find_mems_recursive(MemCadParameters * memcad_params, int start, int
|
||||
|
||||
bool compare_memories(memory_conf* first, memory_conf* second)
|
||||
{
|
||||
if(first->capacity != second->capacity)
|
||||
return (first->capacity < second->capacity);
|
||||
|
||||
MemCad_metrics first_metric = first->memcad_params->first_metric;
|
||||
MemCad_metrics second_metric = first->memcad_params->second_metric;
|
||||
MemCad_metrics third_metric = first->memcad_params->third_metric;
|
||||
|
||||
switch(first_metric)
|
||||
{
|
||||
case(Cost):
|
||||
if(first->cost != second->cost)
|
||||
return (first->cost < second->cost);
|
||||
break;
|
||||
case(Bandwidth):
|
||||
if(first->bandwidth != second->bandwidth)
|
||||
return (first->bandwidth > second->bandwidth);
|
||||
break;
|
||||
case(Energy):
|
||||
if( fabs(first->energy_per_access - second->energy_per_access)>EPS)
|
||||
return (first->energy_per_access < second->energy_per_access);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
switch(second_metric)
|
||||
{
|
||||
case(Cost):
|
||||
if(first->cost != second->cost)
|
||||
return (first->cost < second->cost);
|
||||
break;
|
||||
case(Bandwidth):
|
||||
if(first->bandwidth != second->bandwidth)
|
||||
return (first->bandwidth > second->bandwidth);
|
||||
break;
|
||||
case(Energy):
|
||||
if( fabs(first->energy_per_access - second->energy_per_access)>EPS)
|
||||
return (first->energy_per_access < second->energy_per_access);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
switch(third_metric)
|
||||
{
|
||||
case(Cost):
|
||||
if(first->cost != second->cost)
|
||||
return (first->cost < second->cost);
|
||||
break;
|
||||
case(Bandwidth):
|
||||
if(first->bandwidth != second->bandwidth)
|
||||
return (first->bandwidth > second->bandwidth);
|
||||
break;
|
||||
case(Energy):
|
||||
if( fabs(first->energy_per_access - second->energy_per_access)>EPS)
|
||||
return (first->energy_per_access < second->energy_per_access);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Enumerates every memory configuration reaching memcad_params->capacity with
// memcad_params->num_bobs bobs, sorts them with compare_memories and prints
// (at most) the first three. In verbose mode the full sorted list is printed
// first. Returns false when no configuration was found, true otherwise.
bool find_all_memories(MemCadParameters * memcad_params)
{
    memcad_all_memories = new vector<memory_conf*>();

    list<int> chosen_bobs;
    find_mems_recursive(memcad_params, memcad_params->capacity, 0,memcad_params->num_bobs, &chosen_bobs);

    sort(memcad_all_memories->begin(), memcad_all_memories->end(), compare_memories);

    if(memcad_params->verbose)
    {
        cout << "all possible results:" << endl;
        for(vector<memory_conf*>::iterator it = memcad_all_memories->begin(); it != memcad_all_memories->end(); ++it)
        {
            cout << *(*it) << endl;
        }
    }

    if(memcad_all_memories->empty())
    {
        cout << "No result found " << endl;
        return false;
    }

    cout << "top 3 best memory configurations are:" << endl;
    int shown = 3;
    if(memcad_all_memories->size() <= 3)
        shown = memcad_all_memories->size();

    for(int rank=0; rank<shown; ++rank)
    {
        memory_conf *mem = (*memcad_all_memories)[rank];
        if(mem)
            cout << *mem << endl;
    }
    return true;
}
|
||||
|
||||
void clean_results()
|
||||
{
|
||||
for(unsigned int i=0;i<memcad_all_channels->size();++i)
|
||||
{
|
||||
delete (*memcad_all_channels)[i];
|
||||
}
|
||||
delete memcad_all_channels;
|
||||
|
||||
for(unsigned int i=0;i<memcad_all_bobs->size();++i)
|
||||
{
|
||||
delete (*memcad_all_bobs)[i];
|
||||
}
|
||||
delete memcad_all_bobs;
|
||||
|
||||
for(unsigned int i=0;i<memcad_all_memories->size();++i)
|
||||
{
|
||||
delete (*memcad_all_memories)[i];
|
||||
}
|
||||
delete memcad_all_memories;
|
||||
}
|
||||
|
||||
|
||||
// Runs the whole search: build the channel candidates, combine them into bobs,
// combine bobs into memories (which prints the best results), then free all
// intermediate lists.
void solve_memcad(MemCadParameters * memcad_params)
{
    find_all_channels(memcad_params);
    find_all_bobs(memcad_params);

    // The success flag is not used here; find_all_memories reports on stdout.
    (void) find_all_memories(memcad_params);

    clean_results();
}
|
||||
|
30
T1/TP/TP1/cacti_7/memcad.h
Normal file
30
T1/TP/TP1/cacti_7/memcad.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
#ifndef __MEMCAD_H__
|
||||
#define __MEMCAD_H__
|
||||
|
||||
#include "memcad_parameters.h"
|
||||
#include <vector>
|
||||
|
||||
|
||||
extern vector<channel_conf*> *memcad_all_channels;
|
||||
|
||||
extern vector<bob_conf*> *memcad_all_bobs;
|
||||
|
||||
extern vector<memory_conf*> *memcad_all_memories;
|
||||
|
||||
extern vector<memory_conf*> *memcad_best_results;
|
||||
|
||||
|
||||
|
||||
void find_all_channels(MemCadParameters * memcad_params);
|
||||
|
||||
void find_all_bobs(MemCadParameters * memcad_params);
|
||||
|
||||
bool find_all_memories(MemCadParameters * memcad_params);
|
||||
|
||||
void clean_results();
|
||||
|
||||
void solve_memcad(MemCadParameters * memcad_params);
|
||||
|
||||
#endif
|
||||
|
||||
|
466
T1/TP/TP1/cacti_7/memcad_parameters.cc
Normal file
466
T1/TP/TP1/cacti_7/memcad_parameters.cc
Normal file
|
@ -0,0 +1,466 @@
|
|||
#include "memcad_parameters.h"
|
||||
#include <cmath>
|
||||
#include <cassert>
|
||||
|
||||
// Builds the MemCAD settings from the parsed input `g_ip`.
// Only part of the settings is read from the input; the rest keeps fixed
// values because the corresponding input fields are not wired in yet.
MemCadParameters::MemCadParameters(InputParameter * g_ip)
{
    // --- taken from the input ---
    io_type=g_ip->io_type;                            // DDR3 vs. DDR4
    capacity=g_ip->capacity;                          // in GB
    num_bobs=g_ip->num_bobs;
    num_channels_per_bob=g_ip->num_channels_per_bob;  // 1 means no bob
    first_metric=g_ip->first_metric;
    second_metric=g_ip->second_metric;
    third_metric=g_ip->third_metric;
    dimm_model=g_ip->dimm_model;
    mirror_in_bob=g_ip->mirror_in_bob;                // true if all the channels in the bob have the same configs
    verbose=g_ip->verbose;

    // --- fixed values (not read from the input) ---
    capacity_wise=true;        // true means the load on each channel is proportional to its capacity.
    low_power_permitted=false;
    load=0.9;                  // between 0 to 1
    row_buffer_hit_rate=1;
    rd_2_wr_ratio=2;
    same_bw_in_bob=true;       // true if all the channels in the bob have the same bandwidth
    total_power=false;         // false means just considering I/O Power.
}
|
||||
|
||||
// Placeholder: currently prints nothing.
void MemCadParameters::print_inputs()
{
    return;
}
|
||||
|
||||
// Placeholder: performs no validation and always reports success.
bool MemCadParameters::sanity_check()
{
    const bool parameters_ok = true;
    return parameters_ok;
}
|
||||
|
||||
|
||||
double MemoryParameters::VDD[2][2][4]= //[lp:hp][ddr3:ddr4][frequency index]
|
||||
{
|
||||
{
|
||||
{1.5,1.5,1.5,1.5},
|
||||
{1.2,1.2,1.2,1.2}
|
||||
},
|
||||
{
|
||||
{1.35,1.35,1.35,1.35},
|
||||
{1.0,1.0,1.0,1.0}
|
||||
}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD0[2][4]=
|
||||
{
|
||||
{55,60,65,75},
|
||||
{58,58,60,64}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD2P0[2][4]=
|
||||
{
|
||||
{20,20,20,20},
|
||||
{20,20,20,20}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD2P1[2][4]=
|
||||
{
|
||||
{30,30,32,37},
|
||||
{30,30,30,32}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD2N[2][4]=
|
||||
{
|
||||
{40,42,45,50},
|
||||
{44,44,46,50}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD3P[2][4]=
|
||||
{
|
||||
{45,50,55,60},
|
||||
{44,44,44,44}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD3N[2][4]=
|
||||
{
|
||||
{42,47,52,57},
|
||||
{44,44,44,44}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD4R[2][4]=
|
||||
{
|
||||
{120,135,155,175},
|
||||
{140,140,150,160}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD4W[2][4]=
|
||||
{
|
||||
{100,125,145,165},
|
||||
{156,156,176,196}
|
||||
};
|
||||
|
||||
double MemoryParameters::IDD5[2][4]=
|
||||
{
|
||||
{150,205,210,220},
|
||||
{190,190,190,192}
|
||||
};
|
||||
|
||||
double MemoryParameters::io_energy_read[2][3][3][4] =// [ddr3:ddr4][udimm:rdimm:lrdimm][load 1:2:3][frequency 0:1:2:3]
|
||||
{
|
||||
{ //ddr3
|
||||
{//udimm
|
||||
{2592.33, 2593.33, 3288.784, 4348.612},
|
||||
{2638.23, 2640.23, 3941.584, 5415.492},
|
||||
{2978.659, 2981.659, 4816.644, 6964.162}
|
||||
|
||||
},
|
||||
{//rdimm
|
||||
{2592.33, 3087.071, 3865.044, 4844.982},
|
||||
{2932.759, 3733.318, 4237.634, 5415.492},
|
||||
{3572.509, 4603.109, 5300.004, 6964.162}
|
||||
},
|
||||
{//lrdimm
|
||||
{4628.966, 6357.625, 7079.348, 9680.454},
|
||||
{5368.26, 6418.788, 7428.058, 10057.164},
|
||||
{5708.689, 7065.038, 7808.678, 10627.674}
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{ //ddr4
|
||||
{//udimm
|
||||
{2135.906, 2633.317, 2750.919, 2869.406},
|
||||
{2458.714, 2695.791, 2822.298, 3211.111},
|
||||
{2622.85, 3030.048, 3160.265, 3534.448}
|
||||
|
||||
},
|
||||
{//rdimm
|
||||
{2135.906, 2633.317, 2750.919, 2869.406},
|
||||
{2458.714, 2695.791, 3088.886, 3211.111},
|
||||
{2622.85, 3030.048, 3312.468, 3758.445}
|
||||
|
||||
},
|
||||
{//lrdimm
|
||||
{4226.903, 5015.342, 5490.61, 5979.864},
|
||||
{4280.471, 5319.132, 5668.945, 6060.216},
|
||||
{4603.279, 5381.605, 5740.325, 6401.926}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
double MemoryParameters::io_energy_write[2][3][3][4] =
|
||||
{
|
||||
{ //ddr3
|
||||
{//udimm
|
||||
{2758.951, 2984.854, 3571.804, 4838.902},
|
||||
{2804.851, 3768.524, 4352.214, 5580.362},
|
||||
{3213.897, 3829.684, 5425.854, 6933.512}
|
||||
|
||||
},
|
||||
{//rdimm
|
||||
{2758.951, 3346.104, 3931.154, 4838.902},
|
||||
{3167.997, 4114.754, 4696.724, 5580.362},
|
||||
{3561.831, 3829.684, 6039.994, 8075.542}
|
||||
|
||||
},
|
||||
{//lrdimm
|
||||
{4872.238, 5374.314, 7013.868, 9267.574},
|
||||
{5701.502, 6214.348, 7449.758, 10045.004},
|
||||
{5747.402, 6998.018, 8230.168, 10786.464}
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
{ //ddr4
|
||||
{//udimm
|
||||
{2525.129, 2840.853, 2979.037, 3293.608},
|
||||
{2933.756, 3080.126, 3226.497, 3979.698},
|
||||
{3293.964, 3753.37, 3906.137, 4312.448}
|
||||
|
||||
},
|
||||
{//rdimm
|
||||
{2525.129, 2840.853, 3155.117, 3293.608},
|
||||
{2933.756, 3080.126, 3834.757, 3979.698},
|
||||
{3293.964, 3753.37, 4413.037, 5358.078}
|
||||
|
||||
},
|
||||
{//lrdimm
|
||||
{4816.453, 5692.314, 5996.134, 6652.936},
|
||||
{4870.021, 5754.788, 6067.514, 6908.636},
|
||||
{5298.373, 5994.07, 6491.054, 7594.726}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
double MemoryParameters::T_RAS[2] = {35,35};
|
||||
|
||||
double MemoryParameters::T_RC[2] = {47.5,47.5};
|
||||
|
||||
double MemoryParameters::T_RP[2] = {13,13};
|
||||
|
||||
double MemoryParameters::T_RFC[2] = {340,260};
|
||||
|
||||
double MemoryParameters::T_REFI[2] = {7800,7800};
|
||||
|
||||
int MemoryParameters::bandwidth_load[2][4]={{400,533,667,800},{800,933,1066,1200}};
|
||||
|
||||
double MemoryParameters::cost[2][3][5] =
|
||||
{
|
||||
{
|
||||
{40.38,76.13,INF,INF,INF},
|
||||
{42.24,64.17,122.6,304.3,INF},
|
||||
{INF,INF,211.3,287.5,1079.5}
|
||||
},
|
||||
{
|
||||
{25.99,45.99,INF,INF,INF},
|
||||
{32.99,60.45,126,296.3,INF},
|
||||
{INF,INF,278.99,333,1474}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Stub: ignores all of its arguments and always returns 0.
double calculate_power(double load, double row_buffer_hr, double rd_wr_ratio, int chips_per_rank, int frequency_index, int lp)
{
    const double power = 0;
    return power;
}
|
||||
|
||||
// Maps a channel bandwidth to a frequency index in 0..3.
// For DDR3 the upper bounds of indices 0..2 are 400, 533 and 667; for every
// other type they are 800, 933 and 1066. Anything above the last bound maps
// to index 3.
int bw_index(Mem_IO_type type, int bandwidth)
{
    static const int ddr3_bounds[3]  = {400, 533, 667};
    static const int other_bounds[3] = {800, 933, 1066};

    const int *bounds = (type==DDR3) ? ddr3_bounds : other_bounds;
    for(int idx=0; idx<3; ++idx)
    {
        if(bandwidth <= bounds[idx])
            return idx;
    }
    return 3;
}
|
||||
|
||||
// Builds a channel from the capacities of its DIMMs.
//
//   memcad_params  global settings (io_type selects the cost/energy tables)
//   dimm_cap       capacity of each DIMM slot; 0 marks an empty slot
//   bandwidth      channel bandwidth, stored as given
//   type           DIMM type, used as index into the cost/energy tables
//   low_power      stored as given
//
// Fills the capacity histogram, the DIMM count, the total capacity and cost,
// then computes the energy figures via calc_power().
channel_conf::channel_conf(MemCadParameters * memcad_params, const vector<int>& dimm_cap, int bandwidth, Mem_DIMM type, bool low_power)
:memcad_params(memcad_params),type(type),low_power(low_power),bandwidth(bandwidth),latency(0),valid(true)
{
    //assert(memcad_params);
    assert(dimm_cap.size() <=DIMM_PER_CHANNEL);
    assert(memcad_params->io_type<2); // So far, we just support DDR3 and DDR4.

    // updating capacity
    num_dimm_per_channel=0;
    capacity =0;
    for(int i=0;i<5;i++) histogram_capacity[i]=0;
    for(unsigned int i=0;i<dimm_cap.size();i++)
    {
        if(dimm_cap[i]==0)
            continue;

        // Bucket 0 holds 4GB DIMMs, so a smaller (or negative) size has no
        // bucket and would otherwise produce a negative array index.
        assert(dimm_cap[i] >= 4);
        int index =(int)(log2(dimm_cap[i]+0.1))-2;
        assert(index>=0 && index<5);
        histogram_capacity[index]++;
        num_dimm_per_channel++;
        capacity += dimm_cap[i];
    }

    // The member `bandwidth` keeps the value it received in the initializer
    // list. Inside this body the name `bandwidth` denotes the constructor
    // parameter (it shadows the member), so nothing is assigned to it here.

    // updating channel cost
    cost =0;
    for(int i=0;i<5;++i)
        cost += histogram_capacity[i] * MemoryParameters::cost[memcad_params->io_type][type][i];

    // update energy
    calc_power();
}
|
||||
|
||||
void channel_conf::calc_power()
|
||||
{
|
||||
|
||||
double read_ratio = memcad_params->rd_2_wr_ratio/(1.0+memcad_params->rd_2_wr_ratio);
|
||||
double write_ratio = 1.0/(1.0+memcad_params->rd_2_wr_ratio);
|
||||
Mem_IO_type current_io_type = memcad_params->io_type;
|
||||
double capacity_ratio = (capacity/(double) memcad_params->capacity );
|
||||
|
||||
double T_BURST = 4; // memory cycles
|
||||
|
||||
energy_per_read = MemoryParameters::io_energy_read[current_io_type][type][num_dimm_per_channel-1][bw_index(current_io_type,bandwidth)];
|
||||
energy_per_read /= (bandwidth/T_BURST);
|
||||
|
||||
energy_per_write = MemoryParameters::io_energy_write[current_io_type][type][num_dimm_per_channel-1][bw_index(current_io_type,bandwidth)];
|
||||
energy_per_write /= (bandwidth/T_BURST);
|
||||
if(memcad_params->capacity_wise)
|
||||
{
|
||||
energy_per_read *= capacity_ratio;
|
||||
energy_per_write *= capacity_ratio;
|
||||
}
|
||||
|
||||
energy_per_access = read_ratio* energy_per_read + write_ratio*energy_per_write;
|
||||
|
||||
}
|
||||
|
||||
// Returns a newly allocated channel equivalent to `origin`: the DIMM list is
// rebuilt from the capacity histogram (bucket k holds DIMMs of 4 * 2^k) and
// handed to the constructor together with origin's parameters, bandwidth,
// type and low-power flag. The caller owns the result.
channel_conf* clone(channel_conf* origin)
{
    vector<int> dimm_sizes;
    int bucket_size = 4;
    for(int bucket=0; bucket<5; ++bucket, bucket_size *= 2)
    {
        for(int remaining = origin->histogram_capacity[bucket]; remaining > 0; --remaining)
        {
            dimm_sizes.push_back(bucket_size);
        }
    }
    return new channel_conf(origin->memcad_params,dimm_sizes,origin->bandwidth, origin->type,origin->low_power);
}
|
||||
|
||||
// Writes a one-line summary of a channel: capacity, bandwidth, cost, DIMMs per
// channel, energy per access, DIMM type, low-power flag and the per-size DIMM
// histogram.
ostream& operator<<(ostream &os, const channel_conf& ch_cnf)
{
    const char *dimm_name;
    if(ch_cnf.type==UDIMM)
        dimm_name = " UDIMM ";
    else if(ch_cnf.type==RDIMM)
        dimm_name = " RDIMM ";
    else
        dimm_name = "LRDIMM ";

    const char *low_power_flag = ch_cnf.low_power ? "T " : "F ";

    os << "cap: " << ch_cnf.capacity << " GB "
       << "bw: " << ch_cnf.bandwidth << " (MHz) "
       << "cost: $" << ch_cnf.cost << " "
       << "dpc: " << ch_cnf.num_dimm_per_channel << " "
       << "energy: " << ch_cnf.energy_per_access << " (nJ) "
       << " DIMM: " << dimm_name
       << " low power: " << low_power_flag
       << "[ ";

    // Bucket k counts DIMMs of 2^(k+2) GB.
    for(int bucket=0; bucket<5; bucket++)
    {
        os << ch_cnf.histogram_capacity[bucket] << "(" << (1<<(bucket+2)) << "GB) ";
    }
    os << "]";
    return os;
}
|
||||
|
||||
|
||||
// Builds a bob from a list of channels. The channel pointers are stored as
// given (not copied); capacity, cost, bandwidth and the three energy figures
// are the sums over the channels. Unused channel slots are set to 0.
bob_conf::bob_conf(MemCadParameters * memcad_params, vector<channel_conf*> * in_channels)
:memcad_params(memcad_params),num_channels(0),capacity(0),bandwidth(0)
,energy_per_read(0),energy_per_write(0),energy_per_access(0),cost(0),latency(0),valid(true)
{
    assert(in_channels->size() <= MAX_NUM_CHANNELS_PER_BOB);

    for(int slot=0; slot<MAX_NUM_CHANNELS_PER_BOB; slot++)
        channels[slot]=0;

    for(vector<channel_conf*>::iterator it = in_channels->begin(); it != in_channels->end(); ++it)
    {
        channel_conf *ch = *it;

        channels[num_channels] = ch;
        num_channels++;

        capacity          += ch->capacity;
        cost              += ch->cost;
        bandwidth         += ch->bandwidth;
        energy_per_read   += ch->energy_per_read;
        energy_per_write  += ch->energy_per_write;
        energy_per_access += ch->energy_per_access;
    }
}
|
||||
|
||||
// Returns a newly allocated bob built from the same channel pointers as
// `origin` (the channels themselves are not copied). Channels are taken up to
// the first empty slot. The caller owns the result.
bob_conf* clone(bob_conf* origin)
{
    vector<channel_conf*> shared_channels;
    for(int slot=0; slot<MAX_NUM_CHANNELS_PER_BOB && (origin->channels)[slot]!=0; ++slot)
    {
        shared_channels.push_back( (origin->channels)[slot] );
    }
    return new bob_conf(origin->memcad_params,&shared_channels);
}
|
||||
|
||||
// Writes a summary line for the bob followed by one line per channel, framed
// by separator lines.
ostream & operator <<(ostream &os, const bob_conf& bob_cnf)
{
    os << " " << "BoB "
       << "cap: " << bob_cnf.capacity << " GB "
       << "num_channels: " << bob_cnf.num_channels << " "
       << "bw: " << bob_cnf.bandwidth << " (MHz) "
       << "cost: $" << bob_cnf.cost << " "
       << "energy: " << bob_cnf.energy_per_access << " (nJ) "
       << endl;

    os << " " << " ==============" << endl;
    for(int idx=0; idx<bob_cnf.num_channels; idx++)
    {
        const channel_conf &channel = *bob_cnf.channels[idx];
        os << " (" << idx << ") " << channel << endl ;
    }
    os << " " << " =============="<< endl;
    return os;
}
|
||||
|
||||
|
||||
// Builds a memory from a list of bobs. The bob pointers are stored as given
// (not copied); capacity, cost, bandwidth and the three energy figures are the
// sums over the bobs. Unused bob slots are set to 0.
memory_conf::memory_conf(MemCadParameters * memcad_params, vector<bob_conf*> * in_bobs)
:memcad_params(memcad_params),num_bobs(0),capacity(0),bandwidth(0)
,energy_per_read(0),energy_per_write(0),energy_per_access(0),cost(0),latency(0),valid(true)
{
    assert(in_bobs->size() <= MAX_NUM_BOBS);

    for(int slot=0; slot<MAX_NUM_BOBS; slot++)
        bobs[slot]=0;

    for(vector<bob_conf*>::iterator it = in_bobs->begin(); it != in_bobs->end(); ++it)
    {
        bob_conf *bob = *it;

        bobs[num_bobs] = bob;
        num_bobs++;

        capacity          += bob->capacity;
        cost              += bob->cost;
        bandwidth         += bob->bandwidth;
        energy_per_read   += bob->energy_per_read;
        energy_per_write  += bob->energy_per_write;
        energy_per_access += bob->energy_per_access;
    }
}
|
||||
|
||||
// Writes a summary line for the memory followed by every bob it contains,
// enclosed in braces.
ostream & operator <<(ostream &os, const memory_conf& mem_cnf)
{
    os << "Memory "
       << "cap: " << mem_cnf.capacity << " GB "
       << "num_bobs: " << mem_cnf.num_bobs << " "
       << "bw: " << mem_cnf.bandwidth << " (MHz) "
       << "cost: $" << mem_cnf.cost << " "
       << "energy: " << mem_cnf.energy_per_access << " (nJ) "
       << endl;

    os << " {" << endl;
    for(int idx=0; idx<mem_cnf.num_bobs; idx++)
    {
        const bob_conf &bob = *mem_cnf.bobs[idx];
        os<< " (" << idx <<") " << bob << endl ;
    }
    os << " }"<< endl;
    return os;
}
|
||||
|
251
T1/TP/TP1/cacti_7/memcad_parameters.h
Normal file
251
T1/TP/TP1/cacti_7/memcad_parameters.h
Normal file
|
@ -0,0 +1,251 @@
|
|||
#ifndef __MEMCAD_PARAMS_H__
|
||||
#define __MEMCAD_PARAMS_H__
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "cacti_interface.h"
|
||||
#include "const.h"
|
||||
#include "parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
///#define INF 1000000
|
||||
#define EPS 0.0000001
|
||||
|
||||
#define MAX_DIMM_PER_CHANNEL 3
|
||||
#define MAX_CAP_PER_DIMM 64
|
||||
#define MAX_RANKS_PER_DIMM 4
|
||||
#define MIN_BW_PER_CHANNEL 400
|
||||
#define MAX_DDR3_CHANNEL_BW 800
|
||||
#define MAX_DDR4_CHANNEL_BW 1600
|
||||
#define MAX_NUM_CHANNELS_PER_BOB 2
|
||||
#define MAX_NUM_BOBS 6
|
||||
#define DIMM_PER_CHANNEL 3
|
||||
|
||||
/*
|
||||
enum Mem_IO_type
|
||||
{
|
||||
DDR3,
|
||||
DDR4,
|
||||
LPDDR2,
|
||||
WideIO,
|
||||
Low_Swing_Diff,
|
||||
Serial
|
||||
};
|
||||
|
||||
enum Mem_DIMM
|
||||
{
|
||||
UDIMM,
|
||||
RDIMM,
|
||||
LRDIMM
|
||||
};
|
||||
*/
|
||||
|
||||
|
||||
|
||||
class MemCadParameters
|
||||
{
|
||||
public:
|
||||
|
||||
Mem_IO_type io_type; // DDR3 vs. DDR4
|
||||
|
||||
int capacity; // in GB
|
||||
|
||||
int num_bobs; // default=4me
|
||||
|
||||
///int bw_per_channel; // defaul=1600 MHz;
|
||||
|
||||
///bool with_bob;
|
||||
|
||||
int num_channels_per_bob; // 1 means no bob
|
||||
|
||||
bool capacity_wise; // true means the load on each channel is proportional to its capacity.
|
||||
|
||||
///int min_bandwith;
|
||||
|
||||
MemCad_metrics first_metric;
|
||||
|
||||
MemCad_metrics second_metric;
|
||||
|
||||
MemCad_metrics third_metric;
|
||||
|
||||
DIMM_Model dimm_model;
|
||||
|
||||
bool low_power_permitted; // Not yet implemented. It determines acceptable VDDs.
|
||||
|
||||
double load; // between 0 to 1
|
||||
|
||||
double row_buffer_hit_rate;
|
||||
|
||||
double rd_2_wr_ratio;
|
||||
|
||||
bool same_bw_in_bob; // true if all the channels in the bob have the same bandwidth.
|
||||
|
||||
|
||||
bool mirror_in_bob;// true if all the channels in the bob have the same configs
|
||||
|
||||
bool total_power; // false means just considering I/O Power
|
||||
|
||||
bool verbose;
|
||||
|
||||
// Functions
|
||||
MemCadParameters(InputParameter * g_ip);
|
||||
void print_inputs();
|
||||
bool sanity_check();
|
||||
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class MemoryParameters
|
||||
{
|
||||
public:
|
||||
// Power Parameteres
|
||||
static double VDD[2][2][4];
|
||||
|
||||
static double IDD0[2][4];
|
||||
|
||||
static double IDD1[2][4];
|
||||
|
||||
static double IDD2P0[2][4];
|
||||
|
||||
static double IDD2P1[2][4];
|
||||
|
||||
static double IDD2N[2][4];
|
||||
|
||||
static double IDD3P[2][4];
|
||||
|
||||
static double IDD3N[2][4];
|
||||
|
||||
static double IDD4R[2][4];
|
||||
|
||||
static double IDD4W[2][4];
|
||||
|
||||
static double IDD5[2][4];
|
||||
|
||||
static double io_energy_read[2][3][3][4];
|
||||
|
||||
static double io_energy_write[2][3][3][4];
|
||||
|
||||
// Timing Parameters
|
||||
static double T_RAS[2];
|
||||
|
||||
static double T_RC[2];
|
||||
|
||||
static double T_RP[2];
|
||||
|
||||
static double T_RFC[2];
|
||||
|
||||
static double T_REFI[2];
|
||||
|
||||
// Bandwidth Parameters
|
||||
static int bandwidth_load[2][4];
|
||||
|
||||
// Cost Parameters
|
||||
static double cost[2][3][5];
|
||||
|
||||
|
||||
// Functions
|
||||
MemoryParameters();
|
||||
|
||||
int bw_index(Mem_IO_type type, int bandwidth);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int bw_index(Mem_IO_type type, int bandwidth);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class channel_conf
|
||||
{
|
||||
public:
|
||||
MemCadParameters *memcad_params;
|
||||
|
||||
Mem_DIMM type;
|
||||
int num_dimm_per_channel;
|
||||
int histogram_capacity[5]; // 0->4GB, 1->8GB, 2->16GB, 3->32GB, 4->64GB
|
||||
bool low_power;
|
||||
|
||||
int capacity;
|
||||
int bandwidth;
|
||||
double energy_per_read;
|
||||
double energy_per_write;
|
||||
double energy_per_access;
|
||||
|
||||
double cost;
|
||||
double latency;
|
||||
|
||||
bool valid;
|
||||
// Functions
|
||||
channel_conf(MemCadParameters * memcad_params, const vector<int>& dimm_cap, int bandwidth, Mem_DIMM type, bool low_power);
|
||||
|
||||
void calc_power();
|
||||
|
||||
friend channel_conf* clone(channel_conf*);
|
||||
friend ostream & operator<<(ostream &os, const channel_conf& ch_cnf);
|
||||
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class bob_conf
|
||||
{
|
||||
public:
|
||||
MemCadParameters *memcad_params;
|
||||
int num_channels;
|
||||
channel_conf *channels[MAX_NUM_CHANNELS_PER_BOB];
|
||||
|
||||
int capacity;
|
||||
int bandwidth;
|
||||
double energy_per_read;
|
||||
double energy_per_write;
|
||||
double energy_per_access;
|
||||
|
||||
double cost;
|
||||
double latency;
|
||||
|
||||
bool valid;
|
||||
|
||||
bob_conf(MemCadParameters * memcad_params, vector<channel_conf*> * channels);
|
||||
|
||||
friend bob_conf* clone(bob_conf*);
|
||||
friend ostream & operator <<(ostream &os, const bob_conf& bob_cnf);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class memory_conf
|
||||
{
|
||||
public:
|
||||
MemCadParameters *memcad_params;
|
||||
int num_bobs;
|
||||
bob_conf* bobs[MAX_NUM_BOBS];
|
||||
|
||||
int capacity;
|
||||
int bandwidth;
|
||||
double energy_per_read;
|
||||
double energy_per_write;
|
||||
double energy_per_access;
|
||||
|
||||
double cost;
|
||||
double latency;
|
||||
|
||||
bool valid;
|
||||
|
||||
memory_conf(MemCadParameters * memcad_params, vector<bob_conf*> * bobs);
|
||||
friend ostream & operator <<(ostream &os, const memory_conf& bob_cnf);
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
741
T1/TP/TP1/cacti_7/memorybus.cc
Normal file
741
T1/TP/TP1/cacti_7/memorybus.cc
Normal file
|
@ -0,0 +1,741 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "memorybus.h"
|
||||
#include "wire.h"
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
/*
 * Builds one bank-level bus model (data path, row-address path or
 * column-address path, selected by membus_type_) and immediately evaluates
 * it: the constructor ends by calling Network(), which in turn calls
 * compute_delays(0) and compute_power_energy().
 *
 * wire_model               wire type handed to every Wire created in Network()
 * mat_w, mat_h             mat dimensions, stored in mat_width / mat_height
 * subarray_w_, subarray_h_ subarray dimensions, stored in subarray_width / subarray_height
 * _row_add_bits            number of row address bits
 * _col_add_bits            number of column address bits
 * _data_bits               stored first, then overwritten below by
 *                          g_ip->io_width * g_ip->burst_depth
 * _ndbl, _ndwl             partition counts; divided below by
 *                          g_ip->num_tier_col_sprd / g_ip->num_tier_row_sprd
 * membus_type_             which of the three buses this object models
 * dp_                      dynamic parameters of the array (kept by reference)
 * dt                       device type used for the n-to-p drive ratio and Vdd
 */
Memorybus::Memorybus(
    enum Wire_type wire_model, double mat_w, double mat_h, double subarray_w_, double subarray_h_,
    int _row_add_bits, int _col_add_bits, int _data_bits, int _ndbl, int _ndwl, /*enum Htree_type htree_type,*/
    enum Memorybus_type membus_type_, const DynamicParameter & dp_,
    /*TechnologyParameter::*/DeviceType *dt):
  dp(dp_),
  in_rise_time(0), out_rise_time(0),
  is_dram(dp.is_dram),
  membus_type(membus_type_),
  mat_width(mat_w), mat_height(mat_h), subarray_width(subarray_w_), subarray_height(subarray_h_),
  data_bits(_data_bits), ndbl(_ndbl), ndwl(_ndwl),
  wt(wire_model), deviceType(dt)
{
  if (g_ip->print_detail_debug)
    cout << "memorybus.cc: membus_type = " << membus_type << endl;
  power.readOp.dynamic = 0;
  power.readOp.leakage = 0;
  power.readOp.gate_leakage = 0;
  power.searchOp.dynamic =0;
  delay = 0;

  cell.h = g_tp.dram.b_h;
  cell.w = g_tp.dram.b_w;

  if (!g_ip->is_3d_mem)
    assert(ndbl >= 2 && ndwl >= 2);

  if (g_ip->print_detail_debug)
  {
    cout << "burst length: " << g_ip->burst_depth <<endl;
    cout << "output width: " << g_ip->io_width <<endl;
  }

  //Default value
  chip_IO_width = g_ip->io_width; //g_ip->out_w; //x4, x8, x16 chip
  burst_length = g_ip->burst_depth; //g_ip->burst_len; //DDR2 4, DDR3 8
  data_bits = chip_IO_width * burst_length;

  row_add_bits = _row_add_bits;
  col_add_bits = _col_add_bits;

  max_unpipelined_link_delay = 0; //TODO
  min_w_nmos = g_tp.min_w_nmos_;
  min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;

  semi_repeated_global_line = 0; // 1: semi-repeated global line, repeaters in decoder stripes; 0: Non-repeated global line, slower
  ndwl = _ndwl/ g_ip->num_tier_row_sprd;
  ndbl = _ndbl/ g_ip->num_tier_col_sprd;
  num_subarray_global_IO = ndbl>16?16:ndbl;

  // Each of the members below is assigned for only some bus types, yet
  // Network() and compute_power_energy() read them for other types as well:
  // compute_power_energy() builds coefficient arrays from add_bits and
  // num_lwl_drv for every type, and Network() sums all of the area terms for
  // both address paths. Give them a defined value up front so none of those
  // reads sees an indeterminate value.
  add_bits = 0;
  num_dec_signals = 0;
  num_lwl_drv = 0;
  area_row_predec_dec = 0;
  area_col_predec_dec = 0;
  area_lwl_drv = 0;
  area_data_drv = 0;
  area_IOSA = 0;

  switch (membus_type)
  {
    case Data_path:
      data_bits = chip_IO_width * burst_length;
      Network();
      break;
    case Row_add_path:
      add_bits = _row_add_bits;
      num_dec_signals = dp.num_r_subarray * ndbl;
      Network();
      break;
    case Col_add_path:
      add_bits = _col_add_bits;
      num_dec_signals = dp.num_c_subarray * ndwl / data_bits;
      Network();
      break;
    default:
      assert(0);
      break;
  }

  assert(power.readOp.dynamic >= 0);
  assert(power.readOp.leakage >= 0);
}
|
||||
|
||||
/*
 * Releases the wires, drivers and decoders that Network() allocated for this
 * bus type. center_stripe and bank_bus exist for every type; the remaining
 * objects depend on membus_type.
 */
Memorybus::~Memorybus()
{
  delete center_stripe;
  delete bank_bus;
  switch (membus_type)
  {
    case Data_path:
      delete local_data;
      delete global_data;
      delete local_data_drv;
      // Network() creates global_data_drv for every Data_path bus, whatever
      // the value of semi_repeated_global_line, so it must always be freed.
      // Deleting it only when the flag was set leaked the driver, because the
      // constructor sets the flag to 0.
      delete global_data_drv;
      delete out_seg;
      break;
    case Row_add_path:
      delete global_WL;
      delete add_predec;
      delete add_dec;
      delete lwl_drv;
      break;
    case Col_add_path:
      delete column_sel;
      delete add_predec;
      delete add_dec;
      break;
    default:
      assert(0);
      break;
  }
}
|
||||
|
||||
// ---For 3D DRAM, the bank height and length is reduced to 1/num_tier_row_sprd and 1/num_tier_col_sprd.
|
||||
// ---As a result, ndwl and ndbl are also reduced by the same ratio, but the number of banks increases to the product of these two parameters
|
||||
void Memorybus::Network()
|
||||
{
|
||||
//double POLY_RESISTIVITY = 0.148; //ohm-micron
|
||||
double R_wire_dec_out = 0;
|
||||
double C_ld_dec_out = 0;
|
||||
double bank_bus_length = 0;
|
||||
double area_bank_vertical_peripheral_circuitry = 0, area_bank_horizontal_peripheral_circuitry = 0;
|
||||
|
||||
area_sense_amp = (mat_height - subarray_height) * mat_width * ndbl * ndwl;
|
||||
area_subarray = subarray_height * subarray_width * ndbl * ndwl;
|
||||
|
||||
// ---Because in 3D DRAM mat only has one subarray, but contains the subarray peripheral circuits such as SA. Detail see mat.cc is_3d_mem part.
|
||||
subarray_height = mat_height;
|
||||
subarray_width = mat_width;
|
||||
|
||||
if(g_ip->partition_gran == 0)// Coarse_rank_level: add/data bus around
|
||||
{
|
||||
height_bank = subarray_height * ndbl + (col_add_bits + row_add_bits)*g_tp.wire_outside_mat.pitch/2 + data_bits*g_tp.wire_outside_mat.pitch;
|
||||
length_bank = subarray_width * ndwl + (col_add_bits + row_add_bits)*g_tp.wire_outside_mat.pitch/2 + data_bits*g_tp.wire_outside_mat.pitch;
|
||||
area_address_bus = (row_add_bits + col_add_bits) *g_tp.wire_outside_mat.pitch * sqrt(length_bank * height_bank);
|
||||
area_data_bus = data_bits *g_tp.wire_outside_mat.pitch * sqrt(length_bank * height_bank);
|
||||
}
|
||||
else if(g_ip->partition_gran == 1)//Fine_rank_level: add bus replaced by TSVs
|
||||
{
|
||||
height_bank = subarray_height * ndbl;
|
||||
length_bank = subarray_width * ndwl;
|
||||
area_address_bus = 0;
|
||||
area_data_bus = data_bits *g_tp.wire_outside_mat.pitch * sqrt(length_bank * height_bank);
|
||||
}
|
||||
else if(g_ip->partition_gran == 2)//Coarse_bank_level: add/data bus replaced by TSVs
|
||||
{
|
||||
height_bank = subarray_height * ndbl;
|
||||
length_bank = subarray_width * ndwl;
|
||||
area_address_bus = 0;
|
||||
area_data_bus = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout << "memorybus.cc: N subarrays per mat = " << dp.num_subarrays / dp.num_mats << endl;
|
||||
cout << "memorybus.cc: g_tp.wire_local.pitch = " << g_tp.wire_local.pitch /1e3 << " mm" << endl;
|
||||
cout << "memorybus.cc: subarray_width = " << subarray_width /1e3 << " mm" << endl;
|
||||
cout << "memorybus.cc: subarray_height = " << subarray_height /1e3 << " mm" << endl;
|
||||
cout << "memorybus.cc: mat_height = " << mat_height /1e3 << " mm" << endl;
|
||||
cout << "memorybus.cc: mat_width = " << mat_width /1e3 << " mm" << endl;
|
||||
cout << "memorybus.cc: height_bank = " << height_bank /1e3 << " mm" << endl;
|
||||
cout << "memorybus.cc: length_bank = " << length_bank /1e3 << " mm" << endl;
|
||||
}
|
||||
|
||||
int num_banks_hor_dir = 1 << (int)ceil((double)_log2( g_ip->nbanks * g_ip->num_tier_row_sprd )/2 ) ;
|
||||
int num_banks_ver_dir = 1 << (int)ceil((double)_log2( g_ip->nbanks * g_ip->num_tier_col_sprd * g_ip->num_tier_row_sprd /num_banks_hor_dir ) );
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"horz bank #: "<<num_banks_hor_dir<<endl;
|
||||
cout<<"vert bank #: "<<num_banks_ver_dir<<endl;
|
||||
|
||||
cout << "memorybus.cc: g_ip->nbanks = " << g_ip->nbanks << endl;
|
||||
cout << "memorybus.cc: num_banks_hor_dir = " << num_banks_hor_dir << endl;
|
||||
}
|
||||
|
||||
// ************************************* Wire Interconnections *****************************************
|
||||
double center_stripe_length = 0.5 * double(num_banks_hor_dir) * height_bank;
|
||||
if(g_ip->print_detail_debug)
|
||||
{
|
||||
cout << "memorybus.cc: center_stripe wire length = " << center_stripe_length << " um"<< endl;
|
||||
}
|
||||
center_stripe = new Wire(wt, center_stripe_length);
|
||||
area_bus = 2.0 * center_stripe_length * (row_add_bits + col_add_bits + data_bits) *g_tp.wire_outside_mat.pitch / g_ip->nbanks;
|
||||
|
||||
//if (g_ip->partition_gran == 0)
|
||||
//area_bus = (row_add_bits + col_add_bits) *g_tp.wire_outside_mat.pitch * center_stripe_length;
|
||||
if (membus_type == Row_add_path)
|
||||
{
|
||||
int num_lwl_per_gwl = 4;
|
||||
global_WL = new Wire(wt, length_bank, 1, 1, 1, inside_mat, CU_RESISTIVITY, &(g_tp.peri_global));
|
||||
//local_WL = new Wire(wt, length_bank/num_lwl_drv, local_wires, POLY_RESISTIVITY, &(g_tp.dram_wl));
|
||||
num_lwl_drv = ndwl;
|
||||
//C_GWL = num_lwl_drv * gate_C(g_tp.min_w_nmos_+min_w_pmos,0) + c_w_metal * dp.num_c_subarray * ndwl;
|
||||
if(semi_repeated_global_line)
|
||||
{
|
||||
C_GWL = (double)num_lwl_per_gwl * gate_C(g_tp.min_w_nmos_+min_w_pmos,0) + g_tp.wire_inside_mat.C_per_um * (subarray_width + g_tp.wire_local.pitch);
|
||||
R_GWL = g_tp.wire_inside_mat.R_per_um * (subarray_width + g_tp.wire_local.pitch);
|
||||
}
|
||||
else
|
||||
{
|
||||
C_GWL = (double)num_lwl_drv * num_lwl_per_gwl * gate_C(g_tp.min_w_nmos_+min_w_pmos,0) + g_tp.wire_inside_mat.C_per_um * length_bank;
|
||||
R_GWL = length_bank * g_tp.wire_inside_mat.R_per_um;
|
||||
}
|
||||
|
||||
lwl_driver_c_gate_load = dp.num_c_subarray * gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true);
|
||||
//lwl_driver_c_wire_load = subarray_width * g_tp.wire_local.C_per_um;
|
||||
//lwl_driver_r_wire_load = subarray_width * g_tp.wire_local.R_per_um;
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"C_GWL: "<<C_GWL<<endl;
|
||||
cout<<"num_lwl_drv: "<<num_lwl_drv<<endl;
|
||||
cout<<"g_tp.wire_inside_mat.C_per_um: "<<g_tp.wire_inside_mat.C_per_um<<endl;
|
||||
cout<<"length_bank: "<<length_bank<<endl;
|
||||
|
||||
cout << "memorybus.cc: lwl single gate capacitance = " << gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) << endl;
|
||||
cout << "memorybus.cc: lwl wire capacitance per single wire = " << g_tp.wire_local.C_per_um << endl;
|
||||
cout << "memorybus.cc: dp.num_c_subarray = " << dp.num_c_subarray << endl;
|
||||
cout << "memorybus.cc: dram.b_w = " << g_tp.dram.b_w << endl;
|
||||
}
|
||||
|
||||
lwl_driver_c_wire_load = dp.num_c_subarray * g_tp.dram.b_w * g_tp.wire_local.C_per_um;
|
||||
lwl_driver_r_wire_load = dp.num_c_subarray * g_tp.dram.b_w * g_tp.wire_local.R_per_um;
|
||||
|
||||
C_LWL = lwl_driver_c_gate_load + lwl_driver_c_wire_load;
|
||||
|
||||
lwl_drv = new Driver(
|
||||
lwl_driver_c_gate_load,
|
||||
lwl_driver_c_wire_load,
|
||||
lwl_driver_r_wire_load,
|
||||
is_dram);
|
||||
lwl_drv->compute_area();
|
||||
|
||||
if(!g_ip->fine_gran_bank_lvl)
|
||||
{
|
||||
C_ld_dec_out = C_GWL;
|
||||
R_wire_dec_out = R_GWL;
|
||||
}
|
||||
else
|
||||
{
|
||||
C_ld_dec_out = gate_C(g_tp.min_w_nmos_+min_w_pmos,0);
|
||||
R_wire_dec_out = 0;
|
||||
}
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
cout << "memorybus.cc: ndwl * dp.num_c_subarray * g_tp.dram.b_w = " << ndwl * dp.num_c_subarray * g_tp.dram.b_w << endl;
|
||||
//bank_bus_length = double(num_banks_ver_dir) * 0.5 * (height_bank + 0.5*double(row_add_bits+col_add_bits+data_bits)*g_tp.wire_outside_mat.pitch);
|
||||
bank_bus_length = double(num_banks_ver_dir) * 0.5 * MAX(length_bank, height_bank);
|
||||
bank_bus = new Wire(wt, bank_bus_length);
|
||||
|
||||
}
|
||||
else if (membus_type == Col_add_path)
|
||||
{
|
||||
column_sel = new Wire(wt, sqrt(length_bank * height_bank), 1, 1, 1, outside_mat, CU_RESISTIVITY, &(g_tp.peri_global));
|
||||
if(semi_repeated_global_line)
|
||||
{
|
||||
C_colsel = g_tp.wire_inside_mat.C_per_um * (subarray_height + g_tp.wire_local.pitch) ;
|
||||
R_colsel = g_tp.wire_inside_mat.R_per_um * (subarray_height + g_tp.wire_local.pitch);
|
||||
}
|
||||
else
|
||||
{
|
||||
C_colsel = column_sel->repeater_size * gate_C(g_tp.min_w_nmos_+min_w_pmos,0)
|
||||
+ (column_sel->repeater_spacing < height_bank ? column_sel->repeater_spacing : height_bank) * g_tp.wire_outside_mat.C_per_um;
|
||||
R_colsel = (column_sel->repeater_spacing < height_bank ? column_sel->repeater_spacing : height_bank) * g_tp.wire_outside_mat.R_per_um;
|
||||
}
|
||||
|
||||
if(!g_ip->fine_gran_bank_lvl)
|
||||
{
|
||||
C_ld_dec_out = C_colsel;
|
||||
//+ (int)(column_sel->repeater_spacing/height_bank) * ndbl*dp.num_r_subarray* gate_C(g_tp.w_nmos_sa_mux,0);
|
||||
R_wire_dec_out = R_colsel;
|
||||
}
|
||||
else
|
||||
{
|
||||
C_ld_dec_out = gate_C(g_tp.min_w_nmos_+min_w_pmos,0);
|
||||
R_wire_dec_out = 0;
|
||||
}
|
||||
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
cout << "memorybus.cc: column_sel->repeater_size = " << column_sel->repeater_size << endl;
|
||||
|
||||
bank_bus_length = double(num_banks_ver_dir) * 0.5 * MAX(length_bank, height_bank);
|
||||
bank_bus = new Wire(wt, bank_bus_length);
|
||||
}
|
||||
else if (membus_type == Data_path)
|
||||
{
|
||||
local_data = new Wire(wt, subarray_width, 1, 1, 1, inside_mat, CU_RESISTIVITY, &(g_tp.peri_global));
|
||||
global_data = new Wire(wt, sqrt(length_bank * height_bank), 1, 1, 1, outside_mat, CU_RESISTIVITY, &(g_tp.peri_global));
|
||||
|
||||
if(semi_repeated_global_line)
|
||||
{
|
||||
C_global_data = g_tp.wire_inside_mat.C_per_um * (subarray_height + g_tp.wire_local.pitch);
|
||||
R_global_data = g_tp.wire_inside_mat.R_per_um * (subarray_height + g_tp.wire_local.pitch) ;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
C_global_data = g_tp.wire_inside_mat.C_per_um * height_bank /2;
|
||||
R_global_data = g_tp.wire_inside_mat.R_per_um * height_bank /2;
|
||||
}
|
||||
|
||||
global_data_drv = new Driver(
|
||||
0,
|
||||
C_global_data,
|
||||
R_global_data,
|
||||
is_dram);
|
||||
global_data_drv->compute_delay(0);
|
||||
global_data_drv->compute_area();
|
||||
//---Unrepeated local dataline
|
||||
double local_data_c_gate_load = dp.num_c_subarray * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, cell.w, is_dram);
|
||||
//double local_data_c_gate_load = 0;
|
||||
double local_data_c_wire_load = dp.num_c_subarray * g_tp.dram.b_w * g_tp.wire_inside_mat.C_per_um;
|
||||
double local_data_r_wire_load = dp.num_c_subarray * g_tp.dram.b_w * g_tp.wire_inside_mat.R_per_um;
|
||||
//double local_data_r_gate_load = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
|
||||
double local_data_r_gate_load = 0;
|
||||
|
||||
double tf = (local_data_c_gate_load + local_data_c_wire_load) * (local_data_r_wire_load + local_data_r_gate_load);
|
||||
double this_delay = horowitz(0, tf, 0.5, 0.5, RISE);
|
||||
//double local_data_outrisetime = this_delay/(1.0-0.5);
|
||||
|
||||
//---Unrepeated and undriven local dataline, not significant growth
|
||||
//local_data->delay = this_delay;
|
||||
//local_data->power.readOp.dynamic = (local_data_c_gate_load + local_data_c_wire_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||||
|
||||
|
||||
double data_drv_c_gate_load = local_data_c_gate_load;
|
||||
double data_drv_c_wire_load = local_data_c_wire_load;
|
||||
double data_drv_r_wire_load = local_data_r_gate_load + local_data_r_wire_load;
|
||||
|
||||
//---Assume unrepeated global data path, too high RC
|
||||
//double data_drv_c_wire_load = height_bank * g_tp.wire_outside_mat.C_per_um;
|
||||
//double data_drv_r_wire_load = height_bank * g_tp.wire_inside_mat.R_per_um;
|
||||
|
||||
|
||||
local_data_drv = new Driver(
|
||||
data_drv_c_gate_load,
|
||||
data_drv_c_wire_load,
|
||||
data_drv_r_wire_load,
|
||||
is_dram);
|
||||
local_data_drv->compute_delay(0);
|
||||
local_data_drv->compute_area();
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"C: "<<local_data_c_gate_load + local_data_c_wire_load <<" F"<<endl;
|
||||
cout<<"R: "<<local_data_r_gate_load + local_data_r_wire_load <<" Ohm"<<endl;
|
||||
cout<<"this_delay" << this_delay * 1e9 <<" ns"<<endl;
|
||||
cout<<" local_data_drv delay: " << local_data_drv->delay * 1e9 <<" ns"<<endl;
|
||||
}
|
||||
|
||||
|
||||
//Not accounted for.
|
||||
/*local_data_drv = new Driver(
|
||||
global_data->repeater_size * gate_C(g_tp.min_w_nmos_+min_w_pmos,0),
|
||||
global_data->repeater_spacing * g_tp.wire_inside_mat.C_per_um,
|
||||
global_data->repeater_spacing * g_tp.wire_inside_mat.R_per_um,
|
||||
is_dram);*/
|
||||
|
||||
//bank_bus_length = double(num_banks_ver_dir) * 0.5 * (height_bank + 0.5*double(row_add_bits+col_add_bits+data_bits)*g_tp.wire_outside_mat.pitch) - height_bank + length_bank;
|
||||
bank_bus_length = double(num_banks_ver_dir) * 0.5 * MAX(length_bank, height_bank);
|
||||
bank_bus = new Wire(wt, bank_bus_length);
|
||||
if (g_ip->print_detail_debug)
|
||||
cout << "memorybus.cc: bank_bus_length = " << bank_bus_length << endl;
|
||||
|
||||
out_seg = new Wire(wt, 0.25 * num_banks_hor_dir * (length_bank + (row_add_bits+col_add_bits+data_bits)*g_tp.wire_outside_mat.pitch) );
|
||||
area_IOSA = (875+500)*g_ip->F_sz_um*g_ip->F_sz_um * data_bits;//Reference:
|
||||
area_data_drv = local_data_drv->area.get_area() * data_bits;
|
||||
if(ndbl>16)
|
||||
{
|
||||
area_IOSA *= (double)ndbl/16.0;
|
||||
area_data_drv *= (double)ndbl/16.0;
|
||||
}
|
||||
area_local_dataline = data_bits*subarray_width *g_tp.wire_local.pitch*ndbl;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Row decoder
|
||||
if (membus_type == Row_add_path || membus_type == Col_add_path )
|
||||
{
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout << "memorybus.cc: num_dec_signals = " << num_dec_signals << endl;
|
||||
cout << "memorybus.cc: C_ld_dec_out = " << C_ld_dec_out << endl;
|
||||
cout << "memorybus.cc: R_wire_dec_out = " << R_wire_dec_out << endl;
|
||||
cout << "memorybus.cc: is_dram = " << is_dram << endl;
|
||||
cout << "memorybus.cc: cell.h = " << cell.h << endl;
|
||||
}
|
||||
|
||||
add_dec = new Decoder(
|
||||
(num_dec_signals>16)?num_dec_signals:16,
|
||||
false,
|
||||
C_ld_dec_out,
|
||||
R_wire_dec_out,
|
||||
false,
|
||||
is_dram,
|
||||
membus_type == Row_add_path?true:false,
|
||||
cell);
|
||||
|
||||
|
||||
|
||||
// Predecoder and decoder for GWL
|
||||
double C_wire_predec_blk_out;
|
||||
double R_wire_predec_blk_out;
|
||||
C_wire_predec_blk_out = 0; // num_subarrays_per_row * dp.num_r_subarray * g_tp.wire_inside_mat.C_per_um * cell.h;
|
||||
R_wire_predec_blk_out = 0; // num_subarrays_per_row * dp.num_r_subarray * g_tp.wire_inside_mat.R_per_um * cell.h;
|
||||
|
||||
|
||||
//int num_subarrays_per_mat = dp.num_subarrays/dp.num_mats;
|
||||
int num_dec_per_predec = 1;
|
||||
PredecBlk * add_predec_blk1 = new PredecBlk(
|
||||
num_dec_signals,
|
||||
add_dec,
|
||||
C_wire_predec_blk_out,
|
||||
R_wire_predec_blk_out,
|
||||
num_dec_per_predec,
|
||||
is_dram,
|
||||
true);
|
||||
|
||||
|
||||
|
||||
PredecBlk * add_predec_blk2 = new PredecBlk(
|
||||
num_dec_signals,
|
||||
add_dec,
|
||||
C_wire_predec_blk_out,
|
||||
R_wire_predec_blk_out,
|
||||
num_dec_per_predec,
|
||||
is_dram,
|
||||
false);
|
||||
|
||||
|
||||
|
||||
PredecBlkDrv * add_predec_blk_drv1 = new PredecBlkDrv(0, add_predec_blk1, is_dram);
|
||||
PredecBlkDrv * add_predec_blk_drv2 = new PredecBlkDrv(0, add_predec_blk2, is_dram);
|
||||
|
||||
add_predec = new Predec(add_predec_blk_drv1, add_predec_blk_drv2);
|
||||
|
||||
|
||||
|
||||
if (membus_type == Row_add_path)
|
||||
{
|
||||
area_row_predec_dec = add_predec_blk_drv1->area.get_area() + add_predec_blk_drv2->area.get_area() +
|
||||
add_predec_blk1->area.get_area() + add_predec_blk2->area.get_area() + num_dec_signals * add_dec->area.get_area();
|
||||
|
||||
|
||||
area_lwl_drv = num_lwl_drv/2.0 * dp.num_r_subarray * ndbl * lwl_drv->area.get_area(); //num_lwl_drv is ndwl/the lwl driver count one gwl connects. two adjacent lwls share one driver.
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"memorybus.cc: area_bank_vertical_peripheral_circuitry = " << area_bank_vertical_peripheral_circuitry /1e6<<" mm2"<<endl;
|
||||
cout<<"memorybus.cc: lwl drv area = " << lwl_drv->area.get_area() /1e6<<" mm2"<<endl;
|
||||
cout<<"memorybus.cc: total lwl drv area = " << num_lwl_drv * dp.num_r_subarray
|
||||
* ndbl * lwl_drv->area.get_area() /1e6<<" mm2"<<endl;
|
||||
}
|
||||
}
|
||||
else if (membus_type == Col_add_path)
|
||||
{
|
||||
area_col_predec_dec = add_predec_blk_drv1->area.get_area() + add_predec_blk_drv2->area.get_area() +
|
||||
add_predec_blk1->area.get_area() + add_predec_blk2->area.get_area() + num_dec_signals * add_dec->area.get_area();
|
||||
if(ndbl>16)
|
||||
{
|
||||
area_col_predec_dec *= (double)ndbl/16.0;
|
||||
}
|
||||
}
|
||||
|
||||
area_bank_vertical_peripheral_circuitry = area_row_predec_dec + area_lwl_drv + area_address_bus + area_data_bus ;
|
||||
area_bank_horizontal_peripheral_circuitry = area_col_predec_dec + area_data_drv + (area_bus + area_IOSA)/g_ip->nbanks;
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"memorybus.cc: add_predec_blk_drv1->area = " << add_predec_blk_drv1->area.get_area() /1e6<<" mm2"<<endl;
|
||||
cout<<"memorybus.cc: add_predec_blk_drv2->area = " << add_predec_blk_drv2->area.get_area() /1e6<<" mm2"<<endl;
|
||||
cout<<"memorybus.cc: add_predec_blk1->area = " << add_predec_blk1->area.get_area() /1e6<<" mm2"<<endl;
|
||||
cout<<"memorybus.cc: add_predec_blk2->area = " << add_predec_blk2->area.get_area() /1e6<<" mm2"<<endl;
|
||||
cout<<"memorybus.cc: total add_dec->area = " << num_dec_signals * add_dec->area.get_area() /1e6<<" mm2"<<endl;
|
||||
cout<<"wire bus width for one bank = " << g_tp.wire_outside_mat.pitch * double(add_bits + add_bits + data_bits);
|
||||
}
|
||||
|
||||
area.h = (height_bank + area_bank_horizontal_peripheral_circuitry /length_bank) * num_banks_ver_dir;
|
||||
area.w = (length_bank + area_bank_vertical_peripheral_circuitry /height_bank) * num_banks_hor_dir; // bank bus, should add cmd wire and predec/decoder space
|
||||
if(g_ip->partition_gran == 0)
|
||||
{
|
||||
area.h += g_tp.wire_outside_mat.pitch * double(add_bits + add_bits + data_bits); //center_stripe, should add cmd wire and other componets
|
||||
area.w += g_tp.wire_outside_mat.pitch * double(add_bits + add_bits + data_bits); // + g_tp.wire_outside_mat.pitch * add_bits * 2.5;
|
||||
}
|
||||
//---This coefficient comes from the extra overhead of voltage regulator,
|
||||
//---control logic, bank fuse, burst logic and I/O, see
|
||||
//--- A 5.6ns Random Cycle 144Mb DRAM with 1.4Gb/s/pin and DDR3-SRAM Interface
|
||||
|
||||
//area.w *= 1.0672;
|
||||
//area.h *= 1.0672;
|
||||
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
cout<<"memorybus.cc: circuit height = "<<area_bank_horizontal_peripheral_circuitry /length_bank /1e3<<" mm"<<endl;
|
||||
cout<<"memorybus.cc: circuit length = "<<area_bank_vertical_peripheral_circuitry /height_bank /1e3<<" mm"<<endl;
|
||||
cout<<"memorybus.cc: area.h = "<<area.h/1e3<<" mm"<<endl;
|
||||
cout<<"memorybus.cc: area.w = "<<area.w/1e3<<" mm"<<endl;
|
||||
cout<<"memorybus.cc: area = "<<area.get_area()/1e6<<" mm2"<<endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
compute_delays(0);
|
||||
compute_power_energy();
|
||||
|
||||
}
|
||||
|
||||
|
||||
// This is based on the same function in mat.cc
|
||||
double Memorybus::compute_delays(double inrisetime)
|
||||
{
|
||||
|
||||
// double outrisetime = 0;
|
||||
double predec_outrisetime = 0, add_dec_outrisetime = 0;
|
||||
double lwl_drv_outrisetime = 0;///, tf = 0;
|
||||
//double local_data_drv_outrisetime = 0;
|
||||
|
||||
if (membus_type == Data_path)
|
||||
{
|
||||
delay = 0;
|
||||
delay_bus = center_stripe->delay + bank_bus->delay;
|
||||
delay += delay_bus;
|
||||
//outrisetime = local_data_drv->compute_delay(inrisetime);
|
||||
//local_data_drv_outrisetime = local_data_drv->delay;
|
||||
delay_global_data = (semi_repeated_global_line >0) ? (global_data_drv->delay*num_subarray_global_IO) : (global_data_drv->delay + global_data->delay);
|
||||
if(g_ip->partition_gran==0 || g_ip->partition_gran==1)
|
||||
delay += delay_global_data;
|
||||
//delay += local_data->delay;
|
||||
delay_local_data = local_data_drv->delay;
|
||||
delay += delay_local_data;
|
||||
delay_data_buffer = 2 * 1e-6/(double)g_ip->sys_freq_MHz;
|
||||
//delay += bank.mat.delay_subarray_out_drv_htree;
|
||||
delay += delay_data_buffer;
|
||||
//cout << 1e3/(double)g_ip->sys_freq_MHz<< endl;
|
||||
//delay += out_seg->delay * burst_length;
|
||||
if (g_ip->print_detail_debug)
|
||||
cout << "memorybus.cc: data path delay = " << delay << endl;
|
||||
out_rise_time = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
delay = 0;
|
||||
delay_bus = center_stripe->delay + bank_bus->delay;
|
||||
delay += delay_bus;
|
||||
predec_outrisetime = add_predec->compute_delays(inrisetime);
|
||||
add_dec_outrisetime = add_dec->compute_delays(predec_outrisetime);
|
||||
delay_add_predecoder = add_predec->delay;
|
||||
delay += delay_add_predecoder;
|
||||
|
||||
if (membus_type == Row_add_path)
|
||||
{
|
||||
if(semi_repeated_global_line)
|
||||
{
|
||||
delay_add_decoder = add_dec->delay * ndwl;
|
||||
if(g_ip->page_sz_bits > 8192)
|
||||
delay_add_decoder /= (double)(g_ip->page_sz_bits / 8192);
|
||||
}
|
||||
else
|
||||
{
|
||||
delay_add_decoder = add_dec->delay;
|
||||
}
|
||||
delay += delay_add_decoder;
|
||||
// There is no function to compute_delay in wire.cc, need to double check if center_stripe->delay and bank_bus->delay is correct.
|
||||
lwl_drv_outrisetime = lwl_drv->compute_delay(add_dec_outrisetime);
|
||||
///tf = (lwl_driver_c_gate_load + lwl_driver_c_wire_load) * lwl_driver_r_wire_load;
|
||||
// ### no need for global_WL->delay
|
||||
// delay_WL = global_WL->delay + lwl_drv->delay + horowitz(lwl_drv_outrisetime, tf, 0.5, 0.5, RISE);
|
||||
delay_lwl_drv = lwl_drv->delay;
|
||||
if(!g_ip->fine_gran_bank_lvl)
|
||||
delay += delay_lwl_drv;
|
||||
if (g_ip->print_detail_debug)
|
||||
cout << "memorybus.cc: row add path delay = " << delay << endl;
|
||||
|
||||
out_rise_time = lwl_drv_outrisetime;
|
||||
}
|
||||
|
||||
else if (membus_type == Col_add_path)
|
||||
{
|
||||
if(semi_repeated_global_line)
|
||||
{
|
||||
delay_add_decoder = add_dec->delay * num_subarray_global_IO;
|
||||
}
|
||||
else
|
||||
{
|
||||
delay += column_sel->delay;
|
||||
delay_add_decoder = add_dec->delay;
|
||||
}
|
||||
delay += delay_add_decoder;
|
||||
|
||||
out_rise_time = 0;
|
||||
if (g_ip->print_detail_debug)
|
||||
{
|
||||
//cout << "memorybus.cc, compute_delays col: center_stripe->delay = " << center_stripe->delay << endl;
|
||||
//cout << "memorybus.cc, compute_delays col: bank_bus->delay = " << bank_bus->delay << endl;
|
||||
//cout << "memorybus.cc, compute_delays col: add_predec->delay = " << add_predec->delay << endl;
|
||||
//cout << "memorybus.cc, compute_delays col: add_dec->delay = " << add_dec->delay << endl;
|
||||
|
||||
cout << "memorybus.cc: column add path delay = " << delay << endl;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Double check!
|
||||
out_rise_time = delay / (1.0-0.5);
|
||||
// Is delay_wl_reset necessary here? Is the 'false' condition appropriate? See the same code as in mat.cc
|
||||
/*if (add_dec->exist == false)
|
||||
{
|
||||
int delay_wl_reset = MAX(add_predec->blk1->delay, add_predec->blk2->delay);
|
||||
//delay += delay_wl_reset;
|
||||
}*/
|
||||
|
||||
return out_rise_time;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void Memorybus::compute_power_energy()
|
||||
{
|
||||
double coeff1[4] = {(double)add_bits, (double)add_bits, (double)add_bits, (double)add_bits};
|
||||
double coeff2[4] = {(double)data_bits, (double)data_bits, (double)data_bits, (double)data_bits};
|
||||
double coeff3[4] = {(double)num_lwl_drv, (double)num_lwl_drv, (double)num_lwl_drv, (double)num_lwl_drv};
|
||||
double coeff4[4] = {(double)burst_length*chip_IO_width, (double)burst_length*chip_IO_width,
|
||||
(double)burst_length*chip_IO_width, (double)burst_length*chip_IO_width};
|
||||
double coeff5[4] = {(double)ndwl, (double)ndwl, (double)ndwl, (double)ndwl};
|
||||
double coeff6[4] = {(double)num_subarray_global_IO, (double)num_subarray_global_IO, (double)num_subarray_global_IO, (double)num_subarray_global_IO};
|
||||
|
||||
//double coeff4[4] = {(double)num_dec_signals, (double)num_dec_signals, (double)num_dec_signals, (double)num_dec_signals};
|
||||
switch (membus_type)
|
||||
{
|
||||
case Data_path:
|
||||
power_bus = (center_stripe->power + bank_bus->power) * coeff2;
|
||||
power_local_data = local_data_drv->power * coeff2;
|
||||
power_global_data = semi_repeated_global_line >0 ? (global_data_drv->power*coeff2) : (global_data_drv->power+global_data->power);
|
||||
|
||||
power_global_data.readOp.dynamic = power_global_data.readOp.dynamic + 1.8/1e3*deviceType->Vdd*10.0/1e9/64*data_bits;
|
||||
power = power_bus + power_local_data;
|
||||
if(!g_ip->fine_gran_bank_lvl)
|
||||
power = power + power_global_data;
|
||||
//power += local_data->power;
|
||||
|
||||
power_burst = out_seg->power * coeff4;//Account for burst read, approxmate the wire length by the center stripe
|
||||
//power = power + power_burst;
|
||||
if(g_ip->print_detail_debug)
|
||||
{
|
||||
cout << "memorybus.cc: data path center stripe energy = " << center_stripe->power.readOp.dynamic*1e9 << " nJ" << endl;
|
||||
cout << "memorybus.cc: data path bank bus energy = " << bank_bus->power.readOp.dynamic*1e9 << " nJ" << endl;
|
||||
cout << "memorybus.cc: data path data driver energy = " << local_data_drv->power.readOp.dynamic*1e9 << " nJ" << endl;
|
||||
}
|
||||
break;
|
||||
case Row_add_path:
|
||||
power_bus = (center_stripe->power + bank_bus->power) * coeff1;
|
||||
power_add_predecoder = add_predec->power;
|
||||
if(semi_repeated_global_line)
|
||||
{
|
||||
power_add_decoders = add_dec->power * coeff5;
|
||||
//power_add_decoders.readOp.dynamic /= (g_ip->page_sz_bits > 8192)?((double)g_ip->page_sz_bits/8192):1;
|
||||
if(g_ip->page_sz_bits > 8192)
|
||||
power_add_decoders.readOp.dynamic /= (double)(g_ip->page_sz_bits / 8192);
|
||||
}
|
||||
else
|
||||
power_add_decoders = add_dec->power;// * (1<< add_predec->blk1->number_input_addr_bits);
|
||||
power_lwl_drv = lwl_drv->power * coeff3;
|
||||
//power_local_WL.readOp.dynamic = num_lwl_drv * C_LWL * deviceType->Vdd * deviceType->Vdd;
|
||||
power = power_bus + power_add_predecoder + power_add_decoders + power_lwl_drv;
|
||||
break;
|
||||
case Col_add_path:
|
||||
power_bus = (center_stripe->power + bank_bus->power) * coeff1;// + column_sel->power * double(chip_IO_width * burst_length);
|
||||
power_add_predecoder = add_predec->power;
|
||||
if(semi_repeated_global_line)
|
||||
{
|
||||
power_add_decoders = add_dec->power * coeff6;
|
||||
power_add_decoders.readOp.dynamic = power_add_decoders.readOp.dynamic * g_ip->page_sz_bits / data_bits;
|
||||
power_col_sel.readOp.dynamic = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
power_add_decoders = add_dec->power;// * (1<< add_predec->blk1->number_input_addr_bits);
|
||||
power_col_sel.readOp.dynamic = column_sel->power.readOp.dynamic * g_ip->page_sz_bits / data_bits;
|
||||
}
|
||||
power = power_bus + power_add_predecoder + power_add_decoders;
|
||||
if(!g_ip->fine_gran_bank_lvl)
|
||||
power = power + power_col_sel;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
150
T1/TP/TP1/cacti_7/memorybus.h
Normal file
150
T1/TP/TP1/cacti_7/memorybus.h
Normal file
|
@ -0,0 +1,150 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#ifndef __MEMORYBUS_H__
|
||||
#define __MEMORYBUS_H__
|
||||
|
||||
#include "basic_circuit.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
//#include "assert.h"
|
||||
#include "cacti_interface.h"
|
||||
//#include "wire.h"
|
||||
class Wire;
|
||||
//#include "area.h"
|
||||
#include "decoder.h"
|
||||
|
||||
class Memorybus : public Component
|
||||
{
|
||||
public:
|
||||
Memorybus(enum Wire_type wire_model, double mat_w, double mat_h, double subarray_w, double subarray_h,
|
||||
int _row_add_bits, int _col_add_bits, int _data_bits, int _ndbl, int _ndwl, /*enum Htree_type htree_type,*/
|
||||
enum Memorybus_type membus_type, const DynamicParameter & dp_,
|
||||
/*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)
|
||||
);
|
||||
~Memorybus();
|
||||
|
||||
//void in_membus();
|
||||
//void out_membus();
|
||||
void Network();
|
||||
|
||||
// repeaters only at h-tree nodes
|
||||
void limited_in_membus();
|
||||
void limited_out_membus();
|
||||
void input_nand(double s1, double s2, double l);
|
||||
//void output_buffer(double s1, double s2, double l);
|
||||
|
||||
const DynamicParameter & dp;
|
||||
|
||||
double in_rise_time, out_rise_time;
|
||||
|
||||
void set_in_rise_time(double rt)
|
||||
{
|
||||
in_rise_time = rt;
|
||||
}
|
||||
|
||||
double max_unpipelined_link_delay;
|
||||
powerDef power_bit;
|
||||
void memory_bus();
|
||||
|
||||
double height_bank, length_bank; // The actual height and length of a single bank including all wires between subarrays.
|
||||
Wire * center_stripe;
|
||||
Wire * bank_bus;
|
||||
Wire * global_WL; //3 hierarchical connection wires.
|
||||
Wire * column_sel;
|
||||
Wire * local_data;
|
||||
Wire * global_data;
|
||||
Wire * out_seg;
|
||||
// Driver for LWL connecting GWL, same as in mat.cc
|
||||
double lwl_driver_c_gate_load, lwl_driver_c_wire_load, lwl_driver_r_wire_load;
|
||||
|
||||
powerDef power_bus;
|
||||
powerDef power_lwl_drv;
|
||||
powerDef power_add_decoders;
|
||||
powerDef power_global_WL;
|
||||
powerDef power_local_WL;
|
||||
powerDef power_add_predecoder;
|
||||
powerDef power_burst;
|
||||
powerDef power_col_sel;
|
||||
powerDef power_local_data;
|
||||
powerDef power_global_data;
|
||||
double delay_bus, delay_add_predecoder, delay_add_decoder, delay_lwl_drv, delay_global_data, delay_local_data, delay_data_buffer;
|
||||
double area_lwl_drv, area_row_predec_dec, area_col_predec_dec, area_subarray, area_bus, area_address_bus, area_data_bus, area_data_drv, area_IOSA, area_local_dataline, area_sense_amp;
|
||||
|
||||
|
||||
Area cell;
|
||||
bool is_dram;
|
||||
|
||||
Driver * lwl_drv, * local_data_drv, * global_data_drv ;
|
||||
Predec * add_predec;
|
||||
Decoder * add_dec;
|
||||
|
||||
double compute_delays(double inrisetime); // return outrisetime
|
||||
void compute_power_energy(); //
|
||||
|
||||
|
||||
|
||||
|
||||
private:
|
||||
double wire_bw;
|
||||
double init_wire_bw; // bus width at root
|
||||
enum Memorybus_type membus_type;
|
||||
// double htree_hnodes;
|
||||
// double htree_vnodes;
|
||||
double mat_width;
|
||||
double mat_height;
|
||||
double subarray_width, subarray_height;
|
||||
//int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
|
||||
int row_add_bits, col_add_bits;
|
||||
int add_bits, data_bits, num_dec_signals;
|
||||
int semi_repeated_global_line;
|
||||
|
||||
int ndbl, ndwl;
|
||||
// bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
|
||||
// bool search_tree;
|
||||
|
||||
enum Wire_type wt;
|
||||
double min_w_nmos;
|
||||
double min_w_pmos;
|
||||
|
||||
int num_lwl_drv; //Ratio between GWL and LWL, how many local WL drives each GWL drives.
|
||||
int chip_IO_width;
|
||||
int burst_length;
|
||||
int num_subarray_global_IO;
|
||||
|
||||
double C_GWL, C_LWL, R_GWL, R_LWL, C_colsel, R_colsel, C_global_data, R_global_data; // Capacitance of global/local WLs.
|
||||
|
||||
/*TechnologyParameter::*/DeviceType *deviceType;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
611
T1/TP/TP1/cacti_7/nuca.cc
Normal file
611
T1/TP/TP1/cacti_7/nuca.cc
Normal file
|
@ -0,0 +1,611 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "nuca.h"
|
||||
#include "Ucache.h"
|
||||
#include <assert.h>
|
||||
|
||||
unsigned int MIN_BANKSIZE=65536;
|
||||
#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
|
||||
#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
|
||||
#define CONTR_2_BANK_LAT 0
|
||||
|
||||
int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
|
||||
|
||||
/*
 * Builds a Nuca model for the given device type and loads the
 * contention table from disk (see init_cont()).
 *
 * The wire-type range and the per-wire-type Wire pointers are only
 * assigned by sim_nuca(). They are given a defined "empty" state here
 * (empty range, NULL pointers) so that ~Nuca(), which walks
 * wt_min..wt_max and deletes both arrays, is safe even when sim_nuca()
 * was never run or did not allocate every wire type in the range.
 */
Nuca::Nuca(
    /*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global)
    ):deviceType(dt), wt_min(0), wt_max(-1)
{
  for (int w = 0; w < WIRE_TYPES; w++) {
    wire_vertical[w] = NULL;
    wire_horizontal[w] = NULL;
  }
  init_cont();
}
|
||||
|
||||
void
|
||||
Nuca::init_cont()
|
||||
{
|
||||
FILE *cont;
|
||||
char line[5000];
|
||||
char jk[5000];
|
||||
cont = fopen("contention.dat", "r");
|
||||
if (!cont) {
|
||||
cout << "contention.dat file is missing!\n";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
for(int i=0; i<2; i++) {
|
||||
for(int j=2; j<5; j++) {
|
||||
for(int k=0; k<ROUTER_TYPES; k++) {
|
||||
for(int l=0;l<7; l++) {
|
||||
int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
|
||||
assert(fscanf(cont, "%[^\n]\n", line) != EOF);
|
||||
sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d",jk, &temp[0], &temp[1], &temp[2], &temp[3],
|
||||
&temp[4], &temp[5], &temp[6], &temp[7]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(cont);
|
||||
}
|
||||
|
||||
void
|
||||
Nuca::print_cont_stats()
|
||||
{
|
||||
for(int i=0; i<2; i++) {
|
||||
for(int j=2; j<5; j++) {
|
||||
for(int k=0; k<ROUTER_TYPES; k++) {
|
||||
for(int l=0;l<7; l++) {
|
||||
for(int m=0;l<7; l++) {
|
||||
cout << cont_stats[i][j][k][l][m] << " ";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
/*
 * Releases the vertical and horizontal Wire objects stored for every
 * wire type in the inclusive range wt_min..wt_max.
 */
Nuca::~Nuca()
{
  int idx = wt_min;
  while (idx <= wt_max) {
    delete wire_vertical[idx];
    delete wire_horizontal[idx];
    ++idx;
  }
}
|
||||
|
||||
/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
|
||||
int
|
||||
Nuca::calc_cycles(double lat, double oper_freq)
|
||||
{
|
||||
//TODO: convert latch delay to FO4 */
|
||||
double cycle_time = (1.0/(oper_freq*1e9)); /*s*/
|
||||
cycle_time -= LATCH_DELAY;
|
||||
cycle_time -= FIXED_OVERHEAD;
|
||||
|
||||
return (int)ceil(lat/cycle_time);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Intentionally empty: h_wire, v_wire and router are not released
 * here (the deletes below are disabled).
 */
nuca_org_t::~nuca_org_t()
{
  // if(h_wire) delete h_wire;
  // if(v_wire) delete v_wire;
  // if(router) delete router;
}
|
||||
|
||||
/*
|
||||
* Version - 6.0
|
||||
*
|
||||
* Perform exhaustive search across different bank organizatons,
|
||||
* router configurations, grid organizations, and wire models and
|
||||
* find an optimal NUCA organization
|
||||
* For different bank count values
|
||||
* 1. Optimal bank organization is calculated
|
||||
* 2. For each bank organization, find different NUCA organizations
|
||||
* using various router configurations, grid organizations,
|
||||
* and wire models.
|
||||
* 3. NUCA model with the least cost is picked for
|
||||
* this particular bank count
|
||||
* Finally include contention statistics and find the optimal
|
||||
* NUCA configuration
|
||||
*/
|
||||
void
|
||||
Nuca::sim_nuca()
|
||||
{
|
||||
/* temp variables */
|
||||
int it, ro, wr;
|
||||
int num_cyc;
|
||||
unsigned int i, j;//, k;
|
||||
unsigned int r, c;
|
||||
int l2_c;
|
||||
int bank_count = 0;
|
||||
uca_org_t ures;
|
||||
nuca_org_t *opt_n;
|
||||
mem_array tag, data;
|
||||
list<nuca_org_t *> nuca_list;
|
||||
Router *router_s[ROUTER_TYPES];
|
||||
router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
|
||||
router_s[0]->print_router();
|
||||
router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
|
||||
router_s[1]->print_router();
|
||||
router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
|
||||
router_s[2]->print_router();
|
||||
|
||||
int core_in; // to store no. of cores
|
||||
|
||||
/* to search diff grid organizations */
|
||||
double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
|
||||
curr_acclat;
|
||||
double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
|
||||
avg_leakage_power;
|
||||
|
||||
double opt_acclat = INF;//, opt_avg_lat = INF, opt_tot_lat = INF;
|
||||
int opt_rows = 0;
|
||||
int opt_columns = 0;
|
||||
// double opt_totno_hops = 0;
|
||||
double opt_avg_hop = 0;
|
||||
double opt_dyn_power = 0, opt_leakage_power = 0;
|
||||
min_values_t minval;
|
||||
|
||||
int bank_start = 0;
|
||||
|
||||
int flit_width = 0;
|
||||
|
||||
/* vertical and horizontal hop latency values */
|
||||
int ver_hop_lat, hor_hop_lat; /* in cycles */
|
||||
|
||||
|
||||
/* no. of different bank sizes to consider */
|
||||
int iterations;
|
||||
|
||||
|
||||
g_ip->nuca_cache_sz = g_ip->cache_sz;
|
||||
nuca_list.push_back(new nuca_org_t());
|
||||
|
||||
if (g_ip->cache_level == 0) l2_c = 1;
|
||||
else l2_c = 0;
|
||||
|
||||
if (g_ip->cores <= 4) core_in = 2;
|
||||
else if (g_ip->cores <= 8) core_in = 3;
|
||||
else if (g_ip->cores <= 16) core_in = 4;
|
||||
else {cout << "Number of cores should be <= 16!\n"; exit(0);}
|
||||
|
||||
|
||||
// set the lower bound to an appropriate value. this depends on cache associativity
|
||||
if (g_ip->assoc > 2) {
|
||||
i = 2;
|
||||
while (i != g_ip->assoc) {
|
||||
MIN_BANKSIZE *= 2;
|
||||
i *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE);
|
||||
|
||||
if (g_ip->force_wiretype)
|
||||
{
|
||||
if (g_ip->wt == Low_swing) {
|
||||
wt_min = Low_swing;
|
||||
wt_max = Low_swing;
|
||||
}
|
||||
else {
|
||||
wt_min = Global;
|
||||
wt_max = Low_swing-1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
wt_min = Global;
|
||||
wt_max = Low_swing;
|
||||
}
|
||||
if (g_ip->nuca_bank_count != 0) { // simulate just one bank
|
||||
if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
|
||||
g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
|
||||
g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
|
||||
fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n");
|
||||
}
|
||||
bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
|
||||
iterations = bank_start+1;
|
||||
g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count;
|
||||
}
|
||||
cout << "Simulating various NUCA configurations\n";
|
||||
for (it=bank_start; it<iterations; it++) { /* different bank count values */
|
||||
ures.tag_array2 = &tag;
|
||||
ures.data_array2 = &data;
|
||||
/*
|
||||
* find the optimal bank organization
|
||||
*/
|
||||
solve(&ures);
|
||||
// output_UCA(&ures);
|
||||
bank_count = g_ip->nuca_cache_sz/g_ip->cache_sz;
|
||||
cout << "====" << g_ip->cache_sz << "\n";
|
||||
|
||||
for (wr=wt_min; wr<=wt_max; wr++) {
|
||||
|
||||
for (ro=0; ro<ROUTER_TYPES; ro++)
|
||||
{
|
||||
flit_width = (int) router_s[ro]->flit_size; //initialize router
|
||||
nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
|
||||
|
||||
/* calculate router and wire parameters */
|
||||
|
||||
double vlength = ures.cache_ht; /* length of the wire (u)*/
|
||||
double hlength = ures.cache_len; // u
|
||||
|
||||
/* find delay, area, and power for wires */
|
||||
wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
|
||||
wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
|
||||
|
||||
|
||||
hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001));
|
||||
ver_hop_lat = calc_cycles(wire_vertical[wr]->delay,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001));
|
||||
|
||||
/*
|
||||
* assume a grid like topology and explore for optimal network
|
||||
* configuration using different row and column count values.
|
||||
*/
|
||||
for (c=1; c<=(unsigned int)bank_count; c++) {
|
||||
while (bank_count%c != 0) c++;
|
||||
r = bank_count/c;
|
||||
|
||||
/*
|
||||
* to find the avg access latency of a NUCA cache, uncontended
|
||||
* access time to each bank from the
|
||||
* cache controller is calculated.
|
||||
* avg latency =
|
||||
* sum of the access latencies to individual banks)/bank
|
||||
* count value.
|
||||
*/
|
||||
totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
|
||||
/// k = 1;
|
||||
for (i=0; i<r; i++) {
|
||||
for (j=0; j<c; j++) {
|
||||
/*
|
||||
* vertical hops including the
|
||||
* first hop from the cache controller
|
||||
*/
|
||||
curr_hop = i + 1;
|
||||
curr_hop += j; /* horizontal hops */
|
||||
totno_hhops += j;
|
||||
totno_vhops += (i+1);
|
||||
curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
|
||||
j * hor_hop_lat);
|
||||
|
||||
tot_lat += curr_acclat;
|
||||
totno_hops += curr_hop;
|
||||
}
|
||||
}
|
||||
avg_lat = tot_lat/bank_count;
|
||||
avg_hop = totno_hops/bank_count;
|
||||
avg_hhop = totno_hhops/bank_count;
|
||||
avg_vhop = totno_vhops/bank_count;
|
||||
|
||||
/* net access latency */
|
||||
curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) +
|
||||
calc_cycles(ures.access_time,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001));
|
||||
|
||||
/* avg access lat of nuca */
|
||||
avg_dyn_power =
|
||||
avg_hop *
|
||||
(router_s[ro]->power.readOp.dynamic) + avg_hhop *
|
||||
(wire_horizontal[wr]->power.readOp.dynamic) *
|
||||
(g_ip->block_sz*8 + 64) + avg_vhop *
|
||||
(wire_vertical[wr]->power.readOp.dynamic) *
|
||||
(g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic;
|
||||
|
||||
avg_leakage_power =
|
||||
bank_count * router_s[ro]->power.readOp.leakage +
|
||||
avg_hhop * (wire_horizontal[wr]->power.readOp.leakage*
|
||||
wire_horizontal[wr]->delay) * flit_width +
|
||||
avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
|
||||
wire_horizontal[wr]->delay);
|
||||
|
||||
if (curr_acclat < opt_acclat) {
|
||||
opt_acclat = curr_acclat;
|
||||
/// opt_tot_lat = tot_lat;
|
||||
/// opt_avg_lat = avg_lat;
|
||||
/// opt_totno_hops = totno_hops;
|
||||
opt_avg_hop = avg_hop;
|
||||
opt_rows = r;
|
||||
opt_columns = c;
|
||||
opt_dyn_power = avg_dyn_power;
|
||||
opt_leakage_power = avg_leakage_power;
|
||||
}
|
||||
totno_hops = 0;
|
||||
tot_lat = 0;
|
||||
totno_hhops = 0;
|
||||
totno_vhops = 0;
|
||||
}
|
||||
nuca_list.back()->wire_pda.power.readOp.dynamic =
|
||||
opt_avg_hop * flit_width *
|
||||
(wire_horizontal[wr]->power.readOp.dynamic +
|
||||
wire_vertical[wr]->power.readOp.dynamic);
|
||||
nuca_list.back()->avg_hops = opt_avg_hop;
|
||||
/* network delay/power */
|
||||
nuca_list.back()->h_wire = wire_horizontal[wr];
|
||||
nuca_list.back()->v_wire = wire_vertical[wr];
|
||||
nuca_list.back()->router = router_s[ro];
|
||||
/* bank delay/power */
|
||||
|
||||
nuca_list.back()->bank_pda.delay = ures.access_time;
|
||||
nuca_list.back()->bank_pda.power = ures.power;
|
||||
nuca_list.back()->bank_pda.area.h = ures.cache_ht;
|
||||
nuca_list.back()->bank_pda.area.w = ures.cache_len;
|
||||
nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
|
||||
|
||||
num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/));
|
||||
if(num_cyc%2 != 0) num_cyc++;
|
||||
if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
|
||||
|
||||
if (it < 7) {
|
||||
nuca_list.back()->nuca_pda.delay = opt_acclat +
|
||||
cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
|
||||
nuca_list.back()->contention =
|
||||
cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
|
||||
}
|
||||
else {
|
||||
nuca_list.back()->nuca_pda.delay = opt_acclat +
|
||||
cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
|
||||
nuca_list.back()->contention =
|
||||
cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
|
||||
}
|
||||
nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
|
||||
nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
|
||||
|
||||
/* array organization */
|
||||
nuca_list.back()->bank_count = bank_count;
|
||||
nuca_list.back()->rows = opt_rows;
|
||||
nuca_list.back()->columns = opt_columns;
|
||||
calculate_nuca_area (nuca_list.back());
|
||||
|
||||
minval.update_min_values(nuca_list.back());
|
||||
nuca_list.push_back(new nuca_org_t());
|
||||
opt_acclat = BIGNUM;
|
||||
|
||||
}
|
||||
}
|
||||
g_ip->cache_sz /= 2;
|
||||
}
|
||||
|
||||
delete(nuca_list.back());
|
||||
nuca_list.pop_back();
|
||||
opt_n = find_optimal_nuca(&nuca_list, &minval);
|
||||
print_nuca(opt_n);
|
||||
g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count;
|
||||
|
||||
list<nuca_org_t *>::iterator niter;
|
||||
for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter)
|
||||
{
|
||||
delete *niter;
|
||||
}
|
||||
nuca_list.clear();
|
||||
|
||||
for(int i=0; i < ROUTER_TYPES; i++)
|
||||
{
|
||||
delete router_s[i];
|
||||
}
|
||||
g_ip->display_ip();
|
||||
// g_ip->force_cache_config = true;
|
||||
// g_ip->ndwl = 8;
|
||||
// g_ip->ndbl = 16;
|
||||
// g_ip->nspd = 4;
|
||||
// g_ip->ndcm = 1;
|
||||
// g_ip->ndsam1 = 8;
|
||||
// g_ip->ndsam2 = 32;
|
||||
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Nuca::print_nuca (nuca_org_t *fr)
|
||||
{
|
||||
printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
|
||||
"----------\n\n");
|
||||
printf("Optimal number of banks - %d\n", fr->bank_count);
|
||||
printf("Grid organization rows x columns - %d x %d\n",
|
||||
fr->rows, fr->columns);
|
||||
printf("Network frequency - %g GHz\n",
|
||||
(1/fr->nuca_pda.cycle_time)*1e3);
|
||||
printf("Cache dimension (mm x mm) - %g x %g\n",
|
||||
fr->nuca_pda.area.h*1e-3,
|
||||
fr->nuca_pda.area.w*1e-3);
|
||||
|
||||
fr->router->print_router();
|
||||
|
||||
printf("\n\nWire stats:\n");
|
||||
if (fr->h_wire->wt == Global) {
|
||||
printf("\tWire type - Full swing global wires with least "
|
||||
"possible delay\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_5) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"5%% delay penalty\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_10) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"10%% delay penalty\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_20) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"20%% delay penalty\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_30) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"30%% delay penalty\n");
|
||||
}
|
||||
else if(fr->h_wire->wt == Low_swing) {
|
||||
printf("\tWire type - Low swing wires\n");
|
||||
}
|
||||
|
||||
printf("\tHorizontal link delay - %g (ns)\n",
|
||||
fr->h_wire->delay*1e9);
|
||||
printf("\tVertical link delay - %g (ns)\n",
|
||||
fr->v_wire->delay*1e9);
|
||||
printf("\tDelay/length - %g (ns/mm)\n",
|
||||
fr->h_wire->delay*1e9/fr->bank_pda.area.w);
|
||||
printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
|
||||
"\t -leakage %g (nW)\n\n",
|
||||
fr->h_wire->power.readOp.dynamic*1e9,
|
||||
fr->h_wire->power.readOp.leakage*1e9);
|
||||
printf("\tVertical link energy -dynamic/access %g (nJ)\n"
|
||||
"\t -leakage %g (nW)\n\n",
|
||||
fr->v_wire->power.readOp.dynamic*1e9,
|
||||
fr->v_wire->power.readOp.leakage*1e9);
|
||||
printf("\n\n");
|
||||
fr->v_wire->print_wire();
|
||||
printf("\n\nBank stats:\n");
|
||||
}
|
||||
|
||||
|
||||
/*
 * Picks the lowest-cost organization from the candidate list.
 *
 *   n      - candidate organizations; in the weighted mode, candidates
 *            that violate the deviation constraints are removed from
 *            this list
 *   minval - minimum delay / power / cycle time / area values used to
 *            normalise each cost term; min_leakage is bumped to 0.1
 *            when it is zero (weighted mode only)
 *
 * Cost function, selected by g_ip->ed:
 *   1     - normalised delay * normalised dynamic power
 *   2     - normalised delay^2 * normalised dynamic power
 *   other - weighted sum of normalised delay, cycle time, dynamic
 *           power, leakage and area, over the candidates accepted by
 *           check_nuca_org()
 *
 * Returns the winner, or NULL when the list is empty or every
 * candidate was rejected. Prints one statistics line per candidate.
 */
nuca_org_t *
Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
{
  double cost = 0;
  double min_cost = BIGNUM;
  nuca_org_t *res = NULL;
  float d, a, dp, lp, c;
  int v;
  dp = g_ip->dynamic_power_wt_nuca;
  lp = g_ip->leakage_power_wt_nuca;
  a = g_ip->area_wt_nuca;
  d = g_ip->delay_wt_nuca;
  c = g_ip->cycle_time_wt_nuca;

  // The iterator is advanced by hand. The old "erase, step back, let
  // the for-loop step forward" sequence skipped the element after an
  // erased head and incremented end() once the list became empty.
  list<nuca_org_t *>::iterator niter = n->begin();
  while (niter != n->end()) {
    nuca_org_t *org = *niter;

    fprintf(stderr, "\n-----------------------------"
        "---------------\n");

    printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
        "bank_dpower = %g \tleak = %g \tcycle = %g\n",
        org->bank_count,
        org->nuca_pda.delay,
        org->nuca_pda.power.readOp.dynamic,
        org->h_wire->wt,
        org->bank_pda.power.readOp.dynamic,
        org->nuca_pda.power.readOp.leakage,
        org->nuca_pda.cycle_time);

    if (g_ip->ed == 1) {
      cost = (org->nuca_pda.delay/minval->min_delay)*
        (org->nuca_pda.power.readOp.dynamic/minval->min_dyn);
      if (min_cost > cost) {
        min_cost = cost;
        res = org;
      }
    }
    else if (g_ip->ed == 2) {
      cost = (org->nuca_pda.delay/minval->min_delay)*
        (org->nuca_pda.delay/minval->min_delay)*
        (org->nuca_pda.power.readOp.dynamic/minval->min_dyn);
      if (min_cost > cost) {
        min_cost = cost;
        res = org;
      }
    }
    else {
      // Apply the zero-leakage guard before the constraint check. It
      // used to run after it, so the first candidate was divided by a
      // zero minimum inside check_nuca_org().
      if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling

      /*
       * check whether the current organization
       * meets the input deviation constraints
       */
      v = check_nuca_org(org, minval);

      if (v) {
        cost = (d * (org->nuca_pda.delay/minval->min_delay) +
            c * (org->nuca_pda.cycle_time/minval->min_cyc) +
            dp * (org->nuca_pda.power.readOp.dynamic/minval->min_dyn) +
            lp * (org->nuca_pda.power.readOp.leakage/minval->min_leakage) +
            a * (org->nuca_pda.area.get_area()/minval->min_area));
        fprintf(stderr, "cost = %g\n", cost);

        if (min_cost > cost) {
          min_cost = cost;
          res = org;
        }
      }
      else {
        // NOTE(review): the rejected entry is unlinked but not
        // deleted, as before; confirm who owns it.
        niter = n->erase(niter); // already points at the next candidate
        continue;
      }
    }
    ++niter;
  }
  return res;
}
|
||||
|
||||
int
|
||||
Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
|
||||
{
|
||||
if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
|
||||
g_ip->dynamic_power_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
|
||||
g_ip->leakage_power_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
|
||||
g_ip->cycle_time_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 >
|
||||
g_ip->area_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
void
|
||||
Nuca::calculate_nuca_area (nuca_org_t *nuca)
|
||||
{
|
||||
nuca->nuca_pda.area.h=
|
||||
nuca->rows * ((nuca->h_wire->wire_width +
|
||||
nuca->h_wire->wire_spacing)
|
||||
* nuca->router->flit_size +
|
||||
nuca->bank_pda.area.h);
|
||||
|
||||
nuca->nuca_pda.area.w =
|
||||
nuca->columns * ((nuca->v_wire->wire_width +
|
||||
nuca->v_wire->wire_spacing)
|
||||
* nuca->router->flit_size +
|
||||
nuca->bank_pda.area.w);
|
||||
}
|
||||
|
101
T1/TP/TP1/cacti_7/nuca.h
Normal file
101
T1/TP/TP1/cacti_7/nuca.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
#ifndef __NUCA_H__
|
||||
#define __NUCA_H__
|
||||
|
||||
#include "basic_circuit.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
#include "assert.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "wire.h"
|
||||
#include "mat.h"
|
||||
#include "io.h"
|
||||
#include "router.h"
|
||||
#include <iostream>
|
||||
|
||||
|
||||
|
||||
class nuca_org_t {
|
||||
public:
|
||||
~nuca_org_t();
|
||||
// int size;
|
||||
/* area, power, access time, and cycle time stats */
|
||||
Component nuca_pda;
|
||||
Component bank_pda;
|
||||
Component wire_pda;
|
||||
Wire *h_wire;
|
||||
Wire *v_wire;
|
||||
Router *router;
|
||||
/* for particular network configuration
|
||||
* calculated based on a cycle accurate
|
||||
* simulation Ref: CACTI 6 - Tech report
|
||||
*/
|
||||
double contention;
|
||||
|
||||
/* grid network stats */
|
||||
double avg_hops;
|
||||
int rows;
|
||||
int columns;
|
||||
int bank_count;
|
||||
};
|
||||
|
||||
|
||||
|
||||
class Nuca : public Component
|
||||
{
|
||||
public:
|
||||
Nuca(
|
||||
/*TechnologyParameter::*/DeviceType *dt);
|
||||
void print_router();
|
||||
~Nuca();
|
||||
void sim_nuca();
|
||||
void init_cont();
|
||||
int calc_cycles(double lat, double oper_freq);
|
||||
void calculate_nuca_area (nuca_org_t *nuca);
|
||||
int check_nuca_org (nuca_org_t *n, min_values_t *minval);
|
||||
nuca_org_t * find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval);
|
||||
void print_nuca(nuca_org_t *n);
|
||||
void print_cont_stats();
|
||||
|
||||
private:
|
||||
|
||||
/*TechnologyParameter::*/DeviceType *deviceType;
|
||||
int wt_min, wt_max;
|
||||
Wire *wire_vertical[WIRE_TYPES],
|
||||
*wire_horizontal[WIRE_TYPES];
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
BIN
T1/TP/TP1/cacti_7/obj_dbg/TSV.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/TSV.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/Ucache.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/Ucache.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/arbiter.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/arbiter.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/area.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/area.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/bank.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/bank.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/basic_circuit.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/basic_circuit.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/cacti
Executable file
BIN
T1/TP/TP1/cacti_7/obj_dbg/cacti
Executable file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/cacti_interface.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/cacti_interface.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/component.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/component.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/crossbar.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/crossbar.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/decoder.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/decoder.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/extio.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/extio.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/extio_technology.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/extio_technology.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/htree2.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/htree2.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/io.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/io.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/main.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/main.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/mat.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/mat.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/memcad.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/memcad.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/memcad_parameters.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/memcad_parameters.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/memorybus.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/memorybus.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/nuca.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/nuca.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/parameter.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/parameter.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/powergating.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/powergating.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/router.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/router.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/subarray.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/subarray.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/technology.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/technology.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/uca.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/uca.o
Normal file
Binary file not shown.
BIN
T1/TP/TP1/cacti_7/obj_dbg/wire.o
Normal file
BIN
T1/TP/TP1/cacti_7/obj_dbg/wire.o
Normal file
Binary file not shown.
2837
T1/TP/TP1/cacti_7/parameter.cc
Normal file
2837
T1/TP/TP1/cacti_7/parameter.cc
Normal file
File diff suppressed because it is too large
Load diff
779
T1/TP/TP1/cacti_7/parameter.h
Normal file
779
T1/TP/TP1/cacti_7/parameter.h
Normal file
|
@ -0,0 +1,779 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __PARAMETER_H__
|
||||
#define __PARAMETER_H__
|
||||
|
||||
#include "area.h"
|
||||
#include "const.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "io.h"
|
||||
|
||||
// parameters which are functions of certain device technology
|
||||
/**
|
||||
class TechnologyParameter
|
||||
{
|
||||
public:
|
||||
class DeviceType
|
||||
{
|
||||
public:
|
||||
double C_g_ideal;
|
||||
double C_fringe;
|
||||
double C_overlap;
|
||||
double C_junc; // C_junc_area
|
||||
double C_junc_sidewall;
|
||||
double l_phy;
|
||||
double l_elec;
|
||||
double R_nch_on;
|
||||
double R_pch_on;
|
||||
double Vdd;
|
||||
double Vth;
|
||||
double Vcc_min;//allowed min vcc; for memory cell it is the lowest vcc for data retention. for logic it is the vcc to balance the leakage reduction and wakeup latency
|
||||
double I_on_n;
|
||||
double I_on_p;
|
||||
double I_off_n;
|
||||
double I_off_p;
|
||||
double I_g_on_n;
|
||||
double I_g_on_p;
|
||||
double C_ox;
|
||||
double t_ox;
|
||||
double n_to_p_eff_curr_drv_ratio;
|
||||
double long_channel_leakage_reduction;
|
||||
double Mobility_n;
|
||||
|
||||
DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
|
||||
C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
|
||||
Vdd(0), Vth(0), Vcc_min(0),
|
||||
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
|
||||
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0),
|
||||
Mobility_n(0) { };
|
||||
void reset()
|
||||
{
|
||||
C_g_ideal = 0;
|
||||
C_fringe = 0;
|
||||
C_overlap = 0;
|
||||
C_junc = 0;
|
||||
l_phy = 0;
|
||||
l_elec = 0;
|
||||
R_nch_on = 0;
|
||||
R_pch_on = 0;
|
||||
Vdd = 0;
|
||||
Vth = 0;
|
||||
Vcc_min = 0;
|
||||
I_on_n = 0;
|
||||
I_on_p = 0;
|
||||
I_off_n = 0;
|
||||
I_off_p = 0;
|
||||
I_g_on_n = 0;
|
||||
I_g_on_p = 0;
|
||||
C_ox = 0;
|
||||
t_ox = 0;
|
||||
n_to_p_eff_curr_drv_ratio = 0;
|
||||
long_channel_leakage_reduction = 0;
|
||||
Mobility_n = 0;
|
||||
}
|
||||
|
||||
void display(uint32_t indent = 0);
|
||||
};
|
||||
class InterconnectType
|
||||
{
|
||||
public:
|
||||
double pitch;
|
||||
double R_per_um;
|
||||
double C_per_um;
|
||||
double horiz_dielectric_constant;
|
||||
double vert_dielectric_constant;
|
||||
double aspect_ratio;
|
||||
double miller_value;
|
||||
double ild_thickness;
|
||||
|
||||
InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
|
||||
|
||||
void reset()
|
||||
{
|
||||
pitch = 0;
|
||||
R_per_um = 0;
|
||||
C_per_um = 0;
|
||||
horiz_dielectric_constant = 0;
|
||||
vert_dielectric_constant = 0;
|
||||
aspect_ratio = 0;
|
||||
miller_value = 0;
|
||||
ild_thickness = 0;
|
||||
}
|
||||
|
||||
void display(uint32_t indent = 0);
|
||||
};
|
||||
class MemoryType
|
||||
{
|
||||
public:
|
||||
double b_w;
|
||||
double b_h;
|
||||
double cell_a_w;
|
||||
double cell_pmos_w;
|
||||
double cell_nmos_w;
|
||||
double Vbitpre;
|
||||
double Vbitfloating;//voltage when floating bitline is supported
|
||||
|
||||
void reset()
|
||||
{
|
||||
b_w = 0; //fs and tech
|
||||
b_h = 0; //fs and tech
|
||||
cell_a_w = 0; // ram_cell_tech_type
|
||||
cell_pmos_w = 0; //fs
|
||||
cell_nmos_w = 0;
|
||||
Vbitpre = 0;
|
||||
Vbitfloating = 0;
|
||||
}
|
||||
|
||||
void display(uint32_t indent = 0);
|
||||
};
|
||||
|
||||
class ScalingFactor
|
||||
{
|
||||
public:
|
||||
double logic_scaling_co_eff;
|
||||
double core_tx_density;
|
||||
double long_channel_leakage_reduction;
|
||||
|
||||
ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
|
||||
long_channel_leakage_reduction(0) { };
|
||||
|
||||
void reset()
|
||||
{
|
||||
logic_scaling_co_eff= 0;
|
||||
core_tx_density = 0;
|
||||
long_channel_leakage_reduction= 0;
|
||||
}
|
||||
|
||||
void display(uint32_t indent = 0);
|
||||
};
|
||||
|
||||
double ram_wl_stitching_overhead_; //fs
|
||||
double min_w_nmos_; //fs
|
||||
double max_w_nmos_; //fs
|
||||
double max_w_nmos_dec; //fs+ ram_cell_tech_type
|
||||
double unit_len_wire_del; //wire_inside_mat
|
||||
double FO4; //fs
|
||||
double kinv; //fs
|
||||
double vpp; //input
|
||||
double w_sense_en;//fs
|
||||
double w_sense_n; //fs
|
||||
double w_sense_p; //fs
|
||||
double sense_delay; // input
|
||||
double sense_dy_power; //input
|
||||
double w_iso; //fs
|
||||
double w_poly_contact; //fs
|
||||
double spacing_poly_to_poly; //fs
|
||||
double spacing_poly_to_contact;//fs
|
||||
|
||||
//CACTI3DD TSV params
|
||||
double tsv_parasitic_capacitance_fine;
|
||||
double tsv_parasitic_resistance_fine;
|
||||
double tsv_minimum_area_fine;
|
||||
|
||||
double tsv_parasitic_capacitance_coarse;
|
||||
double tsv_parasitic_resistance_coarse;
|
||||
double tsv_minimum_area_coarse;
|
||||
|
||||
//fs
|
||||
double w_comp_inv_p1;
|
||||
double w_comp_inv_p2;
|
||||
double w_comp_inv_p3;
|
||||
double w_comp_inv_n1;
|
||||
double w_comp_inv_n2;
|
||||
double w_comp_inv_n3;
|
||||
double w_eval_inv_p;
|
||||
double w_eval_inv_n;
|
||||
double w_comp_n;
|
||||
double w_comp_p;
|
||||
|
||||
double dram_cell_I_on; //ram_cell_tech_type
|
||||
double dram_cell_Vdd;
|
||||
double dram_cell_I_off_worst_case_len_temp;
|
||||
double dram_cell_C;
|
||||
double gm_sense_amp_latch; // depends on many things
|
||||
|
||||
double w_nmos_b_mux;//fs
|
||||
double w_nmos_sa_mux;//fs
|
||||
double w_pmos_bl_precharge;//fs
|
||||
double w_pmos_bl_eq;//fs
|
||||
double MIN_GAP_BET_P_AND_N_DIFFS;//fs
|
||||
double MIN_GAP_BET_SAME_TYPE_DIFFS;//fs
|
||||
double HPOWERRAIL;//fs
|
||||
double cell_h_def;//fs
|
||||
|
||||
double chip_layout_overhead; //input
|
||||
double macro_layout_overhead;
|
||||
double sckt_co_eff;
|
||||
|
||||
double fringe_cap;//input
|
||||
|
||||
uint64_t h_dec; //ram_cell_tech_type
|
||||
|
||||
DeviceType sram_cell; // SRAM cell transistor
|
||||
DeviceType dram_acc; // DRAM access transistor
|
||||
DeviceType dram_wl; // DRAM wordline transistor
|
||||
DeviceType peri_global; // peripheral global
|
||||
DeviceType cam_cell; // SRAM cell transistor
|
||||
|
||||
DeviceType sleep_tx; // Sleep transistor cell transistor
|
||||
|
||||
InterconnectType wire_local;
|
||||
InterconnectType wire_inside_mat;
|
||||
InterconnectType wire_outside_mat;
|
||||
|
||||
ScalingFactor scaling_factor;
|
||||
|
||||
MemoryType sram;
|
||||
MemoryType dram;
|
||||
MemoryType cam;
|
||||
|
||||
void display(uint32_t indent = 0);
|
||||
|
||||
void reset()
|
||||
{
|
||||
dram_cell_Vdd = 0;
|
||||
dram_cell_I_on = 0;
|
||||
dram_cell_C = 0;
|
||||
vpp = 0;
|
||||
|
||||
sense_delay = 0;
|
||||
sense_dy_power = 0;
|
||||
fringe_cap = 0;
|
||||
// horiz_dielectric_constant = 0;
|
||||
// vert_dielectric_constant = 0;
|
||||
// aspect_ratio = 0;
|
||||
// miller_value = 0;
|
||||
// ild_thickness = 0;
|
||||
|
||||
dram_cell_I_off_worst_case_len_temp = 0;
|
||||
|
||||
sram_cell.reset();
|
||||
dram_acc.reset();
|
||||
dram_wl.reset();
|
||||
peri_global.reset();
|
||||
cam_cell.reset();
|
||||
sleep_tx.reset();
|
||||
|
||||
scaling_factor.reset();
|
||||
|
||||
wire_local.reset();
|
||||
wire_inside_mat.reset();
|
||||
wire_outside_mat.reset();
|
||||
|
||||
sram.reset();
|
||||
dram.reset();
|
||||
cam.reset();
|
||||
|
||||
chip_layout_overhead = 0;
|
||||
macro_layout_overhead = 0;
|
||||
sckt_co_eff = 0;
|
||||
}
|
||||
};
|
||||
|
||||
**/
|
||||
//ali
|
||||
class DeviceType
|
||||
{
|
||||
public:
|
||||
double C_g_ideal;
|
||||
double C_fringe;
|
||||
double C_overlap;
|
||||
double C_junc; // C_junc_area
|
||||
double C_junc_sidewall;
|
||||
double l_phy;
|
||||
double l_elec;
|
||||
double R_nch_on;
|
||||
double R_pch_on;
|
||||
double Vdd;
|
||||
double Vth;
|
||||
double Vcc_min;//allowed min vcc; for memory cell it is the lowest vcc for data retention. for logic it is the vcc to balance the leakage reduction and wakeup latency
|
||||
double I_on_n;
|
||||
double I_on_p;
|
||||
double I_off_n;
|
||||
double I_off_p;
|
||||
double I_g_on_n;
|
||||
double I_g_on_p;
|
||||
double C_ox;
|
||||
double t_ox;
|
||||
double n_to_p_eff_curr_drv_ratio;
|
||||
double long_channel_leakage_reduction;
|
||||
double Mobility_n;
|
||||
|
||||
// auxiliary parameters
|
||||
double Vdsat;
|
||||
double gmp_to_gmn_multiplier;
|
||||
|
||||
|
||||
DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
|
||||
C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
|
||||
Vdd(0), Vth(0), Vcc_min(0),
|
||||
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
|
||||
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0),
|
||||
Mobility_n(0) { reset();};
|
||||
|
||||
void assign(const string & in_file, int tech_flavor, unsigned int temp);
|
||||
void interpolate(double alpha, const DeviceType& dev1, const DeviceType& dev2);
|
||||
void reset()
|
||||
{
|
||||
C_g_ideal=0;
|
||||
C_fringe=0;
|
||||
C_overlap=0;
|
||||
C_junc=0; // C_junc_area
|
||||
C_junc_sidewall=0;
|
||||
l_phy=0;
|
||||
l_elec=0;
|
||||
R_nch_on=0;
|
||||
R_pch_on=0;
|
||||
Vdd=0;
|
||||
Vth=0;
|
||||
Vcc_min=0;//allowed min vcc, for memory cell it is the lowest vcc for data retention. for logic it is the vcc to balance the leakage reduction and wakeup latency
|
||||
I_on_n=0;
|
||||
I_on_p=0;
|
||||
I_off_n=0;
|
||||
I_off_p=0;
|
||||
I_g_on_n=0;
|
||||
I_g_on_p=0;
|
||||
C_ox=0;
|
||||
t_ox=0;
|
||||
n_to_p_eff_curr_drv_ratio=0;
|
||||
long_channel_leakage_reduction=0;
|
||||
Mobility_n=0;
|
||||
|
||||
// auxiliary parameters
|
||||
Vdsat=0;
|
||||
gmp_to_gmn_multiplier=0;
|
||||
}
|
||||
|
||||
void display(uint32_t indent = 0) const;
|
||||
bool isEqual(const DeviceType & dev);
|
||||
};
|
||||
|
||||
class InterconnectType
|
||||
{
|
||||
public:
|
||||
double pitch;
|
||||
double R_per_um;
|
||||
double C_per_um;
|
||||
double horiz_dielectric_constant;
|
||||
double vert_dielectric_constant;
|
||||
double aspect_ratio;
|
||||
double miller_value;
|
||||
double ild_thickness;
|
||||
|
||||
//auxiliary parameters
|
||||
double wire_width;
|
||||
double wire_thickness;
|
||||
double wire_spacing;
|
||||
double barrier_thickness;
|
||||
double dishing_thickness;
|
||||
double alpha_scatter;
|
||||
double fringe_cap;
|
||||
|
||||
|
||||
InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { reset(); };
|
||||
|
||||
void reset()
|
||||
{
|
||||
pitch=0;
|
||||
R_per_um=0;
|
||||
C_per_um=0;
|
||||
horiz_dielectric_constant=0;
|
||||
vert_dielectric_constant=0;
|
||||
aspect_ratio=0;
|
||||
miller_value=0;
|
||||
ild_thickness=0;
|
||||
|
||||
//auxiliary parameters
|
||||
wire_width=0;
|
||||
wire_thickness=0;
|
||||
wire_spacing=0;
|
||||
barrier_thickness=0;
|
||||
dishing_thickness=0;
|
||||
alpha_scatter=0;
|
||||
fringe_cap=0;
|
||||
|
||||
}
|
||||
void assign(const string & in_file, int projection_type, int tech_flavor);
|
||||
void interpolate(double alpha, const InterconnectType & inter1, const InterconnectType & inter2);
|
||||
void display(uint32_t indent = 0);
|
||||
bool isEqual(const InterconnectType & inter);
|
||||
};
|
||||
|
||||
// Per-cell-type memory parameters. TechnologyParameter keeps one instance
// each for its sram, dram and cam members.
class MemoryType
{
  public:
    double b_w;
    double b_h;
    double cell_a_w;
    double cell_pmos_w;
    double cell_nmos_w;
    double Vbitpre;
    double Vbitfloating; // voltage when floating bitline is supported

    // needed to calculate b_w b_h
    double area_cell;
    double asp_ratio_cell;

    // Construct with every field zeroed (delegates to reset()).
    MemoryType() { reset(); }

    // Zero every data member of this object.
    // area_cell and asp_ratio_cell are cleared here as well; previously they
    // were skipped, which left them indeterminate after construction and
    // stale after a TechnologyParameter::reset().
    void reset()
    {
      b_w = 0;
      b_h = 0;
      cell_a_w = 0;
      cell_pmos_w = 0;
      cell_nmos_w = 0;
      Vbitpre = 0;
      Vbitfloating = 0;
      area_cell = 0;
      asp_ratio_cell = 0;
    }

    // Defined outside this header. cell_type selects sram(0), cam(1), dram(2).
    void assign(const string & in_file, int tech_flavor, int cell_type); // sram(0),cam(1),dram(2)
    // Defined outside this header; presumably blends dev1 and dev2 by alpha -- confirm in parameter.cc.
    void interpolate(double alpha, const MemoryType& dev1, const MemoryType& dev2);
    void display(uint32_t indent = 0) const;
    bool isEqual(const MemoryType & mem);
};
|
||||
|
||||
class ScalingFactor
|
||||
{
|
||||
public:
|
||||
double logic_scaling_co_eff;
|
||||
double core_tx_density;
|
||||
double long_channel_leakage_reduction;
|
||||
|
||||
ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
|
||||
long_channel_leakage_reduction(0) { reset(); };
|
||||
|
||||
void reset()
|
||||
{
|
||||
logic_scaling_co_eff=0;
|
||||
core_tx_density=0;
|
||||
long_channel_leakage_reduction=0;
|
||||
}
|
||||
void assign(const string & in_file);
|
||||
void interpolate(double alpha, const ScalingFactor& dev1, const ScalingFactor& dev2);
|
||||
void display(uint32_t indent = 0);
|
||||
bool isEqual(const ScalingFactor & scal);
|
||||
};
|
||||
|
||||
// parameters which are functions of certain device technology
|
||||
class TechnologyParameter
|
||||
{
|
||||
public:
|
||||
double ram_wl_stitching_overhead_; //fs
|
||||
double min_w_nmos_; //fs
|
||||
double max_w_nmos_; //fs
|
||||
double max_w_nmos_dec; //fs+ ram_cell_tech_type
|
||||
double unit_len_wire_del; //wire_inside_mat
|
||||
double FO4; //fs
|
||||
double kinv; //fs
|
||||
double vpp; //input
|
||||
double w_sense_en;//fs
|
||||
double w_sense_n; //fs
|
||||
double w_sense_p; //fs
|
||||
double sense_delay; // input
|
||||
double sense_dy_power; //input
|
||||
double w_iso; //fs
|
||||
double w_poly_contact; //fs
|
||||
double spacing_poly_to_poly; //fs
|
||||
double spacing_poly_to_contact;//fs
|
||||
|
||||
//CACTI3D auxiliary variables
|
||||
double tsv_pitch;
|
||||
double tsv_diameter;
|
||||
double tsv_length;
|
||||
double tsv_dielec_thickness;
|
||||
double tsv_contact_resistance;
|
||||
double tsv_depletion_width;
|
||||
double tsv_liner_dielectric_constant;
|
||||
|
||||
//CACTI3DD TSV params
|
||||
|
||||
double tsv_parasitic_capacitance_fine;
|
||||
double tsv_parasitic_resistance_fine;
|
||||
double tsv_minimum_area_fine;
|
||||
|
||||
double tsv_parasitic_capacitance_coarse;
|
||||
double tsv_parasitic_resistance_coarse;
|
||||
double tsv_minimum_area_coarse;
|
||||
|
||||
//fs
|
||||
double w_comp_inv_p1;
|
||||
double w_comp_inv_p2;
|
||||
double w_comp_inv_p3;
|
||||
double w_comp_inv_n1;
|
||||
double w_comp_inv_n2;
|
||||
double w_comp_inv_n3;
|
||||
double w_eval_inv_p;
|
||||
double w_eval_inv_n;
|
||||
double w_comp_n;
|
||||
double w_comp_p;
|
||||
|
||||
double dram_cell_I_on; //ram_cell_tech_type
|
||||
double dram_cell_Vdd;
|
||||
double dram_cell_I_off_worst_case_len_temp;
|
||||
double dram_cell_C;
|
||||
double gm_sense_amp_latch; // depends on many things
|
||||
|
||||
double w_nmos_b_mux;//fs
|
||||
double w_nmos_sa_mux;//fs
|
||||
double w_pmos_bl_precharge;//fs
|
||||
double w_pmos_bl_eq;//fs
|
||||
double MIN_GAP_BET_P_AND_N_DIFFS;//fs
|
||||
double MIN_GAP_BET_SAME_TYPE_DIFFS;//fs
|
||||
double HPOWERRAIL;//fs
|
||||
double cell_h_def;//fs
|
||||
|
||||
double chip_layout_overhead; //input
|
||||
double macro_layout_overhead;
|
||||
double sckt_co_eff;
|
||||
|
||||
double fringe_cap;//input
|
||||
|
||||
uint64_t h_dec; //ram_cell_tech_type
|
||||
|
||||
DeviceType sram_cell; // SRAM cell transistor
|
||||
DeviceType dram_acc; // DRAM access transistor
|
||||
DeviceType dram_wl; // DRAM wordline transistor
|
||||
DeviceType peri_global; // peripheral global
|
||||
DeviceType cam_cell; // SRAM cell transistor
|
||||
|
||||
DeviceType sleep_tx; // Sleep transistor cell transistor
|
||||
|
||||
InterconnectType wire_local;
|
||||
InterconnectType wire_inside_mat;
|
||||
InterconnectType wire_outside_mat;
|
||||
|
||||
ScalingFactor scaling_factor;
|
||||
|
||||
MemoryType sram;
|
||||
MemoryType dram;
|
||||
MemoryType cam;
|
||||
|
||||
void display(uint32_t indent = 0);
|
||||
bool isEqual(const TechnologyParameter & tech);
|
||||
|
||||
|
||||
void find_upper_and_lower_tech(double technology, int &tech_lo, string& in_file_lo, int &tech_hi, string& in_file_hi);
|
||||
void assign_tsv(const string & in_file);
|
||||
void init(double technology, bool is_tag);
|
||||
TechnologyParameter()
|
||||
{
|
||||
reset();
|
||||
}
|
||||
void reset()
|
||||
{
|
||||
ram_wl_stitching_overhead_ =0; //fs
|
||||
min_w_nmos_ =0; //fs
|
||||
max_w_nmos_ =0; //fs
|
||||
max_w_nmos_dec =0; //fs+ ram_cell_tech_type
|
||||
unit_len_wire_del =0; //wire_inside_mat
|
||||
FO4 =0; //fs
|
||||
kinv =0; //fs
|
||||
vpp =0; //input
|
||||
w_sense_en =0;//fs
|
||||
w_sense_n =0; //fs
|
||||
w_sense_p =0; //fs
|
||||
sense_delay =0; // input
|
||||
sense_dy_power =0; //input
|
||||
w_iso =0; //fs
|
||||
w_poly_contact =0; //fs
|
||||
spacing_poly_to_poly =0; //fs
|
||||
spacing_poly_to_contact =0;//fs
|
||||
|
||||
//CACTI3D auxiliary variables
|
||||
tsv_pitch =0;
|
||||
tsv_diameter =0;
|
||||
tsv_length =0;
|
||||
tsv_dielec_thickness =0;
|
||||
tsv_contact_resistance =0;
|
||||
tsv_depletion_width =0;
|
||||
tsv_liner_dielectric_constant =0;
|
||||
|
||||
//CACTI3DD TSV params
|
||||
|
||||
tsv_parasitic_capacitance_fine =0;
|
||||
tsv_parasitic_resistance_fine =0;
|
||||
tsv_minimum_area_fine =0;
|
||||
|
||||
tsv_parasitic_capacitance_coarse =0;
|
||||
tsv_parasitic_resistance_coarse =0;
|
||||
tsv_minimum_area_coarse =0;
|
||||
|
||||
//fs
|
||||
w_comp_inv_p1 =0;
|
||||
w_comp_inv_p2 =0;
|
||||
w_comp_inv_p3 =0;
|
||||
w_comp_inv_n1 =0;
|
||||
w_comp_inv_n2 =0;
|
||||
w_comp_inv_n3 =0;
|
||||
w_eval_inv_p =0;
|
||||
w_eval_inv_n =0;
|
||||
w_comp_n =0;
|
||||
w_comp_p =0;
|
||||
|
||||
dram_cell_I_on =0; //ram_cell_tech_type
|
||||
dram_cell_Vdd =0;
|
||||
dram_cell_I_off_worst_case_len_temp =0;
|
||||
dram_cell_C =0;
|
||||
gm_sense_amp_latch =0; // depends on many things
|
||||
|
||||
w_nmos_b_mux =0;//fs
|
||||
w_nmos_sa_mux =0;//fs
|
||||
w_pmos_bl_precharge =0;//fs
|
||||
w_pmos_bl_eq =0;//fs
|
||||
MIN_GAP_BET_P_AND_N_DIFFS =0;//fs
|
||||
MIN_GAP_BET_SAME_TYPE_DIFFS =0;//fs
|
||||
HPOWERRAIL =0;//fs
|
||||
cell_h_def =0;//fs
|
||||
|
||||
chip_layout_overhead = 0;
|
||||
macro_layout_overhead = 0;
|
||||
sckt_co_eff = 0;
|
||||
|
||||
fringe_cap=0;//input
|
||||
|
||||
h_dec=0; //ram_cell_tech_type
|
||||
|
||||
sram_cell.reset();
|
||||
dram_acc.reset();
|
||||
dram_wl.reset();
|
||||
peri_global.reset();
|
||||
cam_cell.reset();
|
||||
sleep_tx.reset();
|
||||
|
||||
scaling_factor.reset();
|
||||
|
||||
wire_local.reset();
|
||||
wire_inside_mat.reset();
|
||||
wire_outside_mat.reset();
|
||||
|
||||
sram.reset();
|
||||
dram.reset();
|
||||
cam.reset();
|
||||
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
//end ali
|
||||
|
||||
class DynamicParameter
|
||||
{
|
||||
public:
|
||||
bool is_tag;
|
||||
bool pure_ram;
|
||||
bool pure_cam;
|
||||
bool fully_assoc;
|
||||
int tagbits;
|
||||
int num_subarrays; // only for leakage computation -- the number of subarrays per bank
|
||||
int num_mats; // only for leakage computation -- the number of mats per bank
|
||||
double Nspd;
|
||||
int Ndwl;
|
||||
int Ndbl;
|
||||
int Ndcm;
|
||||
int deg_bl_muxing;
|
||||
int deg_senseamp_muxing_non_associativity;
|
||||
int Ndsam_lev_1;
|
||||
int Ndsam_lev_2;
|
||||
Wire_type wtype; // merge from cacti-7 code to cacti3d code.
|
||||
|
||||
int number_addr_bits_mat; // per port
|
||||
int number_subbanks_decode; // per_port
|
||||
int num_di_b_bank_per_port;
|
||||
int num_do_b_bank_per_port;
|
||||
int num_di_b_mat;
|
||||
int num_do_b_mat;
|
||||
int num_di_b_subbank;
|
||||
int num_do_b_subbank;
|
||||
|
||||
int num_si_b_mat;
|
||||
int num_so_b_mat;
|
||||
int num_si_b_subbank;
|
||||
int num_so_b_subbank;
|
||||
int num_si_b_bank_per_port;
|
||||
int num_so_b_bank_per_port;
|
||||
|
||||
int number_way_select_signals_mat;
|
||||
int num_act_mats_hor_dir;
|
||||
|
||||
int num_act_mats_hor_dir_sl;
|
||||
bool is_dram;
|
||||
double V_b_sense;
|
||||
unsigned int num_r_subarray;
|
||||
unsigned int num_c_subarray;
|
||||
int tag_num_r_subarray;//: fully associative cache tag and data must be computed together, data and tag must be separate
|
||||
int tag_num_c_subarray;
|
||||
int data_num_r_subarray;
|
||||
int data_num_c_subarray;
|
||||
int num_mats_h_dir;
|
||||
int num_mats_v_dir;
|
||||
uint32_t ram_cell_tech_type;
|
||||
double dram_refresh_period;
|
||||
|
||||
DynamicParameter();
|
||||
DynamicParameter(
|
||||
bool is_tag_,
|
||||
int pure_ram_,
|
||||
int pure_cam_,
|
||||
double Nspd_,
|
||||
unsigned int Ndwl_,
|
||||
unsigned int Ndbl_,
|
||||
unsigned int Ndcm_,
|
||||
unsigned int Ndsam_lev_1_,
|
||||
unsigned int Ndsam_lev_2_,
|
||||
Wire_type wt, // merged from cacti-7 to cacti3d
|
||||
bool is_main_mem_);
|
||||
|
||||
int use_inp_params;
|
||||
unsigned int num_rw_ports;
|
||||
unsigned int num_rd_ports;
|
||||
unsigned int num_wr_ports;
|
||||
unsigned int num_se_rd_ports; // number of single ended read ports
|
||||
unsigned int num_search_ports;
|
||||
unsigned int out_w;// == nr_bits_out
|
||||
bool is_main_mem;
|
||||
Area cell, cam_cell;//cell is the sram_cell in both nomal cache/ram and FA.
|
||||
bool is_valid;
|
||||
private:
|
||||
void ECC_adjustment();
|
||||
void init_CAM();
|
||||
void init_FA();
|
||||
bool calc_subarr_rc(unsigned int cap); //to calculate and check subarray rows and columns
|
||||
};
|
||||
|
||||
|
||||
|
||||
extern InputParameter * g_ip;
|
||||
extern TechnologyParameter g_tp;
|
||||
|
||||
#endif
|
||||
|
129
T1/TP/TP1/cacti_7/powergating.cc
Normal file
129
T1/TP/TP1/cacti_7/powergating.cc
Normal file
|
@ -0,0 +1,129 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "area.h"
|
||||
#include "powergating.h"
|
||||
#include "parameter.h"
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
//TODO: although DTSN is used,since for memory array, the number of sleep txs
|
||||
//is related to the number of rows and cols, all calculations are still based on
|
||||
//single sleep tx cases
|
||||
|
||||
Sleep_tx::Sleep_tx(
|
||||
double _perf_with_sleep_tx,
|
||||
double _active_Isat,//of circuit block, not sleep tx
|
||||
bool _is_footer,
|
||||
double _c_circuit_wakeup,
|
||||
double _V_delta,
|
||||
int _num_sleep_tx,
|
||||
// double _vt_circuit,
|
||||
// double _vt_sleep_tx,
|
||||
// double _mobility,//of sleep tx
|
||||
// double _c_ox,//of sleep tx
|
||||
const Area & cell_)
|
||||
:perf_with_sleep_tx(_perf_with_sleep_tx),
|
||||
active_Isat(_active_Isat),
|
||||
is_footer(_is_footer),
|
||||
c_circuit_wakeup(_c_circuit_wakeup),
|
||||
V_delta(_V_delta),
|
||||
num_sleep_tx(_num_sleep_tx),
|
||||
// vt_circuit(_vt_circuit),
|
||||
// vt_sleep_tx(_vt_sleep_tx),
|
||||
// mobility(_mobility),
|
||||
// c_ox(_c_ox)
|
||||
cell(cell_),
|
||||
is_sleep_tx(true)
|
||||
{
|
||||
|
||||
//a single sleep tx in a network
|
||||
double raw_area, raw_width, raw_hight;
|
||||
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true);
|
||||
vdd = g_tp.peri_global.Vdd;
|
||||
vt_circuit = g_tp.peri_global.Vth;
|
||||
vt_sleep_tx = g_tp.sleep_tx.Vth;
|
||||
mobility = g_tp.sleep_tx.Mobility_n;
|
||||
c_ox = g_tp.sleep_tx.C_ox;
|
||||
|
||||
width = active_Isat/(perf_with_sleep_tx*mobility*c_ox*(vdd-vt_circuit)*(vdd-vt_sleep_tx))*g_ip->F_sz_um;//W/L uses physical numbers
|
||||
width /= num_sleep_tx;
|
||||
|
||||
raw_area = compute_gate_area(INV, 1, width, p_to_n_sz_ratio*width, cell.w*2)/2; //Only single device, assuming device is laide on the side
|
||||
raw_width = cell.w;
|
||||
raw_hight = raw_area/cell.w;
|
||||
area.set_h(raw_hight);
|
||||
area.set_w(raw_width);
|
||||
|
||||
compute_penalty();
|
||||
|
||||
}
|
||||
|
||||
double Sleep_tx::compute_penalty()
|
||||
{
|
||||
//V_delta = VDD - VCCmin nothing to do with threshold of sleep tx. Although it might be OK to use sleep tx to control the V_delta
|
||||
// double c_load;
|
||||
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(false, false, true);
|
||||
|
||||
if (is_footer)
|
||||
{
|
||||
c_intrinsic_sleep = drain_C_(width, NCH, 1, 1, area.h, false, false, false,is_sleep_tx);
|
||||
// V_delta = _V_delta;
|
||||
wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_nmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio);
|
||||
wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta;
|
||||
//no 0.5 factor: half of the energy is spent entering sleep and half is spent waking up, and the two always occur in pairs
|
||||
}
|
||||
else
|
||||
{
|
||||
c_intrinsic_sleep = drain_C_(width*p_to_n_sz_ratio, PCH, 1, 1, area.h, false, false, false,is_sleep_tx);
|
||||
// V_delta = _V_delta;
|
||||
wakeup_delay = (c_circuit_wakeup + c_intrinsic_sleep)*V_delta/(simplified_pmos_Isat(width, false, false, false,is_sleep_tx)/Ilinear_to_Isat_ratio);
|
||||
wakeup_power.readOp.dynamic = (c_circuit_wakeup + c_intrinsic_sleep)*g_tp.sram_cell.Vdd*V_delta;
|
||||
}
|
||||
|
||||
return wakeup_delay;
|
||||
|
||||
/*
|
||||
The number of cycles in the wake-up latency set the constraint on the
|
||||
minimum number of idle clock cycles needed before a processor
|
||||
can enter in the corresponding sleep mode without any wakeup
|
||||
overhead.
|
||||
|
||||
If the circuit is half way to sleep then waken up, it is still OK
|
||||
just the wakeup latency will be shorter than the wakeup time from full asleep.
|
||||
So, the sleep time and energy does not matter
|
||||
*/
|
||||
|
||||
}
|
||||
|
86
T1/TP/TP1/cacti_7/powergating.h
Normal file
86
T1/TP/TP1/cacti_7/powergating.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef POWERGATING_H_
|
||||
#define POWERGATING_H_
|
||||
|
||||
#include "component.h"
|
||||
|
||||
// Sleep_tx: Component subclass holding the inputs and results for a
// power-gating sleep transistor ("sleep tx"). Only declarations are visible
// in this header; the constructor and compute_penalty() are defined elsewhere.
class Sleep_tx : public Component
|
||||
{
|
||||
public:
|
||||
Sleep_tx(
|
||||
double _perf_with_sleep_tx,
|
||||
double _active_Isat,//of circuit block, not sleep tx
|
||||
bool _is_footer,
|
||||
double _c_circuit_wakeup,
|
||||
double _V_delta,
|
||||
int _num_sleep_tx,
|
||||
// double _vt_circuit,
|
||||
// double _vt_sleep_tx,
|
||||
// double _mobility,//of sleep tx
|
||||
// double _c_ox,//of sleep tx
|
||||
const Area & cell_);
|
||||
|
||||
double perf_with_sleep_tx;
|
||||
double active_Isat;
|
||||
bool is_footer;
|
||||
|
||||
double vt_circuit;
|
||||
double vt_sleep_tx;
|
||||
double vdd;// of circuit block not sleep tx
|
||||
double mobility;//of sleep tx
|
||||
double c_ox;
|
||||
double width;
|
||||
double c_circuit_wakeup;
|
||||
double c_intrinsic_sleep;
|
||||
// NOTE(review): Router, another Component subclass, uses `delay` and `power`
// without declaring them, so Component presumably already provides both; if
// so, the two declarations below hide the base members -- confirm in component.h.
double delay, wakeup_delay;
|
||||
powerDef power, wakeup_power;
|
||||
// double c_circuit_sleep;
|
||||
// double sleep_delay;
|
||||
// powerDef sleep_power;
|
||||
double V_delta;
|
||||
|
||||
int num_sleep_tx;
|
||||
|
||||
// Reference member (presumably bound to the constructor's cell_ argument);
// the referenced Area must outlive this object.
const Area & cell;
|
||||
bool is_sleep_tx;
|
||||
|
||||
|
||||
|
||||
// void compute_area();
|
||||
double compute_penalty(); // return outrisetime
|
||||
|
||||
void leakage_feedback(double temperature){}; // no-op: temperature is ignored
|
||||
~Sleep_tx(){};
|
||||
};
|
||||
|
||||
#endif /* POWERGATING_H_ */
|
45
T1/TP/TP1/cacti_7/regression.test
Executable file
45
T1/TP/TP1/cacti_7/regression.test
Executable file
|
@ -0,0 +1,45 @@
|
|||
cache 4 types
|
||||
./cacti -infile test_configs/cache1.cfg #L1 2-way 32K
|
||||
./cacti -infile test_configs/cache2.cfg #L2 4-way 256K
|
||||
./cacti -infile test_configs/cache3.cfg #L3 8-way 16M
|
||||
./cacti -infile test_configs/cache4.cfg #L1 full-asso 4K with single search port
|
||||
RAM 4 types
|
||||
./cacti -infile test_configs/ram1.cfg # 16M
|
||||
./cacti -infile test_configs/ram2.cfg # itrs-hp itrs-lstp
|
||||
./cacti -infile test_configs/ram3.cfg # two banks no-ecc 128M
|
||||
./cacti -infile test_configs/ram4.cfg # 32K 2-way
|
||||
CAM 4 types
|
||||
./cacti -infile test_configs/cam1.cfg # same as ram1 but ram->cam and full-asso
|
||||
./cacti -infile test_configs/cam2.cfg # same as cam1 with line size = 128
|
||||
./cacti -infile test_configs/cam3.cfg # cam1 for 40nm technology
|
||||
./cacti -infile test_configs/cam4.cfg # cam1 with exclusive read and write port
|
||||
NUCA 4 types
|
||||
./cacti -infile test_configs/nuca1.cfg #
|
||||
./cacti -infile test_configs/nuca2.cfg
|
||||
./cacti -infile test_configs/nuca3.cfg
|
||||
./cacti -infile test_configs/nuca4.cfg
|
||||
eDRAM 4 types
|
||||
./cacti -infile test_configs/edram1.cfg #
|
||||
./cacti -infile test_configs/edram2.cfg
|
||||
./cacti -infile test_configs/edram3.cfg
|
||||
./cacti -infile test_configs/edram4.cfg
|
||||
DRAM 4 types
|
||||
./cacti -infile test_configs/dram1.cfg #
|
||||
./cacti -infile test_configs/dram2.cfg
|
||||
./cacti -infile test_configs/dram3.cfg
|
||||
./cacti -infile test_configs/dram4.cfg
|
||||
IO 4 different parameters
|
||||
./cacti -infile test_configs/io1.cfg #
|
||||
./cacti -infile test_configs/io2.cfg
|
||||
./cacti -infile test_configs/io3.cfg
|
||||
./cacti -infile test_configs/io4.cfg
|
||||
Power gating 4 types
|
||||
./cacti -infile test_configs/power_gate1.cfg
|
||||
./cacti -infile test_configs/power_gate2.cfg
|
||||
./cacti -infile test_configs/power_gate3.cfg
|
||||
./cacti -infile test_configs/power_gate4.cfg
|
||||
3D 4 types
|
||||
./cacti -infile test_configs/3D1.cfg
|
||||
./cacti -infile test_configs/3D2.cfg
|
||||
./cacti -infile test_configs/3D3.cfg
|
||||
./cacti -infile test_configs/3D4.cfg
|
311
T1/TP/TP1/cacti_7/router.cc
Normal file
311
T1/TP/TP1/cacti_7/router.cc
Normal file
|
@ -0,0 +1,311 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#include "router.h"
|
||||
|
||||
// Builds a router model.
//   flit_size_  stored in flit_size
//   vc_buf      stored in vc_buffer_size (vc size = vc_buffer_size * flit_size)
//   vc_c        stored in vc_count
//   dt          device type; supplies Vdd and the n-to-p drive ratio
//   I_, O_      number of crossbar input / output ports
//   M_          network load factor, applied to dynamic power
// After deriving the crossbar transistor and track dimensions from the
// feature size, the constructor calls calc_router_parameters(), so delay,
// power and area are filled in by the time it returns.
Router::Router(
|
||||
double flit_size_,
|
||||
double vc_buf, /* vc size = vc_buffer_size * flit_size */
|
||||
double vc_c,
|
||||
/*TechnologyParameter::*/DeviceType *dt,
|
||||
double I_,
|
||||
double O_,
|
||||
double M_
|
||||
):flit_size(flit_size_),
|
||||
deviceType(dt),
|
||||
I(I_),
|
||||
O(O_),
|
||||
M(M_)
|
||||
{
|
||||
vc_buffer_size = vc_buf;
|
||||
vc_count = vc_c;
|
||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
|
||||
double technology = g_ip->F_sz_um; // feature size; the _um suffix suggests microns
|
||||
|
||||
Vdd = dt->Vdd;
|
||||
|
||||
/*Crossbar parameters. Transmission gate is employed for connector*/
|
||||
// Each dimension below is a multiple of the feature size scaled by 1e-6
// (the "m" comments further down indicate meters) and then halved.
// NOTE(review): the reason for the /2 is not visible here -- confirm.
NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
|
||||
PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
|
||||
wt = 15*technology*1e-6/2; /*track width*/
|
||||
ht = 15*technology*1e-6/2; /*track height*/
|
||||
// I = 5; /*Number of crossbar input ports*/
|
||||
// O = 5; /*Number of crossbar output ports*/
|
||||
NTi = 12.5*technology*1e-6/2;
|
||||
PTi = 25*technology*1e-6/2;
|
||||
|
||||
NTid = 60*technology*1e-6/2; //m
|
||||
PTid = 120*technology*1e-6/2; // m
|
||||
NTod = 60*technology*1e-6/2; // m
|
||||
PTod = 120*technology*1e-6/2; // m
|
||||
|
||||
calc_router_parameters();
|
||||
}
|
||||
|
||||
Router::~Router(){}
|
||||
|
||||
|
||||
// Capacitance of a wire of the given length, obtained from a temporary Wire
// built with g_ip->wt (a field of the global input parameters, not the
// Router::wt track width) and the constants 1, 3, 3.
// NOTE(review): the two 3s presumably select the triple spacing mentioned
// on the next line -- confirm against the Wire constructor.
double //wire cap with triple spacing
|
||||
Router::Cw3(double length) {
|
||||
Wire wc(g_ip->wt, length, 1, 3, 3);
|
||||
return (wc.wire_cap(length));
|
||||
}
|
||||
|
||||
/*Function to calculate the gate capacitance*/
|
||||
// Forwards to gate_C() with the width multiplied by 1e6 (marked /*u*/, so
// the helper takes microns and w is presumably in meters) and 0 as the
// second argument.
double
|
||||
Router::gate_cap(double w) {
|
||||
return (double) gate_C (w*1e6 /*u*/, 0);
|
||||
}
|
||||
|
||||
/*Function to calculate the diffusion capacitance*/
|
||||
// Forwards to drain_C_() with the width multiplied by 1e6 (marked /*u*/),
// the transistor type, the stack count truncated to int, the constant 1 and
// g_tp.cell_h_def.
double
|
||||
Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
|
||||
double s /*number of stacking transistors*/) {
|
||||
return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
|
||||
}
|
||||
|
||||
|
||||
/*crossbar related functions */
|
||||
|
||||
// Model for simple transmission gate
|
||||
// Input-side load of one transmission gate: n-mos (NTtr) plus p-mos (PTtr)
// diffusion capacitance, each with a stack of 1.
double
|
||||
Router::transmission_buf_inpcap() {
|
||||
return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
|
||||
}
|
||||
|
||||
// Output-side load of one transmission gate; the expression is identical to
// the one in transmission_buf_inpcap().
double
|
||||
Router::transmission_buf_outcap() {
|
||||
return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
|
||||
}
|
||||
|
||||
// Control-side load of one transmission gate: gate capacitance of the
// n-mos (NTtr) plus the p-mos (PTtr) device.
double
|
||||
Router::transmission_buf_ctrcap() {
|
||||
return gate_cap(NTtr)+gate_cap(PTtr);
|
||||
}
|
||||
|
||||
// Capacitance of one crossbar input line: a wire of length O*flit_size*wt,
// O transmission-gate input loads, plus the gate and diffusion capacitance
// of the NTid/PTid pair (presumably the input driver -- confirm).
double
|
||||
Router::crossbar_inpline() {
|
||||
return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
|
||||
gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
|
||||
}
|
||||
|
||||
// Capacitance of one crossbar output line: a wire of length I*flit_size*ht,
// I transmission-gate output loads, plus the gate and diffusion capacitance
// of the NTod/PTod pair (presumably the output driver -- confirm).
double
|
||||
Router::crossbar_outline() {
|
||||
return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
|
||||
gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
|
||||
}
|
||||
|
||||
// Capacitance of one crossbar control line: a wire of half the input-line
// length (0.5*O*flit_size*wt), flit_size transmission-gate control loads,
// plus the diffusion and gate capacitance of the NTi/PTi pair.
// No caller of this function appears in router.cc.
double
|
||||
Router::crossbar_ctrline() {
|
||||
return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
|
||||
diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
|
||||
gate_cap(NTi) + gate_cap(PTi));
|
||||
}
|
||||
|
||||
// Hand-computed crossbar switching estimate: the /2 on each term and the
// trailing *2 cancel, so the result is
// (crossbar_inpline() + crossbar_outline()) * Vdd^2 * flit_size.
// Within router.cc it is only referenced from the else branch of cb_stats().
double
|
||||
Router::tr_crossbar_power() {
|
||||
return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
|
||||
crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
|
||||
}
|
||||
|
||||
// Models the router's flit buffer as a single pure-RAM Mat and copies the
// resulting read power and area into `buffer`.
// The array has vc_buffer_size rows and flit_size * vc_count columns, with
// vc_count write ports and one read port. Every partitioning and muxing
// degree is forced to 1.
void Router::buffer_stats()
|
||||
{
|
||||
DynamicParameter dyn_p;
|
||||
dyn_p.is_tag = false;
|
||||
dyn_p.pure_cam = false;
|
||||
dyn_p.fully_assoc = false;
|
||||
dyn_p.pure_ram = true;
|
||||
dyn_p.is_dram = false;
|
||||
dyn_p.is_main_mem = false;
|
||||
dyn_p.num_subarrays = 1;
|
||||
dyn_p.num_mats = 1;
|
||||
dyn_p.Ndbl = 1;
|
||||
dyn_p.Ndwl = 1;
|
||||
dyn_p.Nspd = 1;
|
||||
dyn_p.deg_bl_muxing = 1;
|
||||
dyn_p.deg_senseamp_muxing_non_associativity = 1;
|
||||
dyn_p.Ndsam_lev_1 = 1;
|
||||
dyn_p.Ndsam_lev_2 = 1;
|
||||
dyn_p.Ndcm = 1;
|
||||
dyn_p.number_addr_bits_mat = 8;
|
||||
dyn_p.number_way_select_signals_mat = 1;
|
||||
dyn_p.number_subbanks_decode = 0;
|
||||
dyn_p.num_act_mats_hor_dir = 1;
|
||||
dyn_p.V_b_sense = Vdd; // FIXME check power calc.
|
||||
dyn_p.ram_cell_tech_type = 0;
|
||||
dyn_p.num_r_subarray = (int) vc_buffer_size;
|
||||
dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
|
||||
dyn_p.num_mats_h_dir = 1;
|
||||
dyn_p.num_mats_v_dir = 1;
|
||||
dyn_p.num_do_b_subbank = (int)flit_size;
|
||||
dyn_p.num_di_b_subbank = (int)flit_size;
|
||||
dyn_p.num_do_b_mat = (int) flit_size;
|
||||
dyn_p.num_di_b_mat = (int) flit_size;
|
||||
dyn_p.num_do_b_bank_per_port = (int) flit_size;
|
||||
dyn_p.num_di_b_bank_per_port = (int) flit_size;
|
||||
dyn_p.out_w = (int) flit_size;
|
||||
|
||||
dyn_p.use_inp_params = 1;
|
||||
dyn_p.num_wr_ports = (unsigned int) vc_count;
|
||||
dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
|
||||
dyn_p.num_rw_ports = 0;
|
||||
dyn_p.num_se_rd_ports =0;
|
||||
dyn_p.num_search_ports =0;
|
||||
|
||||
|
||||
|
||||
// Cell size = base SRAM cell plus extra wire pitches per additional port.
// NOTE(review): num_rw_ports is 0 here, so the leading `num_rw_ports - 1` in
// the width expression only reaches the intended count after the following
// additions; if the port fields are unsigned this passes through a
// wrap-around -- confirm the field types in DynamicParameter.
dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
|
||||
dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
|
||||
dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
|
||||
(dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
|
||||
dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
|
||||
|
||||
Mat buff(dyn_p);
|
||||
buff.compute_delays(0);
|
||||
buff.compute_power_energy();
|
||||
buffer.power.readOp = buff.power.readOp;
|
||||
// Write power is approximated by the read power (see FIXME).
buffer.power.writeOp = buffer.power.readOp; //FIXME
|
||||
buffer.area = buff.area;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Fills `crossbar` (delay, dynamic/leakage/gate-leakage read power, area)
// from a Crossbar object built with (I, O, flit_size).
// The condition is the literal 1, so the else branch never runs. That branch
// is the older hand-computed estimate and sets neither crossbar.delay nor
// crossbar.area.
void
|
||||
Router::cb_stats ()
|
||||
{
|
||||
if (1) {
|
||||
Crossbar c_b(I, O, flit_size);
|
||||
c_b.compute_power();
|
||||
crossbar.delay = c_b.delay;
|
||||
crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
|
||||
crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
|
||||
crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
|
||||
crossbar.area = c_b.area;
|
||||
// c_b.print_crossbar();
|
||||
}
|
||||
else {
|
||||
crossbar.power.readOp.dynamic = tr_crossbar_power();
|
||||
crossbar.power.readOp.leakage = flit_size * I * O *
|
||||
cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
|
||||
crossbar.power.readOp.gate_leakage = flit_size * I * O *
|
||||
cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes buffer, crossbar and arbiter power and combines them into `power`.
// Arbiter power is the VC arbiter times I plus the crossbar arbiter times O.
// Router dynamic power is (buffer read + buffer write + crossbar + arbiter)
// scaled by MIN(I, O) * M.
void
|
||||
Router::get_router_power()
|
||||
{
|
||||
/* calculate buffer stats */
|
||||
buffer_stats();
|
||||
|
||||
/* calculate cross-bar stats */
|
||||
cb_stats();
|
||||
|
||||
/* calculate arbiter stats */
|
||||
Arbiter vcarb(vc_count, flit_size, buffer.area.w);
|
||||
Arbiter cbarb(I, flit_size, crossbar.area.w);
|
||||
vcarb.compute_power();
|
||||
cbarb.compute_power();
|
||||
arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
|
||||
cbarb.power.readOp.dynamic * O;
|
||||
arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
|
||||
cbarb.power.readOp.leakage * O;
|
||||
arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
|
||||
cbarb.power.readOp.gate_leakage * O;
|
||||
|
||||
// arb_stats();
|
||||
power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
|
||||
crossbar.power.readOp.dynamic +
|
||||
arbiter.power.readOp.dynamic)*MIN(I, O)*M;
|
||||
// pppm_t scales the buffer's power components by {1, I, I, 1}; pppm_lkg is
// declared outside this function.
// NOTE(review): presumably pppm_lkg keeps only the leakage components so the
// dynamic value set above is not counted twice -- confirm where it is defined.
double pppm_t[4] = {1,I,I,1};
|
||||
power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
|
||||
|
||||
}
|
||||
|
||||
// Sets the network frequency and the fixed router delay.
// FREQUENCY starts at 5 (GHz) and cycle_time is its period in ps. max_cyc is
// 17 FO4 delays converted to ps. If the requested period is shorter than
// max_cyc, FREQUENCY is lowered to 1/max_cyc.
// `delay` is fixed at 4; print_router() reports it as the number of
// pipeline stages.
// NOTE(review): when FREQUENCY is lowered, cycle_time keeps the value derived
// from 5 GHz -- confirm no reader expects cycle_time to track FREQUENCY.
void
|
||||
Router::get_router_delay ()
|
||||
{
|
||||
FREQUENCY=5; // move this to config file --TODO
|
||||
cycle_time = (1/(double)FREQUENCY)*1e3; //ps
|
||||
delay = 4;
|
||||
max_cyc = 17 * g_tp.FO4; //s
|
||||
max_cyc *= 1e12; //ps
|
||||
if (cycle_time < max_cyc) {
|
||||
FREQUENCY = (1/max_cyc)*1e3; //GHz
|
||||
}
|
||||
}
|
||||
|
||||
// Router footprint: height is I times the buffer height, width is the
// buffer width plus the crossbar width. Arbiter area is not included.
// Relies on buffer.area and crossbar.area having been filled in already
// (calc_router_parameters() calls get_router_power() first).
void
|
||||
Router::get_router_area()
|
||||
{
|
||||
area.h = I*buffer.area.h;
|
||||
area.w = buffer.area.w+crossbar.area.w;
|
||||
}
|
||||
|
||||
// Runs the three model stages in order: delay, power, area.
// get_router_area() reads buffer.area and crossbar.area, which are written
// during get_router_power(), so the power stage must run before the area stage.
void
|
||||
Router::calc_router_parameters()
|
||||
{
|
||||
/* calculate router frequency and pipeline cycles */
|
||||
get_router_delay();
|
||||
|
||||
/* router power stats */
|
||||
get_router_power();
|
||||
|
||||
/* area stats */
|
||||
get_router_area();
|
||||
}
|
||||
|
||||
// Writes a human-readable summary of the router model to cout.
// Areas are multiplied by 1e-6 and labelled mm^2, dynamic values by 1e9 and
// labelled nJ, leakage values by 1e3 and labelled mW.
// NOTE(review): those scale factors imply areas stored in um^2, energies in
// J and leakage in W -- confirm against Component / powerDef.
void
|
||||
Router::print_router()
|
||||
{
|
||||
cout << "\n\nRouter stats:\n";
|
||||
cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n";
|
||||
cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
|
||||
cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n";
|
||||
cout << "\tNo. of Virtual channels - " << vc_count << "\n";
|
||||
cout << "\tNo. of pipeline stages - " << delay << endl;
|
||||
cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
|
||||
cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
|
||||
cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
|
||||
cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
|
||||
cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
|
||||
cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
|
||||
cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
|
||||
cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
|
||||
cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<<arbiter.power.readOp.dynamic * 1e9 <<" (nJ)\n";
|
||||
cout << "\tArbiter leakage (VC arb + Crossbar arb) - "<<arbiter.power.readOp.leakage * 1e3 <<" (mW)\n";
|
||||
|
||||
}
|
||||
|
115
T1/TP/TP1/cacti_7/router.h
Normal file
115
T1/TP/TP1/cacti_7/router.h
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*****************************************************************************
|
||||
* CACTI 7.0
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2015 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
|
||||
#ifndef __ROUTER_H__
|
||||
#define __ROUTER_H__
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include "basic_circuit.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "component.h"
|
||||
#include "mat.h"
|
||||
#include "parameter.h"
|
||||
#include "wire.h"
|
||||
#include "crossbar.h"
|
||||
#include "arbiter.h"
|
||||
|
||||
|
||||
|
||||
// Router: Component subclass modelling a network router as a flit buffer, a
// crossbar and arbiters. All results are computed inside the constructor;
// afterwards callers read the public members or call print_router().
class Router : public Component
|
||||
{
|
||||
public:
|
||||
// Defaults: peripheral-global device type, 5 input and 5 output ports,
// network load 0.6.
Router(
|
||||
double flit_size_,
|
||||
double vc_buf, /* vc size = vc_buffer_size * flit_size */
|
||||
double vc_count,
|
||||
/*TechnologyParameter::*/DeviceType *dt = &(g_tp.peri_global),
|
||||
double I_ = 5,
|
||||
double O_ = 5,
|
||||
double M_ = 0.6);
|
||||
~Router();
|
||||
|
||||
|
||||
void print_router();
|
||||
|
||||
// Per-part results filled in during construction.
Component arbiter, crossbar, buffer;
|
||||
|
||||
double cycle_time, max_cyc; // both in ps (see get_router_delay)
|
||||
double flit_size;
|
||||
double vc_count;
|
||||
double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
|
||||
|
||||
private:
|
||||
/*TechnologyParameter::*/DeviceType *deviceType;
|
||||
double FREQUENCY; // move this to config file --TODO
|
||||
double Cw3(double len);
|
||||
double gate_cap(double w);
|
||||
double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
|
||||
// NOTE(review): wtype, wire_placement, TriS1 and TriS2 are not assigned
// anywhere in router.cc -- confirm whether they are still needed.
enum Wire_type wtype;
|
||||
enum Wire_placement wire_placement;
|
||||
//crossbar
|
||||
double NTtr, PTtr, wt, ht, I, O, NTi, PTi, NTid, PTid, NTod, PTod, TriS1, TriS2;
|
||||
double M; //network load
|
||||
double transmission_buf_inpcap();
|
||||
double transmission_buf_outcap();
|
||||
double transmission_buf_ctrcap();
|
||||
double crossbar_inpline();
|
||||
double crossbar_outline();
|
||||
double crossbar_ctrline();
|
||||
double tr_crossbar_power();
|
||||
void cb_stats ();
|
||||
// NOTE(review): arb_power(), arb_stats() and buffer_params() have no
// definition in router.cc; calling them would presumably fail at link time
// unless they are defined elsewhere -- confirm.
double arb_power();
|
||||
void arb_stats ();
|
||||
double buffer_params();
|
||||
void buffer_stats();
|
||||
|
||||
|
||||
//arbiter
|
||||
|
||||
//buffer
|
||||
|
||||
//router params
|
||||
double Vdd;
|
||||
|
||||
void calc_router_parameters();
|
||||
void get_router_area();
|
||||
void get_router_power();
|
||||
void get_router_delay();
|
||||
|
||||
double min_w_pmos;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
259
T1/TP/TP1/cacti_7/sample_config_files/ddr3_cache.cfg
Normal file
259
T1/TP/TP1/cacti_7/sample_config_files/ddr3_cache.cfg
Normal file
|
@ -0,0 +1,259 @@
|
|||
# Cache size
|
||||
//-size (bytes) 2048
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 32768
|
||||
//-size (bytes) 131072
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
# power gating
|
||||
-Array Power Gating - "false"
|
||||
-WL Power Gating - "false"
|
||||
-CL Power Gating - "false"
|
||||
-Bitline floating - "false"
|
||||
-Interconnect Power Gating - "false"
|
||||
-Power Gating Performance Loss 0.01
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 64
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
//-associativity 8
|
||||
-associativity 8
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 1
|
||||
-technology (u) 0.022
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.090
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
//-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
//-Tag array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
//-Tag array peripheral type - "itrs-lop"
|
||||
|
||||
# Bus width include data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
-output/input bus width 512
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 360
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 22
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in router.cc. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
//-Wire outside mat - "global"
|
||||
-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
|
||||
|
||||
#### Default CONFIGURATION values for baseline external IO parameters to DRAM.
|
||||
|
||||
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential)
|
||||
|
||||
-dram_type "D"
|
||||
//-dram_type "L"
|
||||
//-dram_type "W"
|
||||
//-dram_type "S"
|
||||
|
||||
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||||
|
||||
//-iostate "R"
|
||||
-iostate "W"
|
||||
//-iostate "I"
|
||||
//-iostate "S"
|
||||
|
||||
# Is ECC Enabled (Y=Yes, N=No)
|
||||
|
||||
-dram_ecc "Y"
|
||||
|
||||
#Address bus timing
|
||||
|
||||
//-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3
|
||||
-addr_timing 1.0 //SDR for DDR3, Wide-IO
|
||||
//-addr_timing 2.0 //2T timing
|
||||
//-addr_timing 3.0 // 3T timing
|
||||
|
||||
# Bandwidth (Gbytes per second, this is the effective bandwidth)
|
||||
|
||||
-bus_bw 12.8 GBps //Valid range 0 to 2*bus_freq*num_dq
|
||||
|
||||
# Memory Density (Gbit per memory/DRAM die)
|
||||
|
||||
-mem_density 4 Gb //Valid values 2^n Gb
|
||||
|
||||
# IO frequency (MHz) (frequency of the external memory interface).
|
||||
|
||||
-bus_freq 800 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential
|
||||
|
||||
# Duty Cycle (fraction of time in the Memory State defined above)
|
||||
|
||||
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||||
|
||||
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Number of DQ pins
|
||||
|
||||
-num_dq 72 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72.
|
||||
|
||||
# Number of DQS pins
|
||||
|
||||
-num_dqs 18 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||||
|
||||
# Number of CA pins
|
||||
|
||||
-num_ca 25 //Valid range 0 to 35 pins.
|
||||
|
||||
# Number of CLK pins
|
||||
|
||||
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
|
||||
|
||||
# Number of Physical Ranks
|
||||
|
||||
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip
|
||||
|
||||
# Width of the Memory Data Bus
|
||||
|
||||
-mem_data_width 8 //x4 or x8 or x16 or x32 or x128 memories
|
259
T1/TP/TP1/cacti_7/sample_config_files/diff_ddr3_cache.cfg
Normal file
259
T1/TP/TP1/cacti_7/sample_config_files/diff_ddr3_cache.cfg
Normal file
|
@ -0,0 +1,259 @@
|
|||
# Cache size
|
||||
//-size (bytes) 2048
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 32768
|
||||
//-size (bytes) 131072
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
# power gating
|
||||
-Array Power Gating - "false"
|
||||
-WL Power Gating - "false"
|
||||
-CL Power Gating - "false"
|
||||
-Bitline floating - "false"
|
||||
-Interconnect Power Gating - "false"
|
||||
-Power Gating Performance Loss 0.01
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 64
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
//-associativity 8
|
||||
-associativity 8
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 1
|
||||
-technology (u) 0.022
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.090
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
//-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
//-Tag array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
//-Tag array peripheral type - "itrs-lop"
|
||||
|
||||
# Bus width includes data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
-output/input bus width 512
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 360
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 22
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
//-Wire outside mat - "global"
|
||||
-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
|
||||
|
||||
#### Default CONFIGURATION values for baseline external IO parameters to DRAM.
|
||||
|
||||
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential)
|
||||
|
||||
//-dram_type "D"
|
||||
//-dram_type "L"
|
||||
//-dram_type "W"
|
||||
-dram_type "S"
|
||||
|
||||
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||||
|
||||
//-iostate "R"
|
||||
-iostate "W"
|
||||
//-iostate "I"
|
||||
//-iostate "S"
|
||||
|
||||
# Is ECC Enabled (Y=Yes, N=No)
|
||||
|
||||
-dram_ecc "N"
|
||||
|
||||
#Address bus timing
|
||||
|
||||
//-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3
|
||||
-addr_timing 1.0 //SDR for DDR3, Wide-IO
|
||||
//-addr_timing 2.0 //2T timing
|
||||
//-addr_timing 3.0 //3T timing
|
||||
|
||||
# Bandwidth (Gbytes per second, this is the effective bandwidth)
|
||||
|
||||
-bus_bw 6 GBps //Valid range 0 to 2*bus_freq*num_dq
|
||||
|
||||
# Memory Density (Gbit per memory/DRAM die)
|
||||
|
||||
-mem_density 4 Gb //Valid values 2^n Gb
|
||||
|
||||
# IO frequency (MHz) (frequency of the external memory interface).
|
||||
|
||||
-bus_freq 3000 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential
|
||||
|
||||
# Duty Cycle (fraction of time in the Memory State defined above)
|
||||
|
||||
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||||
|
||||
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Number of DQ pins
|
||||
|
||||
-num_dq 8 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72.
|
||||
|
||||
# Number of DQS pins
|
||||
|
||||
-num_dqs 2 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||||
|
||||
# Number of CA pins
|
||||
|
||||
-num_ca 0 //Valid range 0 to 35 pins.
|
||||
|
||||
# Number of CLK pins
|
||||
|
||||
-num_clk 0 //2 x differential pair. Valid values: 0/2/4.
|
||||
|
||||
# Number of Physical Ranks
|
||||
|
||||
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip
|
||||
|
||||
# Width of the Memory Data Bus
|
||||
|
||||
-mem_data_width 8 //x4 or x8 or x16 or x32 memories
|
259
T1/TP/TP1/cacti_7/sample_config_files/lpddr3_cache.cfg
Normal file
259
T1/TP/TP1/cacti_7/sample_config_files/lpddr3_cache.cfg
Normal file
|
@ -0,0 +1,259 @@
|
|||
# Cache size
|
||||
//-size (bytes) 2048
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 32768
|
||||
//-size (bytes) 131072
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
# power gating
|
||||
-Array Power Gating - "false"
|
||||
-WL Power Gating - "false"
|
||||
-CL Power Gating - "false"
|
||||
-Bitline floating - "false"
|
||||
-Interconnect Power Gating - "false"
|
||||
-Power Gating Performance Loss 0.01
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 64
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
//-associativity 8
|
||||
-associativity 8
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 1
|
||||
-technology (u) 0.022
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.090
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
//-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
//-Tag array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
//-Tag array peripheral type - "itrs-lop"
|
||||
|
||||
# Bus width includes data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
-output/input bus width 512
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 360
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 22
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
//-Wire outside mat - "global"
|
||||
-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
|
||||
|
||||
#### Default CONFIGURATION values for baseline external IO parameters to DRAM.
|
||||
|
||||
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential)
|
||||
|
||||
//-dram_type "D"
|
||||
-dram_type "L"
|
||||
//-dram_type "W"
|
||||
//-dram_type "S"
|
||||
|
||||
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||||
|
||||
//-iostate "R"
|
||||
-iostate "W"
|
||||
//-iostate "I"
|
||||
//-iostate "S"
|
||||
|
||||
# Is ECC Enabled (Y=Yes, N=No)
|
||||
|
||||
-dram_ecc "N"
|
||||
|
||||
#Address bus timing
|
||||
|
||||
-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3
|
||||
//-addr_timing 1.0 //SDR for DDR3, Wide-IO
|
||||
//-addr_timing 2.0 //2T timing
|
||||
//-addr_timing 3.0 //3T timing
|
||||
|
||||
# Bandwidth (Gbytes per second, this is the effective bandwidth)
|
||||
|
||||
-bus_bw 6.4 GBps //Valid range 0 to 2*bus_freq*num_dq
|
||||
|
||||
# Memory Density (Gbit per memory/DRAM die)
|
||||
|
||||
-mem_density 4 Gb //Valid values 2^n Gb
|
||||
|
||||
# IO frequency (MHz) (frequency of the external memory interface).
|
||||
|
||||
-bus_freq 800 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential
|
||||
|
||||
# Duty Cycle (fraction of time in the Memory State defined above)
|
||||
|
||||
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||||
|
||||
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Number of DQ pins
|
||||
|
||||
-num_dq 32 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72.
|
||||
|
||||
# Number of DQS pins
|
||||
|
||||
-num_dqs 8 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||||
|
||||
# Number of CA pins
|
||||
|
||||
-num_ca 14 //Valid range 0 to 35 pins.
|
||||
|
||||
# Number of CLK pins
|
||||
|
||||
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
|
||||
|
||||
# Number of Physical Ranks
|
||||
|
||||
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip
|
||||
|
||||
# Width of the Memory Data Bus
|
||||
|
||||
-mem_data_width 32 //x4 or x8 or x16 or x32 or x128 memories
|
259
T1/TP/TP1/cacti_7/sample_config_files/wideio_cache.cfg
Normal file
259
T1/TP/TP1/cacti_7/sample_config_files/wideio_cache.cfg
Normal file
|
@ -0,0 +1,259 @@
|
|||
# Cache size
|
||||
//-size (bytes) 2048
|
||||
//-size (bytes) 4096
|
||||
//-size (bytes) 32768
|
||||
//-size (bytes) 131072
|
||||
//-size (bytes) 262144
|
||||
//-size (bytes) 1048576
|
||||
//-size (bytes) 2097152
|
||||
//-size (bytes) 4194304
|
||||
-size (bytes) 8388608
|
||||
//-size (bytes) 16777216
|
||||
//-size (bytes) 33554432
|
||||
//-size (bytes) 134217728
|
||||
//-size (bytes) 67108864
|
||||
//-size (bytes) 1073741824
|
||||
|
||||
# power gating
|
||||
-Array Power Gating - "false"
|
||||
-WL Power Gating - "false"
|
||||
-CL Power Gating - "false"
|
||||
-Bitline floating - "false"
|
||||
-Interconnect Power Gating - "false"
|
||||
-Power Gating Performance Loss 0.01
|
||||
|
||||
# Line size
|
||||
//-block size (bytes) 8
|
||||
-block size (bytes) 64
|
||||
|
||||
# To model Fully Associative cache, set associativity to zero
|
||||
//-associativity 0
|
||||
//-associativity 2
|
||||
//-associativity 4
|
||||
//-associativity 8
|
||||
-associativity 8
|
||||
|
||||
-read-write port 1
|
||||
-exclusive read port 0
|
||||
-exclusive write port 0
|
||||
-single ended read ports 0
|
||||
|
||||
# Multiple banks connected using a bus
|
||||
-UCA bank count 1
|
||||
-technology (u) 0.022
|
||||
//-technology (u) 0.040
|
||||
//-technology (u) 0.032
|
||||
//-technology (u) 0.090
|
||||
|
||||
# following three parameters are meaningful only for main memories
|
||||
|
||||
-page size (bits) 8192
|
||||
-burst length 8
|
||||
-internal prefetch width 8
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Data array cell type - "itrs-hp"
|
||||
//-Data array cell type - "itrs-lstp"
|
||||
//-Data array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Data array peripheral type - "itrs-hp"
|
||||
//-Data array peripheral type - "itrs-lstp"
|
||||
//-Data array peripheral type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||||
-Tag array cell type - "itrs-hp"
|
||||
//-Tag array cell type - "itrs-lstp"
|
||||
//-Tag array cell type - "itrs-lop"
|
||||
|
||||
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||||
-Tag array peripheral type - "itrs-hp"
|
||||
//-Tag array peripheral type - "itrs-lstp"
|
||||
//-Tag array peripheral type - "itrs-lop"
|
||||
|
||||
# Bus width includes data bits and address bits required by the decoder
|
||||
//-output/input bus width 16
|
||||
-output/input bus width 512
|
||||
|
||||
// 300-400 in steps of 10
|
||||
-operating temperature (K) 360
|
||||
|
||||
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||||
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||||
-cache type "cache"
|
||||
//-cache type "ram"
|
||||
//-cache type "main memory"
|
||||
|
||||
# to model special structure like branch target buffers, directory, etc.
|
||||
# change the tag size parameter
|
||||
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||||
-tag size (b) "default"
|
||||
//-tag size (b) 22
|
||||
|
||||
# fast - data and tag access happen in parallel
|
||||
# sequential - data array is accessed after accessing the tag array
|
||||
# normal - data array lookup and tag access happen in parallel
|
||||
# final data block is broadcasted in data array h-tree
|
||||
# after getting the signal from the tag array
|
||||
//-access mode (normal, sequential, fast) - "fast"
|
||||
-access mode (normal, sequential, fast) - "normal"
|
||||
//-access mode (normal, sequential, fast) - "sequential"
|
||||
|
||||
|
||||
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||||
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||||
|
||||
# Percentage deviation from the minimum value
|
||||
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||||
# that compromises at most 10% delay.
|
||||
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||||
# percentage values will not produce any valid organizations. For example,
|
||||
# 0:0:100:100:100 will try to identify an organization that has both
|
||||
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||||
# throw an error. Refer CACTI-6 Technical report for more details
|
||||
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||||
|
||||
# Objective for NUCA
|
||||
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||||
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||||
|
||||
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||||
# energy-delay or energy-delay sq. product
|
||||
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||||
# Set it to NONE to let weight and deviate values determine the
|
||||
# appropriate cache configuration
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||||
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||||
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||||
|
||||
-Cache model (NUCA, UCA) - "UCA"
|
||||
//-Cache model (NUCA, UCA) - "NUCA"
|
||||
|
||||
# In order for CACTI to find the optimal NUCA bank value the following
|
||||
# variable should be assigned 0.
|
||||
-NUCA bank count 0
|
||||
|
||||
# NOTE: for nuca network frequency is set to a default value of
|
||||
# 5GHz in time.c. CACTI automatically
|
||||
# calculates the maximum possible frequency and downgrades this value if necessary
|
||||
|
||||
# By default CACTI considers both full-swing and low-swing
|
||||
# wires to find an optimal configuration. However, it is possible to
|
||||
# restrict the search space by changing the signaling from "default" to
|
||||
# "fullswing" or "lowswing" type.
|
||||
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||||
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||||
|
||||
//-Wire inside mat - "global"
|
||||
-Wire inside mat - "semi-global"
|
||||
//-Wire outside mat - "global"
|
||||
-Wire outside mat - "semi-global"
|
||||
|
||||
-Interconnect projection - "conservative"
|
||||
//-Interconnect projection - "aggressive"
|
||||
|
||||
# Contention in network (which is a function of core count and cache level) is one of
|
||||
# the critical factors used for deciding the optimal bank count value
|
||||
# core count can be 4, 8, or 16
|
||||
//-Core count 4
|
||||
-Core count 8
|
||||
//-Core count 16
|
||||
-Cache level (L2/L3) - "L3"
|
||||
|
||||
-Add ECC - "true"
|
||||
|
||||
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||||
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||||
|
||||
# for debugging
|
||||
//-Print input parameters - "true"
|
||||
-Print input parameters - "false"
|
||||
# force CACTI to model the cache with the
|
||||
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||||
# and Ndcm values
|
||||
//-Force cache config - "true"
|
||||
-Force cache config - "false"
|
||||
-Ndwl 1
|
||||
-Ndbl 1
|
||||
-Nspd 0
|
||||
-Ndcm 1
|
||||
-Ndsam1 0
|
||||
-Ndsam2 0
|
||||
|
||||
|
||||
|
||||
#### Default CONFIGURATION values for baseline external IO parameters to DRAM.
|
||||
|
||||
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Low-swing differential)
|
||||
|
||||
//-dram_type "D"
|
||||
//-dram_type "L"
|
||||
-dram_type "W"
|
||||
//-dram_type "S"
|
||||
|
||||
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||||
|
||||
//-iostate "R"
|
||||
-iostate "W"
|
||||
//-iostate "I"
|
||||
//-iostate "S"
|
||||
|
||||
# Is ECC Enabled (Y=Yes, N=No)
|
||||
|
||||
-dram_ecc "N"
|
||||
|
||||
#Address bus timing
|
||||
|
||||
//-addr_timing 0.5 //DDR, for LPDDR2 and LPDDR3
|
||||
-addr_timing 1.0 //SDR for DDR3, Wide-IO
|
||||
//-addr_timing 2.0 //2T timing
|
||||
//-addr_timing 3.0 //3T timing
|
||||
|
||||
# Bandwidth (Gbytes per second, this is the effective bandwidth)
|
||||
|
||||
-bus_bw 12.8 GBps //Valid range 0 to 2*bus_freq*num_dq
|
||||
|
||||
# Memory Density (Gbit per memory/DRAM die)
|
||||
|
||||
-mem_density 4 Gb //Valid values 2^n Gb
|
||||
|
||||
# IO frequency (MHz) (frequency of the external memory interface).
|
||||
|
||||
-bus_freq 400 MHz //Valid range 0 to 1.5 GHz for DDR3, 0 to 1.2 GHz for LPDDR3, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential
|
||||
|
||||
# Duty Cycle (fraction of time in the Memory State defined above)
|
||||
|
||||
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||||
|
||||
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||||
|
||||
-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||||
|
||||
# Number of DQ pins
|
||||
|
||||
-num_dq 128 //Include ECC pins as well (if present). If ECC pins are included, the bus bandwidth is 2*(num_dq-#of ECC pins)*bus_freq. Valid range 0 to 72.
|
||||
|
||||
# Number of DQS pins
|
||||
|
||||
-num_dqs 16 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||||
|
||||
# Number of CA pins
|
||||
|
||||
-num_ca 30 //Valid range 0 to 35 pins.
|
||||
|
||||
# Number of CLK pins
|
||||
|
||||
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
|
||||
|
||||
# Number of Physical Ranks
|
||||
|
||||
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per DIMM or buffer chip
|
||||
|
||||
# Width of the Memory Data Bus
|
||||
|
||||
-mem_data_width 128 //x4 or x8 or x16 or x32 or x128 memories
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue