255 lines
9.6 KiB
INI
255 lines
9.6 KiB
INI
|
# Cache size
|
||
|
//-size (bytes) 2048
|
||
|
//-size (bytes) 4096
|
||
|
//-size (bytes) 32768
|
||
|
//-size (bytes) 131072
|
||
|
//-size (bytes) 262144
|
||
|
//-size (bytes) 1048576
|
||
|
//-size (bytes) 2097152
|
||
|
//-size (bytes) 4194304
|
||
|
-size (bytes) 8388608
|
||
|
//-size (bytes) 16777216
|
||
|
//-size (bytes) 33554432
|
||
|
//-size (bytes) 134217728
|
||
|
//-size (bytes) 67108864
|
||
|
//-size (bytes) 1073741824
|
||
|
|
||
|
# power gating
|
||
|
-Array Power Gating - "false"
|
||
|
-WL Power Gating - "false"
|
||
|
-CL Power Gating - "false"
|
||
|
-Bitline floating - "false"
|
||
|
-Interconnect Power Gating - "false"
|
||
|
-Power Gating Performance Loss 0.01
|
||
|
|
||
|
# Line size
|
||
|
//-block size (bytes) 8
|
||
|
-block size (bytes) 64
|
||
|
|
||
|
# To model Fully Associative cache, set associativity to zero
|
||
|
//-associativity 0
|
||
|
//-associativity 2
|
||
|
//-associativity 4
|
||
|
//-associativity 8
|
||
|
-associativity 8
|
||
|
|
||
|
-read-write port 1
|
||
|
-exclusive read port 0
|
||
|
-exclusive write port 0
|
||
|
-single ended read ports 0
|
||
|
|
||
|
# Multiple banks connected using a bus
|
||
|
-UCA bank count 1
|
||
|
-technology (u) 0.022
|
||
|
//-technology (u) 0.040
|
||
|
//-technology (u) 0.032
|
||
|
//-technology (u) 0.090
|
||
|
|
||
|
# following three parameters are meaningful only for main memories
|
||
|
|
||
|
-page size (bits) 8192
|
||
|
-burst length 8
|
||
|
-internal prefetch width 8
|
||
|
|
||
|
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||
|
-Data array cell type - "itrs-hp"
|
||
|
//-Data array cell type - "itrs-lstp"
|
||
|
//-Data array cell type - "itrs-lop"
|
||
|
|
||
|
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||
|
-Data array peripheral type - "itrs-hp"
|
||
|
//-Data array peripheral type - "itrs-lstp"
|
||
|
//-Data array peripheral type - "itrs-lop"
|
||
|
|
||
|
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
|
||
|
-Tag array cell type - "itrs-hp"
|
||
|
//-Tag array cell type - "itrs-lstp"
|
||
|
//-Tag array cell type - "itrs-lop"
|
||
|
|
||
|
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
|
||
|
-Tag array peripheral type - "itrs-hp"
|
||
|
//-Tag array peripheral type - "itrs-lstp"
|
||
|
//-Tag array peripheral type - "itrs-lop"
|
||
|
|
||
|
# Bus width include data bits and address bits required by the decoder
|
||
|
//-output/input bus width 16
|
||
|
-output/input bus width 512
|
||
|
|
||
|
// 300-400 in steps of 10
|
||
|
-operating temperature (K) 360
|
||
|
|
||
|
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
|
||
|
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
|
||
|
-cache type "cache"
|
||
|
//-cache type "ram"
|
||
|
//-cache type "main memory"
|
||
|
|
||
|
# to model special structure like branch target buffers, directory, etc.
|
||
|
# change the tag size parameter
|
||
|
# if you want cacti to calculate the tagbits, set the tag size to "default"
|
||
|
-tag size (b) "default"
|
||
|
//-tag size (b) 22
|
||
|
|
||
|
# fast - data and tag access happen in parallel
|
||
|
# sequential - data array is accessed after accessing the tag array
|
||
|
# normal - data array lookup and tag access happen in parallel
|
||
|
# final data block is broadcast in data array h-tree
|
||
|
# after getting the signal from the tag array
|
||
|
//-access mode (normal, sequential, fast) - "fast"
|
||
|
-access mode (normal, sequential, fast) - "normal"
|
||
|
//-access mode (normal, sequential, fast) - "sequential"
|
||
|
|
||
|
|
||
|
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
|
||
|
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
|
||
|
|
||
|
# Percentage deviation from the minimum value
|
||
|
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
|
||
|
# that compromises at most 10% delay.
|
||
|
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
|
||
|
# percentage values will not produce any valid organizations. For example,
|
||
|
# 0:0:100:100:100 will try to identify an organization that has both
|
||
|
# least delay and dynamic power. Since such an organization is not possible, CACTI will
|
||
|
# throw an error. Refer to the CACTI-6 technical report for more details.
|
||
|
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
|
||
|
|
||
|
# Objective for NUCA
|
||
|
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
|
||
|
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
|
||
|
|
||
|
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
|
||
|
# energy-delay or energy-delay sq. product
|
||
|
# Note: Optimize tag will disable weight or deviate values mentioned above
|
||
|
# Set it to NONE to let weight and deviate values determine the
|
||
|
# appropriate cache configuration
|
||
|
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
|
||
|
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
|
||
|
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
|
||
|
|
||
|
-Cache model (NUCA, UCA) - "UCA"
|
||
|
//-Cache model (NUCA, UCA) - "NUCA"
|
||
|
|
||
|
# In order for CACTI to find the optimal NUCA bank value the following
|
||
|
# variable should be assigned 0.
|
||
|
-NUCA bank count 0
|
||
|
|
||
|
# NOTE: for nuca network frequency is set to a default value of
|
||
|
# 5GHz in time.c. CACTI automatically
|
||
|
# calculates the maximum possible frequency and downgrades this value if necessary
|
||
|
|
||
|
# By default CACTI considers both full-swing and low-swing
|
||
|
# wires to find an optimal configuration. However, it is possible to
|
||
|
# restrict the search space by changing the signaling from "default" to
|
||
|
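# "fullswing" or "lowswing" type. NOTE(review): the active value below, "Global_30", is not one of the three values listed here -- confirm it is accepted by the CACTI version in use.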
|
||
|
-Wire signaling (fullswing, lowswing, default) - "Global_30"
|
||
|
//-Wire signaling (fullswing, lowswing, default) - "default"
|
||
|
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
|
||
|
|
||
|
//-Wire inside mat - "global"
|
||
|
-Wire inside mat - "semi-global"
|
||
|
//-Wire outside mat - "global"
|
||
|
-Wire outside mat - "semi-global"
|
||
|
|
||
|
-Interconnect projection - "conservative"
|
||
|
//-Interconnect projection - "aggressive"
|
||
|
|
||
|
# Contention in network (which is a function of core count and cache level) is one of
|
||
|
# the critical factors used for deciding the optimal bank count value
|
||
|
# core count can be 4, 8, or 16
|
||
|
//-Core count 4
|
||
|
-Core count 8
|
||
|
//-Core count 16
|
||
|
-Cache level (L2/L3) - "L3"
|
||
|
|
||
|
-Add ECC - "true"
|
||
|
|
||
|
//-Print level (DETAILED, CONCISE) - "CONCISE"
|
||
|
-Print level (DETAILED, CONCISE) - "DETAILED"
|
||
|
|
||
|
# for debugging
|
||
|
//-Print input parameters - "true"
|
||
|
-Print input parameters - "false"
|
||
|
# force CACTI to model the cache with the
|
||
|
# following Ndbl, Ndwl, Nspd, Ndsam,
|
||
|
# and Ndcm values
|
||
|
//-Force cache config - "true"
|
||
|
-Force cache config - "false"
|
||
|
-Ndwl 1
|
||
|
-Ndbl 1
|
||
|
-Nspd 0
|
||
|
-Ndcm 1
|
||
|
-Ndsam1 0
|
||
|
-Ndsam2 0
|
||
|
|
||
|
|
||
|
|
||
|
#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report, especially Chapters 2 and 3.
|
||
|
|
||
|
# Memory Type (D=DDR3, L=LPDDR2, W=WideIO, S=Serial). Additional memory types can be defined by the user in extio_technology.cc, along with their technology and configuration parameters.
|
||
|
|
||
|
-dram_type "D"
|
||
|
//-dram_type "L"
|
||
|
//-dram_type "W"
|
||
|
//-dram_type "S"
|
||
|
|
||
|
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
|
||
|
|
||
|
//-iostate "R"
|
||
|
-iostate "W"
|
||
|
//-iostate "I"
|
||
|
//-iostate "S"
|
||
|
|
||
|
#Address bus timing. To alleviate the timing on the command and address bus due to high loading (shared across all memories on the channel), the interface allows for multi-cycle timing options.
|
||
|
|
||
|
-addr_timing 0.5 //DDR
|
||
|
//-addr_timing 1.0 //SDR (half of DQ rate)
|
||
|
//-addr_timing 2.0 //2T timing (One fourth of DQ rate)
|
||
|
//-addr_timing 3.0 // 3T timing (One sixth of DQ rate)
|
||
|
|
||
|
# Memory Density (Gbit per memory/DRAM die)
|
||
|
|
||
|
-mem_density 8 Gb //Valid values 2^n Gb
|
||
|
|
||
|
# IO frequency (MHz) (frequency of the external memory interface).
|
||
|
|
||
|
-bus_freq 800 MHz //As of current memory standards (2013), valid range 0 to 1.5 GHz for DDR3, 0 to 533 MHz for LPDDR2, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential. However this can change, and the user is free to define valid ranges based on new memory types or extending beyond existing standards for existing dram types.
|
||
|
|
||
|
# Duty Cycle (fraction of time in the Memory State defined above)
|
||
|
|
||
|
-duty_cycle 1.0 //Valid range 0 to 1.0
|
||
|
|
||
|
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||
|
|
||
|
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||
|
#-activity_dq .50 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
|
||
|
|
||
|
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
|
||
|
|
||
|
-activity_ca 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
|
||
|
#-activity_ca 0.25 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
|
||
|
|
||
|
# Number of DQ pins
|
||
|
|
||
|
-num_dq 72 //Number of DQ pins. Includes ECC pins.
|
||
|
|
||
|
# Number of DQS pins. DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typically differential, just like the CLK pin.
|
||
|
|
||
|
-num_dqs 36 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
|
||
|
|
||
|
# Number of CA pins
|
||
|
|
||
|
-num_ca 35 //Valid range 0 to 35 pins.
|
||
|
#-num_ca 25 //Valid range 0 to 35 pins.
|
||
|
|
||
|
# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin.
|
||
|
|
||
|
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
|
||
|
|
||
|
# Number of Physical Ranks
|
||
|
|
||
|
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register.
|
||
|
|
||
|
# Width of the Memory Data Bus
|
||
|
|
||
|
-mem_data_width 4 //x4 or x8 or x16 or x32 memories. For WideIO up to x128.
|