M2_SETI/T1/TP/TP1/cacti_7/cache.cfg
2022-11-18 15:07:43 +01:00

306 lines
10 KiB
INI

# Cache size
//-size (bytes) 2048
//-size (bytes) 4096
//-size (bytes) 32768
//-size (bytes) 262144
//-size (bytes) 1048576
//-size (bytes) 2097152
//-size (bytes) 4194304
//-size (bytes) 8388608
//-size (bytes) 16777216
//-size (bytes) 33554432
//-size (bytes) 134217728
//-size (bytes) 67108864
//-size (bytes) 1073741824
# power gating
-Array Power Gating - "false"
-WL Power Gating - "false"
-CL Power Gating - "false"
-Bitline floating - "false"
-Interconnect Power Gating - "false"
-Power Gating Performance Loss 0.01
# Line size
//-block size (bytes) 8
-block size (bytes) 64
# To model Fully Associative cache, set associativity to zero
//-associativity 0
-associativity 2
//-associativity 4
//-associativity 8
-size (bytes) 131072
-read-write port 1
-exclusive read port 0
-exclusive write port 0
-single ended read ports 0
# Multiple banks connected using a bus
-UCA bank count 1
//-technology (u) 0.022
//-technology (u) 0.040
//-technology (u) 0.032
-technology (u) 0.090
# following three parameters are meaningful only for main memories
-page size (bits) 8192
-burst length 8
-internal prefetch width 8
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
-Data array cell type - "itrs-hp"
//-Data array cell type - "itrs-lstp"
//-Data array cell type - "itrs-lop"
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
-Data array peripheral type - "itrs-hp"
//-Data array peripheral type - "itrs-lstp"
//-Data array peripheral type - "itrs-lop"
# following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
-Tag array cell type - "itrs-hp"
//-Tag array cell type - "itrs-lstp"
//-Tag array cell type - "itrs-lop"
# following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
-Tag array peripheral type - "itrs-hp"
//-Tag array peripheral type - "itrs-lstp"
//-Tag array peripheral type - "itrs-lop
# Bus width include data bits and address bits required by the decoder
//-output/input bus width 16
-output/input bus width 512
// 300-400 in steps of 10
-operating temperature (K) 360
# Type of memory - cache (with a tag array) or ram (scratch ram similar to a register file)
# or main memory (no tag array and every access will happen at a page granularity Ref: CACTI 5.3 report)
-cache type "cache"
//-cache type "ram"
//-cache type "main memory"
# to model special structure like branch target buffers, directory, etc.
# change the tag size parameter
# if you want cacti to calculate the tagbits, set the tag size to "default"
-tag size (b) "default"
//-tag size (b) 22
# fast - data and tag access happen in parallel
# sequential - data array is accessed after accessing the tag array
# normal - data array lookup and tag access happen in parallel
# final data block is broadcasted in data array h-tree
# after getting the signal from the tag array
//-access mode (normal, sequential, fast) - "fast"
-access mode (normal, sequential, fast) - "normal"
//-access mode (normal, sequential, fast) - "sequential"
# DESIGN OBJECTIVE for UCA (or banks in NUCA)
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0
# Percentage deviation from the minimum value
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
# that compromises at most 10% delay.
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
# percentage values will not produce any valid organizations. For example,
# 0:0:100:100:100 will try to identify an organization that has both
# least delay and dynamic power. Since such an organization is not possible, CACTI will
# throw an error. Refer CACTI-6 Technical report for more details
-deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000
# Objective for NUCA
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000
# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for
# energy-delay or energy-delay sq. product
# Note: Optimize tag will disable weight or deviate values mentioned above
# Set it to NONE to let weight and deviate values determine the
# appropriate cache configuration
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
//-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
-Cache model (NUCA, UCA) - "UCA"
//-Cache model (NUCA, UCA) - "NUCA"
# In order for CACTI to find the optimal NUCA bank value the following
# variable should be assigned 0.
-NUCA bank count 0
# NOTE: for nuca network frequency is set to a default value of
# 5GHz in time.c. CACTI automatically
# calculates the maximum possible frequency and downgrades this value if necessary
# By default CACTI considers both full-swing and low-swing
# wires to find an optimal configuration. However, it is possible to
# restrict the search space by changing the signaling from "default" to
# "fullswing" or "lowswing" type.
-Wire signaling (fullswing, lowswing, default) - "Global_30"
//-Wire signaling (fullswing, lowswing, default) - "default"
//-Wire signaling (fullswing, lowswing, default) - "lowswing"
//-Wire inside mat - "global"
-Wire inside mat - "semi-global"
//-Wire outside mat - "global"
-Wire outside mat - "semi-global"
-Interconnect projection - "conservative"
//-Interconnect projection - "aggressive"
# Contention in network (which is a function of core count and cache level) is one of
# the critical factor used for deciding the optimal bank count value
# core count can be 4, 8, or 16
//-Core count 4
-Core count 8
//-Core count 16
-Cache level (L2/L3) - "L3"
-Add ECC - "true"
//-Print level (DETAILED, CONCISE) - "CONCISE"
-Print level (DETAILED, CONCISE) - "DETAILED"
# for debugging
-Print input parameters - "true"
//-Print input parameters - "false"
# force CACTI to model the cache with the
# following Ndbl, Ndwl, Nspd, Ndsam,
# and Ndcm values
//-Force cache config - "true"
-Force cache config - "false"
-Ndwl 1
-Ndbl 1
-Nspd 0
-Ndcm 1
-Ndsam1 0
-Ndsam2 0
#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report (), especially Chapters 2 and 3.
# Memory Type (D3=DDR3, D4=DDR4, L=LPDDR2, W=WideIO, S=Serial). Additional memory types can be defined by the user in extio_technology.cc, along with their technology and configuration parameters.
-dram_type "DDR3"
//-dram_type "DDR4"
//-dram_type "LPDDR2"
//-dram_type "WideIO"
//-dram_type "Serial"
# Memory State (R=Read, W=Write, I=Idle or S=Sleep)
//-io state "READ"
-io state "WRITE"
//-io state "IDLE"
//-io state "SLEEP"
#Address bus timing. To alleviate the timing on the command and address bus due to high loading (shared across all memories on the channel), the interface allows for multi-cycle timing options.
//-addr_timing 0.5 //DDR
-addr_timing 1.0 //SDR (half of DQ rate)
//-addr_timing 2.0 //2T timing (One fourth of DQ rate)
//-addr_timing 3.0 // 3T timing (One sixth of DQ rate)
# Memory Density (Gbit per memory/DRAM die)
-mem_density 4 Gb //Valid values 2^n Gb
# IO frequency (MHz) (frequency of the external memory interface).
-bus_freq 800 MHz //As of current memory standards (2013), valid range 0 to 1.5 GHz for DDR3, 0 to 533 MHz for LPDDR2, 0 - 800 MHz for WideIO and 0 - 3 GHz for Low-swing differential. However this can change, and the user is free to define valid ranges based on new memory types or extending beyond existing standards for existing dram types.
# Duty Cycle (fraction of time in the Memory State defined above)
-duty_cycle 1.0 //Valid range 0 to 1.0
# Activity factor for Data (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
-activity_dq 1.0 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR
# Activity factor for Control/Address (0->1 transitions) per cycle (for DDR, need to account for the higher activity in this parameter. E.g. max. activity factor for DDR is 1.0, for SDR is 0.5)
-activity_ca 0.5 //Valid range 0 to 1.0 for DDR, 0 to 0.5 for SDR, 0 to 0.25 for 2T, and 0 to 0.17 for 3T
# Number of DQ pins
-num_dq 72 //Number of DQ pins. Includes ECC pins.
# Number of DQS pins. DQS is a data strobe that is sent along with a small number of data-lanes so the source synchronous timing is local to these DQ bits. Typically, 1 DQS per byte (8 DQ bits) is used. The DQS is also typucally differential, just like the CLK pin.
-num_dqs 18 //2 x differential pairs. Include ECC pins as well. Valid range 0 to 18. For x4 memories, could have 36 DQS pins.
# Number of CA pins
-num_ca 25 //Valid range 0 to 35 pins.
# Number of CLK pins. CLK is typically a differential pair. In some cases additional CLK pairs may be used to limit the loading on the CLK pin.
-num_clk 2 //2 x differential pair. Valid values: 0/2/4.
# Number of Physical Ranks
-num_mem_dq 2 //Number of ranks (loads on DQ and DQS) per buffer/register. If multiple LRDIMMs or buffer chips exist, the analysis for capacity and power is reported per buffer/register.
# Width of the Memory Data Bus
-mem_data_width 8 //x4 or x8 or x16 or x32 memories. For WideIO upto x128.
# RTT Termination Resistance
-rtt_value 10000
# RON Termination Resistance
-ron_value 34
# Time of flight for DQ
-tflight_value
# Parameter related to MemCAD
# Number of BoBs: 1,2,3,4,5,6,
-num_bobs 1
# Memory System Capacity in GB
-capacity 80
# Number of Channel per BoB: 1,2.
-num_channels_per_bob 1
# First Metric for ordering different design points
-first metric "Cost"
#-first metric "Bandwidth"
#-first metric "Energy"
# Second Metric for ordering different design points
#-second metric "Cost"
-second metric "Bandwidth"
#-second metric "Energy"
# Third Metric for ordering different design points
#-third metric "Cost"
#-third metric "Bandwidth"
-third metric "Energy"
# Possible DIMM option to consider
#-DIMM model "JUST_UDIMM"
#-DIMM model "JUST_RDIMM"
#-DIMM model "JUST_LRDIMM"
-DIMM model "ALL"
#if channels of each bob have the same configurations
#-mirror_in_bob "T"
-mirror_in_bob "F"
#if we want to see all channels/bobs/memory configurations explored
#-verbose "T"
#-verbose "F"