# Memory array parameters: capacity, line size, ports, banks, technology,
# cell/peripheral types, bus width, temperature, memory type, 3D DRAM
# settings, tag size, access mode and the UCA design objective.
# A line starting with "//-" is a disabled alternative value for an option.

# Cache size
//-size (bytes) 528
//-size (bytes) 4096
//-size (bytes) 262144
//-size (bytes) 1048576
//-size (bytes) 2097152
//-size (bytes) 4194304
//-size (bytes) 8388608
//-size (bytes) 16777216
//-size (bytes) 33554432
//-size (bytes) 134217728
//-size (bytes) 268435456
//-size (bytes) 536870912
//-size (bytes) 67108864
//-size (bytes) 536870912
//-size (bytes) 1073741824
# For 3D DRAM memory, please use Gb as the unit
-size (Gb) 8

# Line size
//-block size (bytes) 8
-block size (bytes) 128

# To model a fully associative cache, set associativity to zero
//-associativity 0
//-associativity 2
//-associativity 4
-associativity 1
//-associativity 16

-read-write port 1
-exclusive read port 0
-exclusive write port 0
-single ended read ports 0

# Multiple banks connected using a bus
-UCA bank count 8

//-technology (u) 0.032
//-technology (u) 0.040
//-technology (u) 0.065
//-technology (u) 0.078
//-technology (u) 0.080
//-technology (u) 0.090
-technology (u) 0.050

# The following three parameters are meaningful only for main memories
//-page size (bits) 8192
-burst length 4
-internal prefetch width 1

# The following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
//-Data array cell type - "itrs-hp"
//-Data array cell type - "itrs-lstp"
//-Data array cell type - "itrs-lop"
-Data array cell type - "comm-dram"

# The following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
//-Data array peripheral type - "itrs-hp"
-Data array peripheral type - "itrs-lstp"
//-Data array peripheral type - "itrs-lop"

# The following parameter can have one of five values -- (itrs-hp, itrs-lstp, itrs-lop, lp-dram, comm-dram)
-Tag array cell type - "itrs-hp"
//-Tag array cell type - "itrs-lstp"

# The following parameter can have one of three values -- (itrs-hp, itrs-lstp, itrs-lop)
-Tag array peripheral type - "itrs-hp"
//-Tag array peripheral type - "itrs-lstp"

# Bus width includes the data bits and the address bits required by the decoder
//-output/input bus width 16
//-output/input bus width 64
-output/input bus width 64

# 300-400 in steps of 10
-operating temperature (K) 350

# Type of memory - cache (with a tag array), ram (scratch ram similar to a register file),
# or main memory (no tag array, and every access happens at page granularity; ref: CACTI 5.3 report)
//-cache type "cache"
//-cache type "ram"
# "main memory" is the old main memory model; in fact, it is an eDRAM model.
//-cache type "main memory"
# Once this memory type is used, the parameters in the section below
# override the same parameters set above.
-cache type "3D memory or 2D main memory"

# Parameters for 3D DRAM
//-page size (bits) 16384
-page size (bits) 8192
//-page size (bits) 4096
# For 3D DRAM, IO per bank equals the product of burst depth and IO width
-burst depth 8
-IO width 4
-system frequency (MHz) 677
-stacked die count 4
# 0: coarse-grained rank-level; 1: fine-grained rank-level
-partitioning granularity 0
# 0: ITRS aggressive; 1: industrial conservative
-TSV projection 1
## End of parameters for 3D DRAM

# To model special structures like branch target buffers, directories, etc.,
# change the tag size parameter.
# If you want CACTI to calculate the tag bits, set the tag size to "default".
-tag size (b) "default"
//-tag size (b) 45

# fast       - data and tag access happen in parallel
# sequential - data array is accessed after accessing the tag array
# normal     - data array lookup and tag access happen in parallel;
#              the final data block is broadcast in the data array h-tree
#              after getting the signal from the tag array
-access mode (normal, sequential, fast) - "fast"
//-access mode (normal, sequential, fast) - "normal"
//-access mode (normal, sequential, fast) - "sequential"

# DESIGN OBJECTIVE for UCA (or banks in NUCA)
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100

# Percentage deviation from the minimum value
# Ex: A deviation value of 10:1000:1000:1000:1000 will try to find an organization
# that compromises at most 10% delay.
# NOTE: Try reasonable values for % deviation. Inconsistent deviation
# percentage values will not produce any valid organizations. For example,
# 0:0:100:100:100 will try to identify an organization that has both the
# least delay and the least dynamic power. Since such an organization is not
# possible, CACTI will throw an error. Refer to the CACTI-6 technical report
# for more details.
-deviate (delay, dynamic power, leakage power, cycle time, area) 50:100000:100000:100000:1000000

# Objective for NUCA
-NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100
-NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000

# Set the optimize tag to ED or ED^2 to obtain a cache configuration optimized for
# the energy-delay or energy-delay-squared product.
# Note: the optimize tag disables the weight and deviate values mentioned above.
# Set it to NONE to let the weight and deviate values determine the
# appropriate cache configuration.
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED"
//-Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2"
-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"

-Cache model (NUCA, UCA) - "UCA"
//-Cache model (NUCA, UCA) - "NUCA"

# In order for CACTI to find the optimal NUCA bank value, the following
# variable should be assigned 0.
-NUCA bank count 0

# NOTE: for NUCA, the network frequency is set to a default value of
# 5GHz in time.c. CACTI automatically calculates the maximum possible
# frequency and downgrades this value if necessary.

# By default CACTI considers both full-swing and low-swing
# wires to find an optimal configuration. However, it is possible to
# restrict the search space by changing the signaling from "default" to
# the "fullswing" or "lowswing" type.
# NOTE(review): the active value "Global_30" is not one of the three values
# named in the option text (fullswing, lowswing, default) -- confirm that the
# CACTI version in use accepts it.
-Wire signaling (fullswing, lowswing, default) - "Global_30"
//-Wire signaling (fullswing, lowswing, default) - "default"
//-Wire signaling (fullswing, lowswing, default) - "lowswing"

//-Wire inside mat - "global"
-Wire inside mat - "semi-global"
-Wire outside mat - "global"
//-Wire outside mat - "semi-global"

-Interconnect projection - "conservative"
//-Interconnect projection - "aggressive"

# Contention in the network (which is a function of core count and cache level) is one of
# the critical factors used for deciding the optimal bank count value.
# Core count can be 4, 8, or 16.
//-Core count 4
-Core count 8
//-Core count 16
-Cache level (L2/L3) - "L3"

-Add ECC - "true"

//-Print level (DETAILED, CONCISE) - "CONCISE"
-Print level (DETAILED, CONCISE) - "DETAILED"

# For debugging
//-Print input parameters - "true"
-Print input parameters - "false"

# Force CACTI to model the cache with the
# following Ndbl, Ndwl, Nspd, Ndsam,
# and Ndcm values
-Force cache config - "true"
//-Force cache config - "false"
-Ndwl 16
-Ndbl 32
-Nspd 1
-Ndcm 1
-Ndsam1 1
-Ndsam2 1