351 lines
7.4 KiB
Text
351 lines
7.4 KiB
Text
#mach: crisv32
|
|
#output: Basic clock cycles, total @: *\n
|
|
#output: Memory source stall cycles: 82\n
|
|
#output: Memory read-after-write stall cycles: 0\n
|
|
#output: Movem source stall cycles: 6\n
|
|
#output: Movem destination stall cycles: 880\n
|
|
#output: Movem address stall cycles: 4\n
|
|
#output: Multiplication source stall cycles: 18\n
|
|
#output: Jump source stall cycles: 6\n
|
|
#output: Branch misprediction stall cycles: 0\n
|
|
#output: Jump target stall cycles: 0\n
|
|
#sim: --cris-cycles=basic
|
|
|
|
.include "testutils.inc"
|
|
|
|
; Macros for testing correctness of movem destination stall
|
|
; cycles for various insn types. Beware: macro parameters can
|
|
; be comma or space-delimited. There are problems (i.e. bugs)
|
|
; with using space-delimited operands and operands with
|
|
; non-alphanumeric characters, like "[]-." so use comma for
|
|
; them. Lots of trouble passing empty parameters and parameters
|
|
; with comma. Ugh. FIXME: Report bugs, fix bugs, fix other
|
|
; shortcomings, fix that darn old macro-parameter-in-string.
|
|
|
|
; Helper macro. Unfortunately I find no cleaner way to unify
|
|
; one and two-operand cases, the main problem being the comma
|
|
; operand delimiter clashing with macro operand delimiter.
|
|
;
; Emits one test: a "movem [r7],r6", immediately followed by the
; insn under test, followed by three nops.
;   S    - size suffix glued onto the mnemonic (e.g. .b, .w, .d),
;          or "none" to emit the mnemonic without a suffix.
;   insn - mnemonic of the insn under test.
;   x    - first (or only) operand.
;   y    - second operand; when left at its default "none" the insn
;          is emitted with the single operand x.
.macro t_S_x_y S insn x y=none
|
|
; The movem whose destination registers the insn below may use.
movem [r7],r6
|
|
; Single-operand forms, without and with a size suffix.
.ifc \y,none
|
|
.ifc \S,none
|
|
\insn \x
|
|
.else
|
|
\insn\S \x
|
|
.endif
|
|
; Two-operand forms, without and with a size suffix.
.else
|
|
.ifc \S,none
|
|
\insn \x,\y
|
|
.else
|
|
\insn\S \x,\y
|
|
.endif
|
|
.endif
|
|
; NOTE(review): presumably padding so that effects of this test
; do not overlap with whatever follows the expansion - confirm.
nop
|
|
nop
|
|
nop
|
|
.endm
|
|
|
|
; An insn-type that has a single register operand. The register
|
|
; may or may not be a source register for the insn.
|
|
; Expands the test twice, once with r3 and once with r8.
; NOTE(review): r3 is presumably one of the registers loaded by the
; "movem [r7],r6" in t_S_x_y and r8 is not - confirm.
.macro t_r insn
|
|
t_S_x_y none,\insn,r3
|
|
t_S_x_y none,\insn,r8
|
|
.endm
|
|
|
|
; An insn-type that jumps to the destination of the register.
|
|
; Points r7 at a private table in .rodata whose seven dwords all
; hold the address of the local label 1 at the end of the expansion,
; and loads that same address into r8 and r9, before expanding t_r.
; Note that r7 is left pointing at this table afterwards.
.macro t_r_j insn
|
|
; r7 = address of the table emitted below (local label 0).
move.d 0f,r7
|
|
; r8 = r9 = address of the landing label 1.
move.d 1f,r8
|
|
move.d r8,r9
|
|
nop
|
|
nop
|
|
nop
|
|
; The table, aligned to 2**5 bytes.
.section ".rodata"
|
|
.p2align 5
|
|
0:
|
|
.dword 1f
|
|
.dword 1f
|
|
.dword 1f
|
|
.dword 1f
|
|
.dword 1f
|
|
.dword 1f
|
|
.dword 1f
|
|
.previous
|
|
; NOTE(review): the r3 variant presumably transfers control to
; label 1 already, so the r8 variant would not be executed - confirm.
t_r \insn
|
|
; Landing point for the jump under test.
1:
|
|
.endm
|
|
|
|
; An insn-type that has a size-modifier and two register
|
|
; operands.
|
|
; Expands four tests with the operand pairs (r3,r8), (r8,r3),
; (r4,r3) and (r8,r9). S is the size suffix or "none", passed
; through to t_S_x_y together with insn.
.macro t_xr_r S insn
|
|
t_S_x_y \S \insn r3 r8
|
|
t_S_x_y \S \insn r8 r3
|
|
; Copy r3 into r9 before the remaining two tests.
; NOTE(review): presumably to give r9 a defined value - confirm.
move.d r3,r9
|
|
t_S_x_y \S \insn r4 r3
|
|
t_S_x_y \S \insn r8 r9
|
|
.endm
|
|
|
|
; An insn-type that has two register operands.
|
|
; Same as t_xr_r, but without a size suffix on the mnemonic.
.macro t_r_r insn
|
|
t_xr_r none \insn
|
|
.endm
|
|
|
|
; A t_xr_r insn with a byte or word-size modifier.
|
|
; Expands t_xr_r first with the .b suffix, then with the .w suffix.
.macro t_wbr_r insn
|
|
t_xr_r .b,\insn
|
|
t_xr_r .w,\insn
|
|
.endm
|
|
|
|
; A t_xr_r insn with a dword, byte or word-size modifier: expands
|
|
; t_xr_r with the .d suffix, then t_wbr_r for the .b and .w suffixes.
.macro t_dwbr_r insn
|
|
t_xr_r .d,\insn
|
|
t_wbr_r \insn
|
|
.endm
|
|
|
|
; An insn-type that has a size-modifier, a constant and a
|
|
; register operand.
|
|
; Expands two tests using the constant 24 as first operand, once
; with r3 and once with r8 as the register operand. S is the size
; suffix or "none".
.macro t_xc_r S insn
|
|
t_S_x_y \S \insn 24 r3
|
|
; Copy r3 into r9 between the two tests.
; NOTE(review): presumably to give r9 a defined value - confirm.
move.d r3,r9
|
|
t_S_x_y \S \insn 24 r8
|
|
.endm
|
|
|
|
; An insn-type that has a constant and a register operand.
|
|
; Same as t_xc_r, but without a size suffix on the mnemonic.
.macro t_c_r insn
|
|
t_xc_r none \insn
|
|
.endm
|
|
|
|
; A t_c_r insn with a byte or word-size modifier.
|
|
; Expands t_xc_r first with the .b suffix, then with the .w suffix.
.macro t_wbc_r insn
|
|
t_xc_r .b,\insn
|
|
t_xc_r .w,\insn
|
|
.endm
|
|
|
|
; A t_c_r insn with a dword, byte or word-size modifier: expands
|
|
; t_xc_r with the .d suffix, then t_wbc_r for the .b and .w suffixes.
.macro t_dwbc_r insn
|
|
t_xc_r .d,\insn
|
|
t_wbc_r \insn
|
|
.endm
|
|
|
|
; An insn-type that has size-modifier, a memory operand and a
|
|
; register operand.
|
|
; Expands four tests reading from [r4], [r8], [r3] and [r8], with
; r3, r5, r9 and r9 as the register operand. Relies on a local
; label 9 having been defined earlier in the file (the movem source
; area in the main program). S is the size suffix or "none".
.macro t_xm_r S insn
|
|
; r8 = address of the nearest preceding label 9.
move.d 9b,r8
|
|
t_S_x_y \S,\insn,[r4],r3
|
|
move.d r3,r9
|
|
t_S_x_y \S,\insn,[r8],r5
|
|
move.d r5,r9
|
|
t_S_x_y \S,\insn,[r3],r9
|
|
t_S_x_y \S,\insn,[r8],r9
|
|
.endm
|
|
|
|
; An insn-type that has size-modifier, a register operand and a
; memory operand, i.e. the t_xm_r operands in the opposite order.
|
|
; Expands four tests with the operand pairs (r3,[r4]), (r8,[r3]),
; (r3,[r8]) and (r9,[r8]). Relies on a local label 9 having been
; defined earlier in the file. S is the size suffix or "none".
.macro t_xr_m S insn
|
|
; r8 = address of the nearest preceding label 9.
move.d 9b,r8
|
|
t_S_x_y \S,\insn,r3,[r4]
|
|
t_S_x_y \S,\insn,r8,[r3]
|
|
t_S_x_y \S,\insn,r3,[r8]
|
|
t_S_x_y \S,\insn,r9,[r8]
|
|
.endm
|
|
|
|
; An insn-type that has a memory operand and a register operand.
|
|
; Same as t_xm_r, but without a size suffix on the mnemonic.
.macro t_m_r insn
|
|
t_xm_r none \insn
|
|
.endm
|
|
|
|
; A t_m_r insn with a byte or word-size modifier.
|
|
; Expands t_xm_r first with the .b suffix, then with the .w suffix.
.macro t_wbm_r insn
|
|
t_xm_r .b,\insn
|
|
t_xm_r .w,\insn
|
|
.endm
|
|
|
|
; A t_m_r insn with a dword, byte or word-size modifier: expands
|
|
; t_xm_r with the .d suffix, then t_wbm_r for the .b and .w suffixes.
.macro t_dwbm_r insn
|
|
t_xm_r .d,\insn
|
|
t_wbm_r \insn
|
|
.endm
|
|
|
|
; Insn types of the regular type (r, c, m, size d w b).
|
|
; Expands the register, constant and memory source variants, in
; that order, each for the .d, .b and .w suffixes.
.macro t_dwb insn
|
|
t_dwbr_r \insn
|
|
t_dwbc_r \insn
|
|
t_dwbm_r \insn
|
|
.endm
|
|
|
|
; Insn types of the regular type (r, c, m), sizes w b only.
|
|
; Expands the register, constant and memory source variants, in
; that order, each for the .b and .w suffixes.
.macro t_wb insn
|
|
t_wbr_r \insn
|
|
t_wbc_r \insn
|
|
t_wbm_r \insn
|
|
.endm
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
; Program entry. NOTE(review): startnostack is not defined in this
; file; presumably it comes from testutils.inc - confirm.
startnostack
|
|
|
|
; Initialize registers so they don't contain unknowns.
|
|
|
|
; r7 and r8 = address of the movem source area (label 9 below);
; r7 is the address register used by "movem [r7],r6" in t_S_x_y.
move.d 9f,r7
|
|
move.d r7,r8
|
|
moveq 0,r9
|
|
|
|
; Movem source area. Register contents must be valid
|
|
; addresses, aligned on a cache boundary.
|
|
; Ten dwords, each holding the address of the area itself, placed
; in .rodata and aligned to 2**5 bytes.
.section ".rodata"
|
|
.p2align 5
|
|
9:
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.dword 9b
|
|
.previous
|
|
|
|
; The actual tests. The numbers in the comments specify the
|
|
; number of movem destination stall cycles. Some of them may be
|
|
; filed as memory source address stalls, multiplication source
|
|
; stalls or jump source stalls, duly marked so.
; Lines without a number are presumably expected to contribute no
; such cycles (TODO confirm against the #output totals).
|
|
|
|
t_r_r abs ; 3+3
|
|
|
|
t_dwb add ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
|
|
|
|
t_r_r addc ; (3+3+3)
|
|
t_c_r addc ; 3
|
|
t_m_r addc ; (3+3+3) (2 mem src)
|
|
|
|
t_dwb move ; (3+3)+(3+3+3)*2+3*2+(3+3+3)*3 (6 mem src)
|
|
t_xr_m .b move ; 3+3+3 (2 mem src)
|
|
t_xr_m .w move ; 3+3+3 (2 mem src)
|
|
t_xr_m .d move ; 3+3+3 (2 mem src)
|
|
|
|
; Two-operand addi, using the same register pairs as t_xr_r.
t_S_x_y none addi r3.b r8 ; 3
|
|
t_S_x_y none addi r8.w r3 ; 3
|
|
t_S_x_y none addi r4.d r3 ; 3
|
|
t_S_x_y none addi r8.w r9 ; (no count given; only r8 and r9 used)
|
|
|
|
; Addi with an acr destination and addo have three-operand
|
|
; syntax, so we have to expand (a useful subset of) "t_dwb".
; The last two operands are passed as one quoted macro argument.
|
|
t_S_x_y none addi r3.b "r8,acr" ; 3
|
|
t_S_x_y none addi r8.w "r3,acr" ; 3
|
|
t_S_x_y none addi r4.d "r3,acr" ; 3
|
|
t_S_x_y none addi r8.w "r9,acr" ; (no count given; only r8 and r9 used)
|
|
|
|
t_S_x_y .b addo 42 "r8,acr" ; (no count given; only r8 used)
|
|
t_S_x_y .w addo 4200 "r3,acr" ; 3
|
|
t_S_x_y .d addo 420000 "r3,acr" ; 3
|
|
|
|
; r8 = address of the movem source area, used as [r8] below.
move.d 9b,r8
|
|
t_S_x_y .d,addo,[r4],"r3,acr" ; 3 (1 mem src)
|
|
t_S_x_y .b,addo,[r3],"r8,acr" ; 3 (1 mem src)
|
|
t_S_x_y .w,addo,[r8],"r3,acr" ; 3
|
|
t_S_x_y .w,addo,[r8],"r9,acr" ; (no count given; only r8 and r9 used)
|
|
|
|
; Similar for addoq.
|
|
t_S_x_y none addoq 42 "r8,acr" ; (no count given; only r8 used)
|
|
t_S_x_y none addoq 42 "r3,acr" ; 3
|
|
|
|
t_c_r addq ; 3
|
|
|
|
; Tests for adds through jump. As for the other tests, the numbers
; in the trailing comments are the expected movem destination stall
; cycles, with the parenthesized part naming the counter that some
; of them are filed under instead.
t_wb adds ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
|
|
t_wb addu ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
|
|
|
|
t_dwb and ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
|
|
t_c_r andq ; 3
|
|
|
|
t_dwbr_r asr ; (3+3+3)*3
|
|
t_c_r asrq ; 3
|
|
|
|
t_dwbr_r bound ; (3+3+3)*3
|
|
t_dwbc_r bound ; 3*3
|
|
|
|
t_r_r btst ; (3+3+3)
|
|
t_c_r btstq ; 3
|
|
|
|
t_dwb cmp ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
|
|
t_c_r cmpq ; 3
|
|
|
|
; Only the constant and memory source variants of cmps and cmpu
; are expanded here (t_wbc_r and t_wbm_r, no t_wbr_r).
t_wbc_r cmps ; 3*2
|
|
t_wbc_r cmpu ; 3*2
|
|
t_wbm_r cmps ; (3+3+3)*2 (4 mem src)
|
|
t_wbm_r cmpu ; (3+3+3)*2 (4 mem src)
|
|
|
|
t_r_r dstep ; (3+3+3)
|
|
|
|
; FIXME: idxd, fidxi, ftagd, ftagi when supported.
|
|
|
|
; Each t_r_j expansion repoints r7 at its own table of jump
; targets, so the tests that follow load from that table.
t_r_j jsr ; 3 (2 jump src)
|
|
t_r_j jump ; 3 (2 jump src)
|
|
|
|
; Tests for lapc through not. Lines without a number in the
; trailing comment are presumably expected to contribute no movem
; destination stall cycles (TODO confirm).
t_c_r lapc.d ; (no count given)
|
|
|
|
; The "quick operand" must be in range [. to .+15*2] so we can't
|
|
; use t_c_r.
|
|
t_S_x_y none lapcq .+4 r3 ; (no count given)
|
|
t_S_x_y none lapcq .+4 r8 ; (no count given)
|
|
|
|
t_dwbr_r lsl ; (3+3+3)*3
|
|
t_c_r lslq ; 3
|
|
|
|
t_dwbr_r lsr ; (3+3+3)*3
|
|
t_c_r lsrq ; 3
|
|
|
|
t_r_r lz ; 3+3
|
|
|
|
t_S_x_y none mcp srp r3 ; 3
|
|
t_S_x_y none mcp srp r8 ; (no count given)
|
|
|
|
t_c_r moveq ; (no count given)
|
|
|
|
; Moves between srp and a general register, in both directions.
t_S_x_y none move srp r8 ; (no count given)
|
|
t_S_x_y none move srp r3 ; (no count given)
|
|
t_S_x_y none move r8 srp ; (no count given)
|
|
t_S_x_y none move r3 srp ; 3
|
|
|
|
; FIXME: move supreg,Rd and move Rs,supreg when supported.
|
|
|
|
t_wb movs ; (3+3)*2+0+(3+3)*2 (4 mem src)
|
|
t_wb movu ; (3+3)*2+0+(3+3)*2 (4 mem src)
|
|
|
|
t_dwbr_r muls ; (3+3+3)*3 (9 mul src)
|
|
t_dwbr_r mulu ; (3+3+3)*3 (9 mul src)
|
|
|
|
t_dwbr_r neg ; (3+3)*3
|
|
|
|
t_r not ; 3 cycles.
|
|
|
|
; Tests for or through movem, followed by the end of the program.
t_dwb or ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
|
|
t_c_r orq ; 3
|
|
|
|
t_r seq ; (no count given)
|
|
|
|
t_dwb sub ; (3+3+3)*3+3*3+(3+3+3)*3 (6 mem src)
|
|
t_c_r subq ; 3
|
|
|
|
t_wb subs ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
|
|
t_wb subu ; (3+3+3)*2+3*2+(3+3+3)*2 (4 mem src)
|
|
|
|
t_r swapw ; 3 cycles.
|
|
t_r swapnwbr ; 3 cycles.
|
|
|
|
t_r_j jsrc ; 3 (2 jump src)
|
|
|
|
t_r_r xor ; (3+3+3)
|
|
|
|
; Point r7 back at the movem source area (label 9); the t_r_j
; expansions above left it pointing at their own tables.
move.d 9b,r7
|
|
nop
|
|
nop
|
|
nop
|
|
; movem itself as the insn under test, from memory.
t_xm_r none movem ; (3+3) (2 mem src, 1+1 movem addr)
|
|
; As implied by the comment on the t_xr_m line below, all movem
|
|
; destination penalty cycles (but one) are accounted for as memory
|
|
; source address and movem source penalties. There are also two
|
|
; movem address cache-line straddle penalties.
|
|
t_xr_m none movem ; (3+3+2+2) (2 mem, 6 movem src, +2 movem addr)
|
|
|
|
; End of test. NOTE(review): break 15 is presumably what ends the
; simulator run and triggers the cycle report - confirm.
break 15
|