# mach: aarch64

# Check the load single 1-element structure and replicate to all lanes insns:
# ld1r, ld2r, ld3r, ld4r.
# Check the addressing modes: no offset, post-index immediate offset,
# post-index register offset.
#
# Method: each loaded vector is reduced with addv/addp, the scalar result is
# moved to a general register and compared against the sum expected from the
# data tables below.  Any mismatch branches to .Lfailure.
#
# Register roles for the whole test:
#   x0 = address of input,  x1 = address of input2
#   x2 = running load pointer (reused afterwards as a result register)
#   x3 = post-index register offset (reused afterwards as a result register)

.include "testutils.inc"

	.data
	.align 4
# Byte table: consecutive bytes hold the values 1 through 16, given the
# little-endian layout that the expected sums below rely on.
input:
	.word 0x04030201
	.word 0x08070605
	.word 0x0c0b0a09
	.word 0x100f0e0d
# Word table: consecutive 32-bit words hold the values 1 through 12.
input2:
	.word 0x00000001
	.word 0x00000002
	.word 0x00000003
	.word 0x00000004
	.word 0x00000005
	.word 0x00000006
	.word 0x00000007
	.word 0x00000008
	.word 0x00000009
	.word 0x0000000a
	.word 0x0000000b
	.word 0x0000000c

	start
	adrp x0, input
	add x0, x0, :lo12:input
	adrp x1, input2
	add x1, x1, :lo12:input2

# ld1r on the byte table: post-index immediate, post-index register,
# post-index immediate, then no offset.
	mov x2, x0
	mov x3, #1
	ld1r {v0.8b}, [x2], 1
	ld1r {v1.16b}, [x2], x3
	ld1r {v2.4h}, [x2], 2
	ld1r {v3.8h}, [x2]

# Sum the bytes of every result.  The halfword loads are summed bytewise too,
# so each replicated halfword contributes both of its bytes.
	addv b0, v0.8b
	addv b1, v1.16b
	addv b2, v2.8b
	addv b3, v3.16b
	mov x2, v0.d[0]
	mov x3, v1.d[0]
	mov x4, v2.d[0]
	mov x5, v3.d[0]

# Expected: eight lanes of byte 1.
	cmp x2, #8
	bne .Lfailure
# Expected: sixteen lanes of byte 2.
	cmp x3, #32
	bne .Lfailure
# Expected: four halfword lanes made of bytes 3 and 4.
	cmp x4, #28
	bne .Lfailure
# Expected: eight halfword lanes made of bytes 5 and 6.
	cmp x5, #88
	bne .Lfailure

# ld2r on the word table: post-index immediate, post-index register,
# post-index immediate, then no offset.
	mov x2, x1
	mov x3, #8
	ld2r {v0.2s, v1.2s}, [x2], 8
	ld2r {v2.4s, v3.4s}, [x2], x3
	ld2r {v4.1d, v5.1d}, [x2], 16
	ld2r {v6.2d, v7.2d}, [x2]

# The pairwise adds fold two registers into one: lane 0 receives the sum of
# the first source's lanes and lane 1 the sum of the second source's lanes.
	addp v0.2s, v0.2s, v1.2s
	addv s2, v2.4s
	addv s3, v3.4s
	addp v4.2s, v4.2s, v5.2s
	addv s6, v6.4s
	addv s7, v7.4s
	mov w2, v0.s[0]
	mov w3, v0.s[1]
	mov x4, v2.d[0]
	mov x5, v3.d[0]
	mov w6, v4.s[0]
	mov w7, v4.s[1]
	mov x8, v6.d[0]
	mov x9, v7.d[0]

# Expected: word 1 in two lanes, then word 2 in two lanes.
	cmp w2, #2
	bne .Lfailure
	cmp w3, #4
	bne .Lfailure
# Expected: word 3 in four lanes, then word 4 in four lanes.
	cmp x4, #12
	bne .Lfailure
	cmp x5, #16
	bne .Lfailure
# Expected: the doubleword holding words 5 and 6, then the one holding
# words 7 and 8, each summed as two 32-bit halves.
	cmp w6, #11
	bne .Lfailure
	cmp w7, #15
	bne .Lfailure
# Expected: the doubleword holding words 9 and 10 replicated twice, then the
# one holding words 11 and 12 replicated twice, summed as 32-bit lanes.
	cmp x8, #38
	bne .Lfailure
	cmp x9, #46
	bne .Lfailure

# ld3r on the byte table: post-index immediate, post-index register, then
# no offset.  The three loads fill v0 through v8 with bytes 1 through 9.
	mov x2, x0
	mov x3, #3
	ld3r {v0.8b, v1.8b, v2.8b}, [x2], 3
	ld3r {v3.8b, v4.8b, v5.8b}, [x2], x3
	ld3r {v6.8b, v7.8b, v8.8b}, [x2]

# Only v0 through v8 were loaded, so only those nine registers are reduced.
	addv b0, v0.8b
	addv b1, v1.8b
	addv b2, v2.8b
	addv b3, v3.8b
	addv b4, v4.8b
	addv b5, v5.8b
	addv b6, v6.8b
	addv b7, v7.8b
	addv b8, v8.8b
	mov x2, v0.d[0]
	mov x3, v1.d[0]
	mov x4, v2.d[0]
	mov x5, v3.d[0]
	mov x6, v4.d[0]
	mov x7, v5.d[0]
	mov x8, v6.d[0]
	mov x9, v7.d[0]
	mov x10, v8.d[0]

# Expected: register vN holds byte N+1 in eight lanes, so each sum is
# eight times that byte.
	cmp x2, #8
	bne .Lfailure
	cmp x3, #16
	bne .Lfailure
	cmp x4, #24
	bne .Lfailure
	cmp x5, #32
	bne .Lfailure
	cmp x6, #40
	bne .Lfailure
	cmp x7, #48
	bne .Lfailure
	cmp x8, #56
	bne .Lfailure
	cmp x9, #64
	bne .Lfailure
	cmp x10, #72
	bne .Lfailure

# ld4r on the word table: post-index immediate, then no offset.  The
# post-index register form is not exercised for ld4r in this test.
	mov x2, x1
	ld4r {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], 16
	ld4r {v4.4s, v5.4s, v6.4s, v7.4s}, [x2]

	addv s0, v0.4s
	addv s1, v1.4s
	addv s2, v2.4s
	addv s3, v3.4s
	addv s4, v4.4s
	addv s5, v5.4s
	addv s6, v6.4s
	addv s7, v7.4s
	mov x2, v0.d[0]
	mov x3, v1.d[0]
	mov x4, v2.d[0]
	mov x5, v3.d[0]
	mov x6, v4.d[0]
	mov x7, v5.d[0]
	mov x8, v6.d[0]
	mov x9, v7.d[0]

# Expected: register vN holds word N+1 in four lanes, so each sum is
# four times that word.
	cmp x2, #4
	bne .Lfailure
	cmp x3, #8
	bne .Lfailure
	cmp x4, #12
	bne .Lfailure
	cmp x5, #16
	bne .Lfailure
	cmp x6, #20
	bne .Lfailure
	cmp x7, #24
	bne .Lfailure
	cmp x8, #28
	bne .Lfailure
	cmp x9, #32
	bne .Lfailure

	pass
.Lfailure:
	fail