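# Listing metadata from the source viewer: 232 lines, 3.8 KiB.
# The viewer tagged this file "ArmAsm"; the content is actually x86-64
# assembly in AT&T (GNU as) syntax.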
# Compiler-generated x86-64 assembly, AT&T (GNU as) syntax: operands are
# written "op src, dst", registers carry a % prefix, immediates a $ prefix.
# The functions below take their arguments in rdi/rsi/edx/xmm0, which matches
# the System V AMD64 calling convention.
        .file   "toto2.c"
        .text
#-----------------------------------------------------------------------
# f: scaled accumulate over two float arrays
#        dst[i] = src[i] * s + dst[i]        for 0 <= i < n
#
# C-equivalent, inferred from register use only (parameter names are
# placeholders; the position of the float parameter among the others
# cannot be recovered from this listing):
#        void f(float *src, float *dst, int n, float s);
#
# ABI:   System V AMD64 (assumed from the rdi/rsi/edx/xmm0 usage)
# In:    rdi = src, rsi = dst, edx = n (signed), xmm0 low lane = s
# Out:   no result is deliberately left in rax or xmm0
# Clobb: rax, rcx, rdx, r8, xmm0-xmm3, flags
# Stack: untouched (leaf routine, no calls)
#
# Paths: n <= 0 returns at once. When the two pointers are fewer than
# sixteen bytes apart, or n <= 3, the one-element-per-iteration loop at
# .L6 runs. Otherwise .L4 handles four floats per iteration and up to
# three leftover elements are processed inline afterwards.
#-----------------------------------------------------------------------
        .p2align 4
        .globl  f
        .type   f, @function
f:
.LFB23:
        .cfi_startproc
        endbr64
        testl   %edx, %edx
        jle     .L1                     # n <= 0: nothing to do
        leaq    15(%rsi), %rcx
        leal    -1(%rdx), %eax          # eax = n - 1 (upper half of rax zeroed)
        subq    %rdi, %rcx              # rcx = dst - src + 15
        cmpq    $30, %rcx               # unsigned test for |dst - src| <= 15 bytes
        jbe     .L3                     # a 16-byte access could overlap: go scalar
        cmpl    $2, %eax
        jbe     .L3                     # n <= 3: not worth a vector iteration
        movl    %edx, %ecx
        movaps  %xmm0, %xmm2
        xorl    %eax, %eax              # rax = byte offset into both arrays
        shrl    $2, %ecx                # ecx = n / 4 vector iterations
        shufps  $0, %xmm2, %xmm2        # xmm2 = {s, s, s, s}
        salq    $4, %rcx                # rcx = byte offset at which the loop stops
        .p2align 4,,10
        .p2align 3
.L4:                                    # vector loop: four floats per pass
        movups  (%rdi,%rax), %xmm1      # unaligned loads: no alignment is assumed
        movups  (%rsi,%rax), %xmm3
        mulps   %xmm2, %xmm1            # src[i..i+3] * s
        addps   %xmm3, %xmm1            # ... + dst[i..i+3]
        movups  %xmm1, (%rsi,%rax)
        addq    $16, %rax
        cmpq    %rcx, %rax
        jne     .L4

        # Tail: at most three elements remain, starting at index n & ~3.
        movl    %edx, %eax
        andl    $-4, %eax               # eax = n rounded down to a multiple of four
        testb   $3, %dl
        je      .L1                     # n was a multiple of four: finished
        movl    %eax, %r8d              # zero-extend the index for addressing
        movss   (%rdi,%r8,4), %xmm1
        leaq    (%rsi,%r8,4), %rcx      # rcx = &dst[i]
        mulss   %xmm0, %xmm1
        addss   (%rcx), %xmm1
        movss   %xmm1, (%rcx)
        leal    1(%rax), %ecx
        cmpl    %ecx, %edx
        jle     .L1                     # only one leftover element
        movslq  %ecx, %rcx
        addl    $2, %eax                # eax = index of a possible third leftover
        movss   (%rdi,%rcx,4), %xmm1
        leaq    (%rsi,%rcx,4), %r8      # r8 = &dst[i + 1]
        mulss   %xmm0, %xmm1
        addss   (%r8), %xmm1
        movss   %xmm1, (%r8)
        cmpl    %eax, %edx
        jle     .L1                     # only two leftover elements
        cltq                            # sign-extend eax into rax
        mulss   (%rdi,%rax,4), %xmm0    # last element: s itself may be overwritten
        leaq    (%rsi,%rax,4), %rdx     # rdx = &dst[i + 2]; n is no longer needed
        addss   (%rdx), %xmm0
        movss   %xmm0, (%rdx)
        ret

        .p2align 4,,10
        .p2align 3
.L3:                                    # scalar fallback
        movl    %eax, %edx              # rdx = n - 1, index of the last element
        xorl    %eax, %eax              # rax = i = 0
        .p2align 4,,10
        .p2align 3
.L6:
        movss   (%rdi,%rax,4), %xmm1
        movq    %rax, %rcx              # remember i before it is incremented
        mulss   %xmm0, %xmm1
        addss   (%rsi,%rax,4), %xmm1
        movss   %xmm1, (%rsi,%rax,4)
        addq    $1, %rax
        cmpq    %rdx, %rcx              # was that the last index?
        jne     .L6
.L1:
        ret
        .cfi_endproc
.LFE23:
        .size   f, .-f
#-----------------------------------------------------------------------
# f2: accumulate the square root of a scaled array
#        dst[i] = (float)((double)dst[i] + sqrt((double)(src[i] * s)))
#    for 0 <= i < n. The product src[i] * s is formed in single precision
#    (mulss/mulps); the square root and the addition are done in double
#    precision and the sum is narrowed back to float before the store.
#
# C-equivalent, inferred from register use only (parameter names are
# placeholders; the position of the float parameter among the others
# cannot be recovered from this listing):
#        void f2(float *src, float *dst, int n, float s);
#
# ABI:   System V AMD64 (assumed from the rdi/rsi/edx/xmm0 usage)
# In:    rdi = src, rsi = dst, edx = n (signed), xmm0 low lane = s
# Out:   no result is deliberately left in rax or xmm0
# Clobb: rax, rcx, rdx, rsi, rdi, r8, xmm0-xmm7, flags
# Stack: untouched (leaf routine, no calls)
#
# Paths: n <= 0 returns at once. When the two pointers are fewer than
# sixteen bytes apart, or n <= 3, the pointer-bumping loop at .L22 runs.
# Otherwise .L20 handles four floats per iteration and up to three
# leftover elements are processed inline afterwards.
#-----------------------------------------------------------------------
        .p2align 4
        .globl  f2
        .type   f2, @function
f2:
.LFB24:
        .cfi_startproc
        endbr64
        testl   %edx, %edx
        jle     .L17                    # n <= 0: nothing to do
        leaq    15(%rsi), %rax
        leal    -1(%rdx), %ecx          # ecx = n - 1 (upper half of rcx zeroed)
        subq    %rdi, %rax              # rax = dst - src + 15
        cmpq    $30, %rax               # unsigned test for |dst - src| <= 15 bytes
        jbe     .L19                    # a 16-byte access could overlap: go scalar
        cmpl    $2, %ecx
        jbe     .L19                    # n <= 3: not worth a vector iteration
        movl    %edx, %ecx
        movaps  %xmm0, %xmm7
        xorl    %eax, %eax              # rax = byte offset into both arrays
        shrl    $2, %ecx                # ecx = n / 4 vector iterations
        shufps  $0, %xmm7, %xmm7        # xmm7 = {s, s, s, s}
        salq    $4, %rcx                # rcx = byte offset at which the loop stops
        .p2align 4,,10
        .p2align 3
.L20:                                   # vector loop: four floats per pass
        movups  (%rdi,%rax), %xmm2      # src[i..i+3]
        movlps  8(%rsi,%rax), %xmm6     # low half of xmm6 = dst[i+2], dst[i+3]
        mulps   %xmm7, %xmm2            # products p0..p3 in single precision
        movhlps %xmm2, %xmm5            # low half of xmm5 = p2, p3
        cvtps2pd        %xmm2, %xmm1    # widen p0, p1 to double
        sqrtpd  %xmm1, %xmm4            # sqrt(p0), sqrt(p1)
        cvtps2pd        (%rsi,%rax), %xmm1      # widen dst[i], dst[i+1]
        cvtps2pd        %xmm5, %xmm2    # widen p2, p3
        addpd   %xmm4, %xmm1            # dst[i..i+1] + sqrt(p0..p1)
        sqrtpd  %xmm2, %xmm3            # sqrt(p2), sqrt(p3)
        cvtps2pd        %xmm6, %xmm2    # widen dst[i+2], dst[i+3]
        addpd   %xmm3, %xmm2            # dst[i+2..i+3] + sqrt(p2..p3)
        cvtpd2ps        %xmm1, %xmm1    # narrow both halves back to float
        cvtpd2ps        %xmm2, %xmm2
        movlhps %xmm2, %xmm1            # xmm1 = {r0, r1, r2, r3}
        movups  %xmm1, (%rsi,%rax)
        addq    $16, %rax
        cmpq    %rcx, %rax
        jne     .L20

        # Tail: at most three elements remain, starting at index n & ~3.
        movl    %edx, %eax
        andl    $-4, %eax               # eax = n rounded down to a multiple of four
        testb   $3, %dl
        je      .L17                    # n was a multiple of four: finished
        movl    %eax, %r8d              # zero-extend the index for addressing
        movss   (%rdi,%r8,4), %xmm1
        leaq    (%rsi,%r8,4), %rcx      # rcx = &dst[i]
        mulss   %xmm0, %xmm1
        cvtss2sd        %xmm1, %xmm1
        movapd  %xmm1, %xmm2
        pxor    %xmm1, %xmm1            # clear the destination before the partial write
        sqrtsd  %xmm2, %xmm2
        cvtss2sd        (%rcx), %xmm1
        addsd   %xmm2, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, (%rcx)
        leal    1(%rax), %ecx
        cmpl    %ecx, %edx
        jle     .L17                    # only one leftover element
        movslq  %ecx, %rcx
        addl    $2, %eax                # eax = index of a possible third leftover
        movss   (%rdi,%rcx,4), %xmm1
        leaq    (%rsi,%rcx,4), %r8      # r8 = &dst[i + 1]
        mulss   %xmm0, %xmm1
        cvtss2sd        %xmm1, %xmm1
        movapd  %xmm1, %xmm2
        pxor    %xmm1, %xmm1
        sqrtsd  %xmm2, %xmm2
        cvtss2sd        (%r8), %xmm1
        addsd   %xmm2, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, (%r8)
        cmpl    %eax, %edx
        jle     .L17                    # only two leftover elements
        cltq                            # sign-extend eax into rax
        mulss   (%rdi,%rax,4), %xmm0    # last element: s itself may be overwritten
        leaq    (%rsi,%rax,4), %rdx     # rdx = &dst[i + 2]; n is no longer needed
        cvtss2sd        %xmm0, %xmm0
        sqrtsd  %xmm0, %xmm0
        movapd  %xmm0, %xmm1
        pxor    %xmm0, %xmm0
        cvtss2sd        (%rdx), %xmm0
        addsd   %xmm1, %xmm0
        cvtsd2ss        %xmm0, %xmm0
        movss   %xmm0, (%rdx)
        ret

        .p2align 4,,10
        .p2align 3
.L19:                                   # scalar fallback, walks both pointers
        leaq    4(%rsi,%rcx,4), %rax    # rax = dst + n floats, one past the end
        .p2align 4,,10
        .p2align 3
.L22:
        movss   (%rdi), %xmm1
        addq    $4, %rsi                # advance early; dst[i] is now at -4(%rsi)
        addq    $4, %rdi
        mulss   %xmm0, %xmm1
        cvtss2sd        %xmm1, %xmm1
        movapd  %xmm1, %xmm2
        pxor    %xmm1, %xmm1
        sqrtsd  %xmm2, %xmm2
        cvtss2sd        -4(%rsi), %xmm1
        addsd   %xmm2, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, -4(%rsi)
        cmpq    %rax, %rsi              # reached the end of dst?
        jne     .L22
.L17:
        ret
        .cfi_endproc
.LFE24:
        .size   f2, .-f2
#-----------------------------------------------------------------------
# main: program entry point; does no work and returns 0.
# Placed in .text.startup rather than .text.
# In:    arguments, if any, are ignored
# Out:   eax = 0
# Clobb: rax, flags
#-----------------------------------------------------------------------
        .section        .text.startup,"ax",@progbits
        .p2align 4
        .globl  main
        .type   main, @function
main:
.LFB25:
        .cfi_startproc
        endbr64
        xorl    %eax, %eax              # return value 0
        ret
        .cfi_endproc
.LFE25:
        .size   main, .-main
# Object-file metadata: compiler identification, the stack marker section,
# and a GNU property note.
        .ident  "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
        .section        .note.GNU-stack,"",@progbits    # empty section, no "x" flag
        .section        .note.gnu.property,"a"
        .align 8
        # ELF note header; numeric labels 0..4 delimit the fields measured below.
        .long    1f - 0f                # size of the name field
        .long    4f - 1f                # size of the descriptor field
        .long    5                      # note type (NT_GNU_PROPERTY_TYPE_0 per the GNU gABI extensions)
0:
        .string  "GNU"                  # note name, NUL-terminated
1:
        .align 8
        .long    0xc0000002             # property type (GNU_PROPERTY_X86_FEATURE_1_AND per the x86-64 psABI)
        .long    3f - 2f                # size of the property data
2:
        .long    0x3                    # feature bits 0 and 1 set (IBT and SHSTK per the x86-64 psABI)
3:
        .align 8
4: