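# Listing size: 208 lines, 3.2 KiB.
# Content: x86-64 assembly in AT&T syntax, generated by GCC (the ArmAsm tag
# attached by the code viewer is a mislabel).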
# -----------------------------------------------------------------------
# f -- overwrite every element x of a float array with 1/(x*x), in place.
# ABI:   System V AMD64 (ELF object, AT&T syntax)
# In:    rdi = address of the first 4-byte float
#        esi = signed 32-bit element count n
# Out:   the array is updated in place; no return value is prepared
# Clobb: rax, rdx, xmm0, xmm1, xmm3, xmm4, flags
# Notes: compiler output for toto.c; presumably void f(float *a, int n),
#        but the C source is not visible here -- confirm.
#        Whole groups of four elements use RCPPS (an approximate
#        reciprocal) refined by one Newton-Raphson step. The remaining
#        one to three elements, and every element when n <= 3, use a
#        real DIVSS division, so the two paths may round differently.
# -----------------------------------------------------------------------
        .file   "toto.c"
        .text
        .p2align 4
        .globl  f
        .type   f, @function
f:
.LFB23:
        .cfi_startproc
        endbr64
        testl   %esi, %esi
        jle     .Lf_done                # n <= 0: nothing to do
        leal    -1(%rsi), %eax
        cmpl    $2, %eax
        jbe     .Lf_small               # n - 1 <= 2 (unsigned): scalar path only
        movl    %esi, %edx
        movq    %rdi, %rax              # rax = cursor into the array
        shrl    $2, %edx                # edx = n / 4 (number of full groups)
        salq    $4, %rdx                # times 16 bytes per group
        addq    %rdi, %rdx              # rdx = end address of the vector part
        .p2align 4,,10
        .p2align 3
.Lf_vec_loop:
        movups  (%rax), %xmm0           # x = four consecutive floats
        addq    $16, %rax
        mulps   %xmm0, %xmm0            # s = x * x
        rcpps   %xmm0, %xmm1            # r ~ 1 / s (approximation)
        mulps   %xmm1, %xmm0
        mulps   %xmm1, %xmm0            # s * r * r
        addps   %xmm1, %xmm1            # 2 * r
        subps   %xmm0, %xmm1            # refined r = 2r - s*r*r
        movups  %xmm1, -16(%rax)        # store over the four inputs just read
        cmpq    %rdx, %rax
        jne     .Lf_vec_loop
        movl    %esi, %eax
        andl    $-4, %eax               # eax = n rounded down to a multiple of 4
        testb   $3, %sil
        je      .Lf_done_vec            # n % 4 == 0: no leftover elements
.Lf_tail:
        # Invariant: eax = index of the first element still to process;
        # at most three elements remain. Unrolled, one element per step.
        movslq  %eax, %rdx
        leaq    (%rdi,%rdx,4), %rdx     # rdx = &a[i]
        movss   (%rdx), %xmm0
        movaps  %xmm0, %xmm1
        mulss   %xmm0, %xmm1            # xmm1 = a[i] * a[i]
        movss   .LC1(%rip), %xmm0       # xmm0 = 1.0f, kept for later steps
        movaps  %xmm0, %xmm3
        divss   %xmm1, %xmm3
        movss   %xmm3, (%rdx)           # a[i] = 1.0f / (a[i] * a[i])
        leal    1(%rax), %edx
        cmpl    %edx, %esi
        jle     .Lf_done                # n <= i + 1: finished
        movslq  %edx, %rdx
        movaps  %xmm0, %xmm4
        addl    $2, %eax                # eax = i + 2 for the last step
        leaq    (%rdi,%rdx,4), %rdx     # rdx = &a[i + 1]
        movss   (%rdx), %xmm1
        mulss   %xmm1, %xmm1
        divss   %xmm1, %xmm4
        movss   %xmm4, (%rdx)
        cmpl    %eax, %esi
        jle     .Lf_done                # n <= i + 2: finished
        cltq
        leaq    (%rdi,%rax,4), %rax     # rax = &a[i + 2]
        movss   (%rax), %xmm1
        mulss   %xmm1, %xmm1
        divss   %xmm1, %xmm0
        movss   %xmm0, (%rax)
.Lf_done:
        ret
        .p2align 4,,10
        .p2align 3
.Lf_done_vec:
        # Separate aligned exit used when the vector loop covered everything.
        ret
.Lf_small:
        xorl    %eax, %eax              # start the scalar path at index 0
        jmp     .Lf_tail
        .cfi_endproc
.LFE23:
        .size   f, .-f
# -----------------------------------------------------------------------
# fsum -- overwrite every element x of a float array with 1/(x*x), in
#         place, and return the sum of the values written.
# ABI:   System V AMD64 (ELF object, AT&T syntax)
# In:    rdi = address of the first 4-byte float
#        esi = signed 32-bit element count n
# Out:   xmm0 (low lane) = sum of the stored results; 0.0f when n <= 0
# Clobb: rax, rdx, xmm0, xmm1, xmm2, xmm4, xmm5, flags
# Notes: compiler output for toto.c; presumably float fsum(float *a, int n),
#        but the C source is not visible here -- confirm.
#        Whole groups of four elements use RCPPS (an approximate
#        reciprocal) refined by one Newton-Raphson step and are summed
#        lane by lane; the remaining one to three elements, and every
#        element when n <= 3, use a real DIVSS division. The summation
#        order therefore differs from a plain left-to-right loop.
# -----------------------------------------------------------------------
        .p2align 4
        .globl  fsum
        .type   fsum, @function
fsum:
.LFB24:
        .cfi_startproc
        endbr64
        testl   %esi, %esi
        jle     .Lfsum_empty            # n <= 0: return 0.0f
        leal    -1(%rsi), %eax
        cmpl    $2, %eax
        jbe     .Lfsum_small            # n - 1 <= 2 (unsigned): scalar path only
        movl    %esi, %edx
        movq    %rdi, %rax              # rax = cursor into the array
        pxor    %xmm2, %xmm2            # xmm2 = four partial sums, all zero
        shrl    $2, %edx                # edx = n / 4 (number of full groups)
        salq    $4, %rdx                # times 16 bytes per group
        addq    %rdi, %rdx              # rdx = end address of the vector part
        .p2align 4,,10
        .p2align 3
.Lfsum_vec_loop:
        movups  (%rax), %xmm1           # x = four consecutive floats
        addq    $16, %rax
        mulps   %xmm1, %xmm1            # s = x * x
        rcpps   %xmm1, %xmm0            # r ~ 1 / s (approximation)
        mulps   %xmm0, %xmm1
        mulps   %xmm0, %xmm1            # s * r * r
        addps   %xmm0, %xmm0            # 2 * r
        subps   %xmm1, %xmm0            # refined r = 2r - s*r*r
        movups  %xmm0, -16(%rax)        # store over the four inputs just read
        addps   %xmm0, %xmm2            # accumulate per lane
        cmpq    %rdx, %rax
        jne     .Lfsum_vec_loop
        # Horizontal reduction of the four partial sums into lane 0.
        movaps  %xmm2, %xmm0
        movl    %esi, %eax
        movhlps %xmm2, %xmm0            # xmm0 low half = upper two partial sums
        andl    $-4, %eax               # eax = n rounded down to a multiple of 4
        addps   %xmm0, %xmm2            # lanes 0,1 = s0+s2, s1+s3
        movaps  %xmm2, %xmm0
        shufps  $85, %xmm2, %xmm0       # broadcast lane 1 (s1+s3)
        addps   %xmm0, %xmm2            # lane 0 = s0+s1+s2+s3
        movaps  %xmm2, %xmm0            # xmm0 = running total
        testb   $3, %sil
        je      .Lfsum_ret_vec          # n % 4 == 0: no leftover elements
.Lfsum_tail:
        # Invariant: eax = index of the first element still to process,
        # xmm0 = sum so far; at most three elements remain.
        movslq  %eax, %rdx
        leaq    (%rdi,%rdx,4), %rdx     # rdx = &a[i]
        movss   (%rdx), %xmm1
        movaps  %xmm1, %xmm2
        mulss   %xmm1, %xmm2            # xmm2 = a[i] * a[i]
        movss   .LC1(%rip), %xmm1       # xmm1 = 1.0f, kept for later steps
        movaps  %xmm1, %xmm4
        divss   %xmm2, %xmm4
        movss   %xmm4, (%rdx)           # a[i] = 1.0f / (a[i] * a[i])
        leal    1(%rax), %edx
        addss   %xmm4, %xmm0            # add the new value to the total
        cmpl    %edx, %esi
        jle     .Lfsum_ret              # n <= i + 1: finished
        movslq  %edx, %rdx
        movaps  %xmm1, %xmm5
        addl    $2, %eax                # eax = i + 2 for the last step
        leaq    (%rdi,%rdx,4), %rdx     # rdx = &a[i + 1]
        movss   (%rdx), %xmm2
        mulss   %xmm2, %xmm2
        divss   %xmm2, %xmm5
        addss   %xmm5, %xmm0
        movss   %xmm5, (%rdx)
        cmpl    %eax, %esi
        jle     .Lfsum_ret              # n <= i + 2: finished
        cltq
        leaq    (%rdi,%rax,4), %rax     # rax = &a[i + 2]
        movss   (%rax), %xmm2
        mulss   %xmm2, %xmm2
        divss   %xmm2, %xmm1
        addss   %xmm1, %xmm0
        movss   %xmm1, (%rax)
        ret
        .p2align 4,,10
        .p2align 3
.Lfsum_empty:
        pxor    %xmm0, %xmm0            # empty input: the sum is 0.0f
.Lfsum_ret:
        ret
        .p2align 4,,10
        .p2align 3
.Lfsum_ret_vec:
        # Separate aligned exit used when the vector loop covered everything.
        ret
.Lfsum_small:
        xorl    %eax, %eax              # start the scalar path at index 0
        pxor    %xmm0, %xmm0            # with a zero running total
        jmp     .Lfsum_tail
        .cfi_endproc
.LFE24:
        .size   fsum, .-fsum
# -----------------------------------------------------------------------
# main -- program entry point; returns 0 without doing anything else.
# ABI:   System V AMD64 (ELF object, AT&T syntax)
# In:    no argument register is read
# Out:   eax = 0
# Clobb: rax, flags
# Notes: placed in .text.startup rather than .text.
# -----------------------------------------------------------------------
        .section        .text.startup,"ax",@progbits
        .p2align 4
        .globl  main
        .type   main, @function
main:
.LFB25:
        .cfi_startproc
        endbr64
        xorl    %eax, %eax              # return value 0
        ret
        .cfi_endproc
.LFE25:
        .size   main, .-main
# Read-only 4-byte constant loaded by the scalar paths of f and fsum.
        .section        .rodata.cst4,"aM",@progbits,4
        .align 4
.LC1:
        .long   1065353216              # 0x3f800000, the bit pattern of 1.0f
        .ident  "GCC: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0"
# Empty section with no "x" flag: marks the object as not needing an
# executable stack.
        .section        .note.GNU-stack,"",@progbits
# ELF note describing x86 feature requirements of this object.
        .section        .note.gnu.property,"a"
        .align 8
        .long   1f - 0f                 # size of the name field
        .long   4f - 1f                 # size of the descriptor
        .long   5                       # note type (NT_GNU_PROPERTY_TYPE_0)
0:
        .string "GNU"                   # note owner name
1:
        .align 8
        .long   0xc0000002              # GNU_PROPERTY_X86_FEATURE_1_AND
        .long   3f - 2f                 # size of the property data
2:
        .long   0x3                     # feature bits: IBT and SHSTK
3:
        .align 8
4: