aboutsummaryrefslogtreecommitdiff
path: root/bolt/test
diff options
context:
space:
mode:
author Maksim Panchenko <maks@fb.com> 2024-06-17 16:45:34 -0700
committer GitHub <noreply@github.com> 2024-06-17 16:45:34 -0700
commit c67ecf385395ecb6184faf577b5b60367c923aa8 (patch)
tree 6efb78197442080787b3948399721d46047104ab /bolt/test
parent bea329ecb0bd756d2e169169fb4333e9cd4d2dae (diff)
download llvm-c67ecf385395ecb6184faf577b5b60367c923aa8.zip
llvm-c67ecf385395ecb6184faf577b5b60367c923aa8.tar.gz
llvm-c67ecf385395ecb6184faf577b5b60367c923aa8.tar.bz2
[BOLT][tests] Fix jrcxz instruction test (#95861)
Rewrite the test case that checks that BOLT does not place a jrcxz instruction farther from its destination than a one-byte signed offset can reach.
Diffstat (limited to 'bolt/test')
-rw-r--r-- bolt/test/X86/bug-reorder-bb-jrcxz.s 649
1 files changed, 21 insertions, 628 deletions
diff --git a/bolt/test/X86/bug-reorder-bb-jrcxz.s b/bolt/test/X86/bug-reorder-bb-jrcxz.s
index d5ac354..8a11ac4 100644
--- a/bolt/test/X86/bug-reorder-bb-jrcxz.s
+++ b/bolt/test/X86/bug-reorder-bb-jrcxz.s
@@ -1,640 +1,33 @@
-## Test performs a BB reordering with unsupported
-## instruction jrcxz. Reordering works correctly with the
-## follow options: None, Normal or Reverse. Other strategies
-## are completed with Assertion `isIntN(Size * 8 + 1, Value).
-## The cause is the distance between BB where one contains
-## jrcxz instruction.
-## Example: OpenSSL
-## https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319
+## Check that BOLT handles code with a jrcxz instruction, which is limited to
+## a one-byte signed offset. If we try to separate the jrcxz instruction from
+## its destination, e.g. by placing it in a different code fragment, then the
+## link step will fail.
# REQUIRES: system-linux
-# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
-# RUN: %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
-# RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
-# RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
-# RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort \
-# RUN: --split-functions --split-all-cold --split-eh --dyno-stats \
-# RUN: --print-finalized 2>&1 | FileCheck %s
+## Disable relocation mode to leave main fragment in its original location.
-# CHECK-NOT: value of -2105 is too large for field of 1 byte.
+# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
+# RUN: --split-functions --relocs=0
- .text
- .section .text.startup,"ax",@progbits
- .p2align 5,,31
- .globl main
- .type main, @function
+ .text
+ .globl main
+ .type main,@function
main:
- jmp bn_sqrx8x_internal
-
-.globl bn_sqrx8x_internal
-.hidden bn_sqrx8x_internal
-.type bn_sqrx8x_internal,@function
-.align 32
-bn_sqrx8x_internal:
-__bn_sqrx8x_internal:
-# FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56
-# FDATA: 1 bn_sqrx8x_internal 13 1 bn_sqrx8x_internal 40 0 60972
-# FDATA: 1 bn_sqrx8x_internal 5f 1 bn_sqrx8x_internal 2c 0 60972
-# FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972
-# FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972
-# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888
-# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984
-# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972
-# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012
-# FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012
-# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964
-# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008
-# FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008
-# FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908
-# FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908
-# FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020
-# FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020
-# FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336
-# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048
-# FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048
-# FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560
-# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080
-# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032
-# FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032
-# FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048
-# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028
-# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020
-# FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020
+# FDATA: 0 [unknown] 0 1 main 0 0 1
+# FDATA: 1 main 0 1 main #.hot# 0 1
.cfi_startproc
- leaq 48+8(%rsp),%rdi
- leaq (%rsi,%r9,1),%rbp
- movq %r9,0+8(%rsp)
- movq %rbp,8+8(%rsp)
- jmp .Lsqr8x_zero_start
-
-.align 32
-.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
-.Lsqrx8x_zero:
-.byte 0x3e
- movdqa %xmm0,0(%rdi)
- movdqa %xmm0,16(%rdi)
- movdqa %xmm0,32(%rdi)
- movdqa %xmm0,48(%rdi)
-.Lsqr8x_zero_start:
- movdqa %xmm0,64(%rdi)
- movdqa %xmm0,80(%rdi)
- movdqa %xmm0,96(%rdi)
- movdqa %xmm0,112(%rdi)
- leaq 128(%rdi),%rdi
- subq $64,%r9
- jnz .Lsqrx8x_zero
-
- movq 0(%rsi),%rdx
-
- xorq %r10,%r10
- xorq %r11,%r11
- xorq %r12,%r12
- xorq %r13,%r13
- xorq %r14,%r14
- xorq %r15,%r15
- leaq 48+8(%rsp),%rdi
- xorq %rbp,%rbp
- jmp .Lsqrx8x_outer_loop
-
-.align 32
-.Lsqrx8x_outer_loop:
- mulxq 8(%rsi),%r8,%rax
- adcxq %r9,%r8
- adoxq %rax,%r10
- mulxq 16(%rsi),%r9,%rax
- adcxq %r10,%r9
- adoxq %rax,%r11
-.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
- adcxq %r11,%r10
- adoxq %rax,%r12
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
- adcxq %r12,%r11
- adoxq %rax,%r13
- mulxq 40(%rsi),%r12,%rax
- adcxq %r13,%r12
- adoxq %rax,%r14
- mulxq 48(%rsi),%r13,%rax
- adcxq %r14,%r13
- adoxq %r15,%rax
- mulxq 56(%rsi),%r14,%r15
- movq 8(%rsi),%rdx
- adcxq %rax,%r14
- adoxq %rbp,%r15
- adcq 64(%rdi),%r15
- movq %r8,8(%rdi)
- movq %r9,16(%rdi)
- sbbq %rcx,%rcx
- xorq %rbp,%rbp
-
- mulxq 16(%rsi),%r8,%rbx
- mulxq 24(%rsi),%r9,%rax
- adcxq %r10,%r8
- adoxq %rbx,%r9
- mulxq 32(%rsi),%r10,%rbx
- adcxq %r11,%r9
- adoxq %rax,%r10
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
- adcxq %r12,%r10
- adoxq %rbx,%r11
-.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
- adcxq %r13,%r11
- adoxq %r14,%r12
-.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
- movq 16(%rsi),%rdx
- adcxq %rax,%r12
- adoxq %rbx,%r13
- adcxq %r15,%r13
- adoxq %rbp,%r14
- adcxq %rbp,%r14
-
- movq %r8,24(%rdi)
- movq %r9,32(%rdi)
-
- mulxq 24(%rsi),%r8,%rbx
- mulxq 32(%rsi),%r9,%rax
- adcxq %r10,%r8
- adoxq %rbx,%r9
- mulxq 40(%rsi),%r10,%rbx
- adcxq %r11,%r9
- adoxq %rax,%r10
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
- adcxq %r12,%r10
- adoxq %r13,%r11
-.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
-.byte 0x3e
- movq 24(%rsi),%rdx
- adcxq %rbx,%r11
- adoxq %rax,%r12
- adcxq %r14,%r12
- movq %r8,40(%rdi)
- movq %r9,48(%rdi)
- mulxq 32(%rsi),%r8,%rax
- adoxq %rbp,%r13
- adcxq %rbp,%r13
-
- mulxq 40(%rsi),%r9,%rbx
- adcxq %r10,%r8
- adoxq %rax,%r9
- mulxq 48(%rsi),%r10,%rax
- adcxq %r11,%r9
- adoxq %r12,%r10
- mulxq 56(%rsi),%r11,%r12
- movq 32(%rsi),%rdx
- movq 40(%rsi),%r14
- adcxq %rbx,%r10
- adoxq %rax,%r11
- movq 48(%rsi),%r15
- adcxq %r13,%r11
- adoxq %rbp,%r12
- adcxq %rbp,%r12
-
- movq %r8,56(%rdi)
- movq %r9,64(%rdi)
-
- mulxq %r14,%r9,%rax
- movq 56(%rsi),%r8
- adcxq %r10,%r9
- mulxq %r15,%r10,%rbx
- adoxq %rax,%r10
- adcxq %r11,%r10
- mulxq %r8,%r11,%rax
- movq %r14,%rdx
- adoxq %rbx,%r11
- adcxq %r12,%r11
-
- adcxq %rbp,%rax
-
- mulxq %r15,%r14,%rbx
- mulxq %r8,%r12,%r13
- movq %r15,%rdx
- leaq 64(%rsi),%rsi
- adcxq %r14,%r11
- adoxq %rbx,%r12
- adcxq %rax,%r12
- adoxq %rbp,%r13
-
-.byte 0x67,0x67
- mulxq %r8,%r8,%r14
- adcxq %r8,%r13
- adcxq %rbp,%r14
-
- cmpq 8+8(%rsp),%rsi
- je .Lsqrx8x_outer_break
-
- negq %rcx
- movq $-8,%rcx
- movq %rbp,%r15
- movq 64(%rdi),%r8
- adcxq 72(%rdi),%r9
- adcxq 80(%rdi),%r10
- adcxq 88(%rdi),%r11
- adcq 96(%rdi),%r12
- adcq 104(%rdi),%r13
- adcq 112(%rdi),%r14
- adcq 120(%rdi),%r15
- leaq (%rsi),%rbp
- leaq 128(%rdi),%rdi
- sbbq %rax,%rax
-
- movq -64(%rsi),%rdx
- movq %rax,16+8(%rsp)
- movq %rdi,24+8(%rsp)
-
+ jrcxz .Lcold
+.hot:
+ ret
+.Lcold:
xorl %eax,%eax
- jmp .Lsqrx8x_loop
-
-.align 32
-.Lsqrx8x_loop:
- movq %r8,%rbx
- mulxq 0(%rbp),%rax,%r8
- adcxq %rax,%rbx
- adoxq %r9,%r8
-
- mulxq 8(%rbp),%rax,%r9
- adcxq %rax,%r8
- adoxq %r10,%r9
-
- mulxq 16(%rbp),%rax,%r10
- adcxq %rax,%r9
- adoxq %r11,%r10
-
- mulxq 24(%rbp),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
- adcxq %rax,%r11
- adoxq %r13,%r12
-
- mulxq 40(%rbp),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
-
- mulxq 48(%rbp),%rax,%r14
- movq %rbx,(%rdi,%rcx,8)
- movl $0,%ebx
- adcxq %rax,%r13
- adoxq %r15,%r14
-
-.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
- movq 8(%rsi,%rcx,8),%rdx
- adcxq %rax,%r14
- adoxq %rbx,%r15
- adcxq %rbx,%r15
-
-.byte 0x67
- incq %rcx
- jnz .Lsqrx8x_loop
-
- leaq 64(%rbp),%rbp
- movq $-8,%rcx
- cmpq 8+8(%rsp),%rbp
- je .Lsqrx8x_break
-
- subq 16+8(%rsp),%rbx
-.byte 0x66
- movq -64(%rsi),%rdx
- adcxq 0(%rdi),%r8
- adcxq 8(%rdi),%r9
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- leaq 64(%rdi),%rdi
-.byte 0x67
- sbbq %rax,%rax
- xorl %ebx,%ebx
- movq %rax,16+8(%rsp)
- jmp .Lsqrx8x_loop
-
-.align 32
-.Lsqrx8x_break:
- xorq %rbp,%rbp
- subq 16+8(%rsp),%rbx
- adcxq %rbp,%r8
- movq 24+8(%rsp),%rcx
- adcxq %rbp,%r9
- movq 0(%rsi),%rdx
- adcq $0,%r10
- movq %r8,0(%rdi)
- adcq $0,%r11
- adcq $0,%r12
- adcq $0,%r13
- adcq $0,%r14
- adcq $0,%r15
- cmpq %rcx,%rdi
- je .Lsqrx8x_outer_loop
-
- movq %r9,8(%rdi)
- movq 8(%rcx),%r9
- movq %r10,16(%rdi)
- movq 16(%rcx),%r10
- movq %r11,24(%rdi)
- movq 24(%rcx),%r11
- movq %r12,32(%rdi)
- movq 32(%rcx),%r12
- movq %r13,40(%rdi)
- movq 40(%rcx),%r13
- movq %r14,48(%rdi)
- movq 48(%rcx),%r14
- movq %r15,56(%rdi)
- movq 56(%rcx),%r15
- movq %rcx,%rdi
- jmp .Lsqrx8x_outer_loop
-
-.align 32
-.Lsqrx8x_outer_break:
- movq %r9,72(%rdi)
-.byte 102,72,15,126,217
- movq %r10,80(%rdi)
- movq %r11,88(%rdi)
- movq %r12,96(%rdi)
- movq %r13,104(%rdi)
- movq %r14,112(%rdi)
- leaq 48+8(%rsp),%rdi
- movq (%rsi,%rcx,1),%rdx
-
- movq 8(%rdi),%r11
- xorq %r10,%r10
- movq 0+8(%rsp),%r9
- adoxq %r11,%r11
- movq 16(%rdi),%r12
- movq 24(%rdi),%r13
-
-.align 32
-.Lsqrx4x_shift_n_add:
- mulxq %rdx,%rax,%rbx
- adoxq %r12,%r12
- adcxq %r10,%rax
-.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
-.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
- adoxq %r13,%r13
- adcxq %r11,%rbx
- movq 40(%rdi),%r11
- movq %rax,0(%rdi)
- movq %rbx,8(%rdi)
-
- mulxq %rdx,%rax,%rbx
- adoxq %r10,%r10
- adcxq %r12,%rax
- movq 16(%rsi,%rcx,1),%rdx
- movq 48(%rdi),%r12
- adoxq %r11,%r11
- adcxq %r13,%rbx
- movq 56(%rdi),%r13
- movq %rax,16(%rdi)
- movq %rbx,24(%rdi)
-
- mulxq %rdx,%rax,%rbx
- adoxq %r12,%r12
- adcxq %r10,%rax
- movq 24(%rsi,%rcx,1),%rdx
- leaq 32(%rcx),%rcx
- movq 64(%rdi),%r10
- adoxq %r13,%r13
- adcxq %r11,%rbx
- movq 72(%rdi),%r11
- movq %rax,32(%rdi)
- movq %rbx,40(%rdi)
-
- mulxq %rdx,%rax,%rbx
- adoxq %r10,%r10
- adcxq %r12,%rax
- jrcxz .Lsqrx4x_shift_n_add_break
-.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
- adoxq %r11,%r11
- adcxq %r13,%rbx
- movq 80(%rdi),%r12
- movq 88(%rdi),%r13
- movq %rax,48(%rdi)
- movq %rbx,56(%rdi)
- leaq 64(%rdi),%rdi
- nop
- jmp .Lsqrx4x_shift_n_add
-
-.align 32
-.Lsqrx4x_shift_n_add_break:
- adcxq %r13,%rbx
- movq %rax,48(%rdi)
- movq %rbx,56(%rdi)
- leaq 64(%rdi),%rdi
-.byte 102,72,15,126,213
-__bn_sqrx8x_reduction:
- xorl %eax,%eax
- movq 32+8(%rsp),%rbx
- movq 48+8(%rsp),%rdx
- leaq -64(%rbp,%r9,1),%rcx
-
- movq %rcx,0+8(%rsp)
- movq %rdi,8+8(%rsp)
-
- leaq 48+8(%rsp),%rdi
- jmp .Lsqrx8x_reduction_loop
-
-.align 32
-.Lsqrx8x_reduction_loop:
- movq 8(%rdi),%r9
- movq 16(%rdi),%r10
- movq 24(%rdi),%r11
- movq 32(%rdi),%r12
- movq %rdx,%r8
- imulq %rbx,%rdx
- movq 40(%rdi),%r13
- movq 48(%rdi),%r14
- movq 56(%rdi),%r15
- movq %rax,24+8(%rsp)
-
- leaq 64(%rdi),%rdi
- xorq %rsi,%rsi
- movq $-8,%rcx
- jmp .Lsqrx8x_reduce
-
-.align 32
-.Lsqrx8x_reduce:
- movq %r8,%rbx
- mulxq 0(%rbp),%rax,%r8
- adcxq %rbx,%rax
- adoxq %r9,%r8
-
- mulxq 8(%rbp),%rbx,%r9
- adcxq %rbx,%r8
- adoxq %r10,%r9
-
- mulxq 16(%rbp),%rbx,%r10
- adcxq %rbx,%r9
- adoxq %r11,%r10
-
- mulxq 24(%rbp),%rbx,%r11
- adcxq %rbx,%r10
- adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
- movq %rdx,%rax
- movq %r8,%rdx
- adcxq %rbx,%r11
- adoxq %r13,%r12
-
- mulxq 32+8(%rsp),%rbx,%rdx
- movq %rax,%rdx
- movq %rax,64+48+8(%rsp,%rcx,8)
-
- mulxq 40(%rbp),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
-
- mulxq 48(%rbp),%rax,%r14
- adcxq %rax,%r13
- adoxq %r15,%r14
-
- mulxq 56(%rbp),%rax,%r15
- movq %rbx,%rdx
- adcxq %rax,%r14
- adoxq %rsi,%r15
- adcxq %rsi,%r15
-
-.byte 0x67,0x67,0x67
- incq %rcx
- jnz .Lsqrx8x_reduce
-
- movq %rsi,%rax
- cmpq 0+8(%rsp),%rbp
- jae .Lsqrx8x_no_tail
-
- movq 48+8(%rsp),%rdx
- addq 0(%rdi),%r8
- leaq 64(%rbp),%rbp
- movq $-8,%rcx
- adcxq 8(%rdi),%r9
- adcxq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- leaq 64(%rdi),%rdi
- sbbq %rax,%rax
-
- xorq %rsi,%rsi
- movq %rax,16+8(%rsp)
- jmp .Lsqrx8x_tail
-
-.align 32
-.Lsqrx8x_tail:
- movq %r8,%rbx
- mulxq 0(%rbp),%rax,%r8
- adcxq %rax,%rbx
- adoxq %r9,%r8
-
- mulxq 8(%rbp),%rax,%r9
- adcxq %rax,%r8
- adoxq %r10,%r9
-
- mulxq 16(%rbp),%rax,%r10
- adcxq %rax,%r9
- adoxq %r11,%r10
-
- mulxq 24(%rbp),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
- adcxq %rax,%r11
- adoxq %r13,%r12
-
- mulxq 40(%rbp),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
-
- mulxq 48(%rbp),%rax,%r14
- adcxq %rax,%r13
- adoxq %r15,%r14
-
- mulxq 56(%rbp),%rax,%r15
- movq 72+48+8(%rsp,%rcx,8),%rdx
- adcxq %rax,%r14
- adoxq %rsi,%r15
- movq %rbx,(%rdi,%rcx,8)
- movq %r8,%rbx
- adcxq %rsi,%r15
-
- incq %rcx
- jnz .Lsqrx8x_tail
-
- cmpq 0+8(%rsp),%rbp
- jae .Lsqrx8x_tail_done
-
- subq 16+8(%rsp),%rsi
- movq 48+8(%rsp),%rdx
- leaq 64(%rbp),%rbp
- adcq 0(%rdi),%r8
- adcq 8(%rdi),%r9
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- leaq 64(%rdi),%rdi
- sbbq %rax,%rax
- subq $8,%rcx
-
- xorq %rsi,%rsi
- movq %rax,16+8(%rsp)
- jmp .Lsqrx8x_tail
-
-.align 32
-.Lsqrx8x_tail_done:
- xorq %rax,%rax
- addq 24+8(%rsp),%r8
- adcq $0,%r9
- adcq $0,%r10
- adcq $0,%r11
- adcq $0,%r12
- adcq $0,%r13
- adcq $0,%r14
- adcq $0,%r15
- adcq $0,%rax
-
- subq 16+8(%rsp),%rsi
-.Lsqrx8x_no_tail:
- adcq 0(%rdi),%r8
-.byte 102,72,15,126,217
- adcq 8(%rdi),%r9
- movq 56(%rbp),%rsi
-.byte 102,72,15,126,213
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- adcq $0,%rax
-
- movq 32+8(%rsp),%rbx
- movq 64(%rdi,%rcx,1),%rdx
-
- movq %r8,0(%rdi)
- leaq 64(%rdi),%r8
- movq %r9,8(%rdi)
- movq %r10,16(%rdi)
- movq %r11,24(%rdi)
- movq %r12,32(%rdi)
- movq %r13,40(%rdi)
- movq %r14,48(%rdi)
- movq %r15,56(%rdi)
-
- leaq 64(%rdi,%rcx,1),%rdi
- cmpq 8+8(%rsp),%r8
- jb .Lsqrx8x_reduction_loop
- .byte 0xf3,0xc3
+ ret
.cfi_endproc
-.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
+.size main,.-main