diff options
author | Maksim Panchenko <maks@fb.com> | 2024-06-17 16:45:34 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-17 16:45:34 -0700 |
commit | c67ecf385395ecb6184faf577b5b60367c923aa8 (patch) | |
tree | 6efb78197442080787b3948399721d46047104ab /bolt/test | |
parent | bea329ecb0bd756d2e169169fb4333e9cd4d2dae (diff) | |
download | llvm-c67ecf385395ecb6184faf577b5b60367c923aa8.zip llvm-c67ecf385395ecb6184faf577b5b60367c923aa8.tar.gz llvm-c67ecf385395ecb6184faf577b5b60367c923aa8.tar.bz2 |
[BOLT][tests] Fix jrcxz instruction test (#95861)
Rewrite the test case intended to check that BOLT does not separate
jrcxz instruction from its destination by more than a one-byte offset.
Diffstat (limited to 'bolt/test')
-rw-r--r-- | bolt/test/X86/bug-reorder-bb-jrcxz.s | 649 |
1 files changed, 21 insertions, 628 deletions
diff --git a/bolt/test/X86/bug-reorder-bb-jrcxz.s b/bolt/test/X86/bug-reorder-bb-jrcxz.s index d5ac354..8a11ac4 100644 --- a/bolt/test/X86/bug-reorder-bb-jrcxz.s +++ b/bolt/test/X86/bug-reorder-bb-jrcxz.s @@ -1,640 +1,33 @@ -## Test performs a BB reordering with unsupported -## instruction jrcxz. Reordering works correctly with the -## follow options: None, Normal or Reverse. Other strategies -## are completed with Assertion `isIntN(Size * 8 + 1, Value). -## The cause is the distance between BB where one contains -## jrcxz instruction. -## Example: OpenSSL -## https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319 +## Check that BOLT handles code with jrcxz instruction that has a one-byte +## signed offset restriction. If we try to separate jrcxz instruction from its +## destination, e.g. by placing it in a different code fragment, then the link +## step will fail. # REQUIRES: system-linux -# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ -# RUN: %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o # RUN: link_fdata %s %t.o %t.fdata -# RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \ -# RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort \ -# RUN: --split-functions --split-all-cold --split-eh --dyno-stats \ -# RUN: --print-finalized 2>&1 | FileCheck %s +## Disable relocation mode to leave main fragment in its original location. -# CHECK-NOT: value of -2105 is too large for field of 1 byte. +# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \ +# RUN: --split-functions --relocs=0 - .text - .section .text.startup,"ax",@progbits - .p2align 5,,31 - .globl main - .type main, @function + .text + .globl main + .type main,@function main: - jmp bn_sqrx8x_internal - -.globl bn_sqrx8x_internal -.hidden bn_sqrx8x_internal -.type bn_sqrx8x_internal,@function -.align 32 -bn_sqrx8x_internal: -__bn_sqrx8x_internal: -# FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56 -# FDATA: 1 bn_sqrx8x_internal 13 1 bn_sqrx8x_internal 40 0 60972 -# FDATA: 1 bn_sqrx8x_internal 5f 1 bn_sqrx8x_internal 2c 0 60972 -# FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972 -# FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972 -# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888 -# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984 -# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972 -# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012 -# FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012 -# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964 -# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008 -# FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008 -# FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908 -# FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908 -# FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020 -# FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020 -# FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048 -# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336 -# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048 -# FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048 -# FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048 -# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560 -# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080 -# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048 -# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032 -# FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032 -# FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048 -# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028 -# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020 -# FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020 +# FDATA: 0 [unknown] 0 1 main 0 0 1 +# FDATA: 1 main 0 1 main #.hot# 0 1 .cfi_startproc - leaq 48+8(%rsp),%rdi - leaq (%rsi,%r9,1),%rbp - movq %r9,0+8(%rsp) - movq %rbp,8+8(%rsp) - jmp .Lsqr8x_zero_start - -.align 32 -.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 -.Lsqrx8x_zero: -.byte 0x3e - movdqa %xmm0,0(%rdi) - movdqa %xmm0,16(%rdi) - movdqa %xmm0,32(%rdi) - movdqa %xmm0,48(%rdi) -.Lsqr8x_zero_start: - movdqa %xmm0,64(%rdi) - movdqa %xmm0,80(%rdi) - movdqa %xmm0,96(%rdi) - movdqa %xmm0,112(%rdi) - leaq 128(%rdi),%rdi - subq $64,%r9 - jnz .Lsqrx8x_zero - - movq 0(%rsi),%rdx - - xorq %r10,%r10 - xorq %r11,%r11 - xorq %r12,%r12 - xorq %r13,%r13 - xorq %r14,%r14 - xorq %r15,%r15 - leaq 48+8(%rsp),%rdi - xorq %rbp,%rbp - jmp .Lsqrx8x_outer_loop - -.align 32 -.Lsqrx8x_outer_loop: - mulxq 8(%rsi),%r8,%rax - adcxq %r9,%r8 - adoxq %rax,%r10 - mulxq 16(%rsi),%r9,%rax - adcxq %r10,%r9 - adoxq %rax,%r11 -.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 - adcxq %r11,%r10 - adoxq %rax,%r12 -.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 - adcxq %r12,%r11 - adoxq %rax,%r13 - mulxq 40(%rsi),%r12,%rax - adcxq %r13,%r12 - adoxq %rax,%r14 - mulxq 48(%rsi),%r13,%rax - adcxq %r14,%r13 - adoxq %r15,%rax - mulxq 56(%rsi),%r14,%r15 - movq 8(%rsi),%rdx - adcxq %rax,%r14 - adoxq %rbp,%r15 - adcq 64(%rdi),%r15 - movq %r8,8(%rdi) - movq %r9,16(%rdi) - sbbq %rcx,%rcx - xorq %rbp,%rbp - - mulxq 16(%rsi),%r8,%rbx - mulxq 24(%rsi),%r9,%rax - adcxq %r10,%r8 - adoxq %rbx,%r9 - mulxq 32(%rsi),%r10,%rbx - adcxq %r11,%r9 - adoxq %rax,%r10 -.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 - adcxq %r12,%r10 - adoxq %rbx,%r11 -.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 - adcxq %r13,%r11 - adoxq %r14,%r12 -.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 - movq 16(%rsi),%rdx - adcxq %rax,%r12 - adoxq %rbx,%r13 - adcxq %r15,%r13 - adoxq %rbp,%r14 - adcxq %rbp,%r14 - - movq %r8,24(%rdi) - movq %r9,32(%rdi) - - mulxq 24(%rsi),%r8,%rbx - mulxq 32(%rsi),%r9,%rax - adcxq %r10,%r8 - adoxq %rbx,%r9 - mulxq 40(%rsi),%r10,%rbx - adcxq %r11,%r9 - adoxq %rax,%r10 -.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 - adcxq %r12,%r10 - adoxq %r13,%r11 -.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 -.byte 0x3e - movq 24(%rsi),%rdx - adcxq %rbx,%r11 - adoxq %rax,%r12 - adcxq %r14,%r12 - movq %r8,40(%rdi) - movq %r9,48(%rdi) - mulxq 32(%rsi),%r8,%rax - adoxq %rbp,%r13 - adcxq %rbp,%r13 - - mulxq 40(%rsi),%r9,%rbx - adcxq %r10,%r8 - adoxq %rax,%r9 - mulxq 48(%rsi),%r10,%rax - adcxq %r11,%r9 - adoxq %r12,%r10 - mulxq 56(%rsi),%r11,%r12 - movq 32(%rsi),%rdx - movq 40(%rsi),%r14 - adcxq %rbx,%r10 - adoxq %rax,%r11 - movq 48(%rsi),%r15 - adcxq %r13,%r11 - adoxq %rbp,%r12 - adcxq %rbp,%r12 - - movq %r8,56(%rdi) - movq %r9,64(%rdi) - - mulxq %r14,%r9,%rax - movq 56(%rsi),%r8 - adcxq %r10,%r9 - mulxq %r15,%r10,%rbx - adoxq %rax,%r10 - adcxq %r11,%r10 - mulxq %r8,%r11,%rax - movq %r14,%rdx - adoxq %rbx,%r11 - adcxq %r12,%r11 - - adcxq %rbp,%rax - - mulxq %r15,%r14,%rbx - mulxq %r8,%r12,%r13 - movq %r15,%rdx - leaq 64(%rsi),%rsi - adcxq %r14,%r11 - adoxq %rbx,%r12 - adcxq %rax,%r12 - adoxq %rbp,%r13 - -.byte 0x67,0x67 - mulxq %r8,%r8,%r14 - adcxq %r8,%r13 - adcxq %rbp,%r14 - - cmpq 8+8(%rsp),%rsi - je .Lsqrx8x_outer_break - - negq %rcx - movq $-8,%rcx - movq %rbp,%r15 - movq 64(%rdi),%r8 - adcxq 72(%rdi),%r9 - adcxq 80(%rdi),%r10 - adcxq 88(%rdi),%r11 - adcq 96(%rdi),%r12 - adcq 104(%rdi),%r13 - adcq 112(%rdi),%r14 - adcq 120(%rdi),%r15 - leaq (%rsi),%rbp - leaq 128(%rdi),%rdi - sbbq %rax,%rax - - movq -64(%rsi),%rdx - movq %rax,16+8(%rsp) - movq %rdi,24+8(%rsp) - + jrcxz .Lcold +.hot: + ret +.Lcold: xorl %eax,%eax - jmp .Lsqrx8x_loop - -.align 32 -.Lsqrx8x_loop: - movq %r8,%rbx - mulxq 0(%rbp),%rax,%r8 - adcxq %rax,%rbx - adoxq %r9,%r8 - - mulxq 8(%rbp),%rax,%r9 - adcxq %rax,%r8 - adoxq %r10,%r9 - - mulxq 16(%rbp),%rax,%r10 - adcxq %rax,%r9 - adoxq %r11,%r10 - - mulxq 24(%rbp),%rax,%r11 - adcxq %rax,%r10 - adoxq %r12,%r11 - -.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 - adcxq %rax,%r11 - adoxq %r13,%r12 - - mulxq 40(%rbp),%rax,%r13 - adcxq %rax,%r12 - adoxq %r14,%r13 - - mulxq 48(%rbp),%rax,%r14 - movq %rbx,(%rdi,%rcx,8) - movl $0,%ebx - adcxq %rax,%r13 - adoxq %r15,%r14 - -.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 - movq 8(%rsi,%rcx,8),%rdx - adcxq %rax,%r14 - adoxq %rbx,%r15 - adcxq %rbx,%r15 - -.byte 0x67 - incq %rcx - jnz .Lsqrx8x_loop - - leaq 64(%rbp),%rbp - movq $-8,%rcx - cmpq 8+8(%rsp),%rbp - je .Lsqrx8x_break - - subq 16+8(%rsp),%rbx -.byte 0x66 - movq -64(%rsi),%rdx - adcxq 0(%rdi),%r8 - adcxq 8(%rdi),%r9 - adcq 16(%rdi),%r10 - adcq 24(%rdi),%r11 - adcq 32(%rdi),%r12 - adcq 40(%rdi),%r13 - adcq 48(%rdi),%r14 - adcq 56(%rdi),%r15 - leaq 64(%rdi),%rdi -.byte 0x67 - sbbq %rax,%rax - xorl %ebx,%ebx - movq %rax,16+8(%rsp) - jmp .Lsqrx8x_loop - -.align 32 -.Lsqrx8x_break: - xorq %rbp,%rbp - subq 16+8(%rsp),%rbx - adcxq %rbp,%r8 - movq 24+8(%rsp),%rcx - adcxq %rbp,%r9 - movq 0(%rsi),%rdx - adcq $0,%r10 - movq %r8,0(%rdi) - adcq $0,%r11 - adcq $0,%r12 - adcq $0,%r13 - adcq $0,%r14 - adcq $0,%r15 - cmpq %rcx,%rdi - je .Lsqrx8x_outer_loop - - movq %r9,8(%rdi) - movq 8(%rcx),%r9 - movq %r10,16(%rdi) - movq 16(%rcx),%r10 - movq %r11,24(%rdi) - movq 24(%rcx),%r11 - movq %r12,32(%rdi) - movq 32(%rcx),%r12 - movq %r13,40(%rdi) - movq 40(%rcx),%r13 - movq %r14,48(%rdi) - movq 48(%rcx),%r14 - movq %r15,56(%rdi) - movq 56(%rcx),%r15 - movq %rcx,%rdi - jmp .Lsqrx8x_outer_loop - -.align 32 -.Lsqrx8x_outer_break: - movq %r9,72(%rdi) -.byte 102,72,15,126,217 - movq %r10,80(%rdi) - movq %r11,88(%rdi) - movq %r12,96(%rdi) - movq %r13,104(%rdi) - movq %r14,112(%rdi) - leaq 48+8(%rsp),%rdi - movq (%rsi,%rcx,1),%rdx - - movq 8(%rdi),%r11 - xorq %r10,%r10 - movq 0+8(%rsp),%r9 - adoxq %r11,%r11 - movq 16(%rdi),%r12 - movq 24(%rdi),%r13 - -.align 32 -.Lsqrx4x_shift_n_add: - mulxq %rdx,%rax,%rbx - adoxq %r12,%r12 - adcxq %r10,%rax -.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 -.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 - adoxq %r13,%r13 - adcxq %r11,%rbx - movq 40(%rdi),%r11 - movq %rax,0(%rdi) - movq %rbx,8(%rdi) - - mulxq %rdx,%rax,%rbx - adoxq %r10,%r10 - adcxq %r12,%rax - movq 16(%rsi,%rcx,1),%rdx - movq 48(%rdi),%r12 - adoxq %r11,%r11 - adcxq %r13,%rbx - movq 56(%rdi),%r13 - movq %rax,16(%rdi) - movq %rbx,24(%rdi) - - mulxq %rdx,%rax,%rbx - adoxq %r12,%r12 - adcxq %r10,%rax - movq 24(%rsi,%rcx,1),%rdx - leaq 32(%rcx),%rcx - movq 64(%rdi),%r10 - adoxq %r13,%r13 - adcxq %r11,%rbx - movq 72(%rdi),%r11 - movq %rax,32(%rdi) - movq %rbx,40(%rdi) - - mulxq %rdx,%rax,%rbx - adoxq %r10,%r10 - adcxq %r12,%rax - jrcxz .Lsqrx4x_shift_n_add_break -.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 - adoxq %r11,%r11 - adcxq %r13,%rbx - movq 80(%rdi),%r12 - movq 88(%rdi),%r13 - movq %rax,48(%rdi) - movq %rbx,56(%rdi) - leaq 64(%rdi),%rdi - nop - jmp .Lsqrx4x_shift_n_add - -.align 32 -.Lsqrx4x_shift_n_add_break: - adcxq %r13,%rbx - movq %rax,48(%rdi) - movq %rbx,56(%rdi) - leaq 64(%rdi),%rdi -.byte 102,72,15,126,213 -__bn_sqrx8x_reduction: - xorl %eax,%eax - movq 32+8(%rsp),%rbx - movq 48+8(%rsp),%rdx - leaq -64(%rbp,%r9,1),%rcx - - movq %rcx,0+8(%rsp) - movq %rdi,8+8(%rsp) - - leaq 48+8(%rsp),%rdi - jmp .Lsqrx8x_reduction_loop - -.align 32 -.Lsqrx8x_reduction_loop: - movq 8(%rdi),%r9 - movq 16(%rdi),%r10 - movq 24(%rdi),%r11 - movq 32(%rdi),%r12 - movq %rdx,%r8 - imulq %rbx,%rdx - movq 40(%rdi),%r13 - movq 48(%rdi),%r14 - movq 56(%rdi),%r15 - movq %rax,24+8(%rsp) - - leaq 64(%rdi),%rdi - xorq %rsi,%rsi - movq $-8,%rcx - jmp .Lsqrx8x_reduce - -.align 32 -.Lsqrx8x_reduce: - movq %r8,%rbx - mulxq 0(%rbp),%rax,%r8 - adcxq %rbx,%rax - adoxq %r9,%r8 - - mulxq 8(%rbp),%rbx,%r9 - adcxq %rbx,%r8 - adoxq %r10,%r9 - - mulxq 16(%rbp),%rbx,%r10 - adcxq %rbx,%r9 - adoxq %r11,%r10 - - mulxq 24(%rbp),%rbx,%r11 - adcxq %rbx,%r10 - adoxq %r12,%r11 - -.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 - movq %rdx,%rax - movq %r8,%rdx - adcxq %rbx,%r11 - adoxq %r13,%r12 - - mulxq 32+8(%rsp),%rbx,%rdx - movq %rax,%rdx - movq %rax,64+48+8(%rsp,%rcx,8) - - mulxq 40(%rbp),%rax,%r13 - adcxq %rax,%r12 - adoxq %r14,%r13 - - mulxq 48(%rbp),%rax,%r14 - adcxq %rax,%r13 - adoxq %r15,%r14 - - mulxq 56(%rbp),%rax,%r15 - movq %rbx,%rdx - adcxq %rax,%r14 - adoxq %rsi,%r15 - adcxq %rsi,%r15 - -.byte 0x67,0x67,0x67 - incq %rcx - jnz .Lsqrx8x_reduce - - movq %rsi,%rax - cmpq 0+8(%rsp),%rbp - jae .Lsqrx8x_no_tail - - movq 48+8(%rsp),%rdx - addq 0(%rdi),%r8 - leaq 64(%rbp),%rbp - movq $-8,%rcx - adcxq 8(%rdi),%r9 - adcxq 16(%rdi),%r10 - adcq 24(%rdi),%r11 - adcq 32(%rdi),%r12 - adcq 40(%rdi),%r13 - adcq 48(%rdi),%r14 - adcq 56(%rdi),%r15 - leaq 64(%rdi),%rdi - sbbq %rax,%rax - - xorq %rsi,%rsi - movq %rax,16+8(%rsp) - jmp .Lsqrx8x_tail - -.align 32 -.Lsqrx8x_tail: - movq %r8,%rbx - mulxq 0(%rbp),%rax,%r8 - adcxq %rax,%rbx - adoxq %r9,%r8 - - mulxq 8(%rbp),%rax,%r9 - adcxq %rax,%r8 - adoxq %r10,%r9 - - mulxq 16(%rbp),%rax,%r10 - adcxq %rax,%r9 - adoxq %r11,%r10 - - mulxq 24(%rbp),%rax,%r11 - adcxq %rax,%r10 - adoxq %r12,%r11 - -.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 - adcxq %rax,%r11 - adoxq %r13,%r12 - - mulxq 40(%rbp),%rax,%r13 - adcxq %rax,%r12 - adoxq %r14,%r13 - - mulxq 48(%rbp),%rax,%r14 - adcxq %rax,%r13 - adoxq %r15,%r14 - - mulxq 56(%rbp),%rax,%r15 - movq 72+48+8(%rsp,%rcx,8),%rdx - adcxq %rax,%r14 - adoxq %rsi,%r15 - movq %rbx,(%rdi,%rcx,8) - movq %r8,%rbx - adcxq %rsi,%r15 - - incq %rcx - jnz .Lsqrx8x_tail - - cmpq 0+8(%rsp),%rbp - jae .Lsqrx8x_tail_done - - subq 16+8(%rsp),%rsi - movq 48+8(%rsp),%rdx - leaq 64(%rbp),%rbp - adcq 0(%rdi),%r8 - adcq 8(%rdi),%r9 - adcq 16(%rdi),%r10 - adcq 24(%rdi),%r11 - adcq 32(%rdi),%r12 - adcq 40(%rdi),%r13 - adcq 48(%rdi),%r14 - adcq 56(%rdi),%r15 - leaq 64(%rdi),%rdi - sbbq %rax,%rax - subq $8,%rcx - - xorq %rsi,%rsi - movq %rax,16+8(%rsp) - jmp .Lsqrx8x_tail - -.align 32 -.Lsqrx8x_tail_done: - xorq %rax,%rax - addq 24+8(%rsp),%r8 - adcq $0,%r9 - adcq $0,%r10 - adcq $0,%r11 - adcq $0,%r12 - adcq $0,%r13 - adcq $0,%r14 - adcq $0,%r15 - adcq $0,%rax - - subq 16+8(%rsp),%rsi -.Lsqrx8x_no_tail: - adcq 0(%rdi),%r8 -.byte 102,72,15,126,217 - adcq 8(%rdi),%r9 - movq 56(%rbp),%rsi -.byte 102,72,15,126,213 - adcq 16(%rdi),%r10 - adcq 24(%rdi),%r11 - adcq 32(%rdi),%r12 - adcq 40(%rdi),%r13 - adcq 48(%rdi),%r14 - adcq 56(%rdi),%r15 - adcq $0,%rax - - movq 32+8(%rsp),%rbx - movq 64(%rdi,%rcx,1),%rdx - - movq %r8,0(%rdi) - leaq 64(%rdi),%r8 - movq %r9,8(%rdi) - movq %r10,16(%rdi) - movq %r11,24(%rdi) - movq %r12,32(%rdi) - movq %r13,40(%rdi) - movq %r14,48(%rdi) - movq %r15,56(%rdi) - - leaq 64(%rdi,%rcx,1),%rdi - cmpq 8+8(%rsp),%r8 - jb .Lsqrx8x_reduction_loop - .byte 0xf3,0xc3 + ret .cfi_endproc -.size bn_sqrx8x_internal,.-bn_sqrx8x_internal +.size main,.-main |