diff options
author | Hongtao Liu <hongtao.liu@intel.com> | 2020-03-03 06:21:37 -0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2020-03-03 06:21:37 -0800 |
commit | 79d72f45a7c9b8b92c7d0452caf968b6709a0d44 (patch) | |
tree | 1cfe668a14e35d6787222a7d6ab4ff159af70597 /gas/testsuite | |
parent | bb7b70ab85fb9d185d1fa6bad192c90fe9c17ff3 (diff) | |
download | gdb-79d72f45a7c9b8b92c7d0452caf968b6709a0d44.zip gdb-79d72f45a7c9b8b92c7d0452caf968b6709a0d44.tar.gz gdb-79d72f45a7c9b8b92c7d0452caf968b6709a0d44.tar.bz2 |
x86: Improve -malign-branch
According to the Intel SDM, not all flag-modifying (compare-like) instructions
are macro-fusible with subsequent jcc instructions. For those non-fusible
instructions, -malign-branch doesn't need to align them; only the jcc itself
needs to be aligned.
Here are 2 restrictions which separate macro-fusible instructions from non-fusible ones:
Restriction 1:
If TEST/AND/CMP/ADD/SUB/INC/DEC is in one of the following formats:
cmp m, imm
add m, imm
sub m, imm
test m, imm
and m, imm
inc m
dec m
it is unfusible with any jcc instruction.
Restriction 2:
Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
Note it also works for Skylake and Cascadelake.
---------------------------------------------------------------------
| JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
| ------ | ----------- | ------- | -------- |
| Jo | N | N | Y |
| Jno | N | N | Y |
| Jc/Jb | Y | N | Y |
| Jae/Jnb | Y | N | Y |
| Je/Jz | Y | Y | Y |
| Jne/Jnz | Y | Y | Y |
| Jna/Jbe | Y | N | Y |
| Ja/Jnbe | Y | N | Y |
| Js | N | N | Y |
| Jns | N | N | Y |
| Jp/Jpe | N | N | Y |
| Jnp/Jpo | N | N | Y |
| Jl/Jnge | Y | Y | Y |
| Jge/Jnl | Y | Y | Y |
| Jle/Jng | Y | Y | Y |
| Jg/Jnle | Y | Y | Y |
Update maybe_fused_with_jcc_p to check whether the operands of CMP-like
instructions can be fused with a conditional jump.
* gas/config/tc-i386.h (i386_tc_frag_data): Add member mf_type.
(TC_FRAG_INIT): Init mf_type.
* gas/config/tc-i386.c (enum mf_jcc_kind): New enum.
(enum mf_cmp_kind): Ditto.
(maybe_fused_with_jcc_p): Add argument mf_cmp_p to get
mf_type of corresponding instructions, exclude unfusible
instructions.
(add_fused_jcc_padding_frag_p): Likewise.
(add_branch_padding_frag_p): Likewise.
(output_insn): Record mf_type for corresponding instructions.
(i386_macro_fusible_p): New function.
(i386_next_fusible_jcc_frag): Rename from i386_next_jcc_frag,
add argument cmp_fragP to return next fusible jcc frag only.
(i386_classify_machine_dependent_frag): Separate macro-fusible
instructions from conditional jumps.
* gas/testsuite/gas/i386/align-branch-9.s: New file.
* gas/testsuite/gas/i386/align-branch-9.d: Ditto.
* gas/testsuite/gas/i386/x86-64-align-branch-9.s: Ditto.
* gas/testsuite/gas/i386/x86-64-align-branch-9.d: Ditto.
* gas/testsuite/gas/i386/i386.exp: Run new tests.
Diffstat (limited to 'gas/testsuite')
-rw-r--r-- | gas/testsuite/gas/i386/align-branch-9.d | 78 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/align-branch-9.s | 74 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/i386.exp | 2 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-align-branch-9.d | 46 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-align-branch-9.s | 43 |
5 files changed, 243 insertions, 0 deletions
diff --git a/gas/testsuite/gas/i386/align-branch-9.d b/gas/testsuite/gas/i386/align-branch-9.d new file mode 100644 index 0000000..6340817 --- /dev/null +++ b/gas/testsuite/gas/i386/align-branch-9.d @@ -0,0 +1,78 @@ +#as: -mbranches-within-32B-boundaries +#objdump: -dw + +.*: +file format .* + +Disassembly of section .text: + +0+ <foo>: + 0: 65 a3 01 00 00 00 mov %eax,%gs:0x1 + 6: 55 push %ebp + 7: 55 push %ebp + 8: 55 push %ebp + 9: 55 push %ebp + a: 89 e5 mov %esp,%ebp + c: 89 7d f8 mov %edi,-0x8\(%ebp\) + f: 89 75 f4 mov %esi,-0xc\(%ebp\) + 12: 89 75 f4 mov %esi,-0xc\(%ebp\) + 15: 89 75 f4 mov %esi,-0xc\(%ebp\) + 18: 89 75 f4 mov %esi,-0xc\(%ebp\) + 1b: 89 75 f4 mov %esi,-0xc\(%ebp\) + 1e: 39 c5 cmp %eax,%ebp + 20: 70 62 jo 84 <foo\+0x84> + 22: 89 73 f4 mov %esi,-0xc\(%ebx\) + 25: 89 75 f4 mov %esi,-0xc\(%ebp\) + 28: 89 7d f8 mov %edi,-0x8\(%ebp\) + 2b: 89 75 f4 mov %esi,-0xc\(%ebp\) + 2e: 89 75 f4 mov %esi,-0xc\(%ebp\) + 31: 89 75 f4 mov %esi,-0xc\(%ebp\) + 34: 89 75 f4 mov %esi,-0xc\(%ebp\) + 37: 89 75 f4 mov %esi,-0xc\(%ebp\) + 3a: 5d pop %ebp + 3b: 5d pop %ebp + 3c: 5d pop %ebp + 3d: 74 45 je 84 <foo\+0x84> + 3f: 5d pop %ebp + 40: 74 42 je 84 <foo\+0x84> + 42: 89 44 24 fc mov %eax,-0x4\(%esp\) + 46: 89 75 f4 mov %esi,-0xc\(%ebp\) + 49: 89 7d f8 mov %edi,-0x8\(%ebp\) + 4c: 89 75 f4 mov %esi,-0xc\(%ebp\) + 4f: 89 75 f4 mov %esi,-0xc\(%ebp\) + 52: 89 75 f4 mov %esi,-0xc\(%ebp\) + 55: 89 75 f4 mov %esi,-0xc\(%ebp\) + 58: 89 75 f4 mov %esi,-0xc\(%ebp\) + 5b: 5d pop %ebp + 5c: eb 2c jmp 8a <foo\+0x8a> + 5e: 66 90 xchg %ax,%ax + 60: eb 28 jmp 8a <foo\+0x8a> + 62: eb 26 jmp 8a <foo\+0x8a> + 64: 89 45 fc mov %eax,-0x4\(%ebp\) + 67: 89 75 f4 mov %esi,-0xc\(%ebp\) + 6a: 89 7d f8 mov %edi,-0x8\(%ebp\) + 6d: 5d pop %ebp + 6e: 5d pop %ebp + 6f: 40 inc %eax + 70: 72 12 jb 84 <foo\+0x84> + 72: 36 36 89 45 fc ss mov %eax,%ss:-0x4\(%ebp\) + 77: 89 75 f4 mov %esi,-0xc\(%ebp\) + 7a: 89 7d f8 mov %edi,-0x8\(%ebp\) + 7d: 89 75 f4 mov %esi,-0xc\(%ebp\) + 80: 21 c3 and %eax,%ebx + 
82: 7c 06 jl 8a <foo\+0x8a> + 84: 8b 45 f4 mov -0xc\(%ebp\),%eax + 87: 89 45 fc mov %eax,-0x4\(%ebp\) + 8a: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + 90: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + 96: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + 9c: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + a2: 89 75 0c mov %esi,0xc\(%ebp\) + a5: e9 fc ff ff ff jmp a6 <foo\+0xa6> + aa: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + b0: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + b6: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + bc: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + c2: 89 75 00 mov %esi,0x0\(%ebp\) + c5: 74 c3 je 8a <foo\+0x8a> + c7: 74 c1 je 8a <foo\+0x8a> +#pass diff --git a/gas/testsuite/gas/i386/align-branch-9.s b/gas/testsuite/gas/i386/align-branch-9.s new file mode 100644 index 0000000..357abe3 --- /dev/null +++ b/gas/testsuite/gas/i386/align-branch-9.s @@ -0,0 +1,74 @@ + .text + .globl foo + .p2align 4 +foo: + movl %eax, %gs:0x1 + pushl %ebp + pushl %ebp + pushl %ebp + pushl %ebp + movl %esp, %ebp + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + cmp %eax, %ebp + jo .L_2 + movl %esi, -12(%ebx) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + popl %ebp + popl %ebp + popl %ebp + je .L_2 + popl %ebp + je .L_2 + movl %eax, -4(%esp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + popl %ebp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%ebp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + popl %ebp + popl %ebp + inc %eax + jc .L_2 + movl %eax, -4(%ebp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + and %eax, %ebx + jl .L_3 +.L_2: + movl -12(%ebp), %eax + movl %eax, -4(%ebp) +.L_3: + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl 
%esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, 12(%ebp) + jmp bar + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, (%ebp) + je .L_3 + je .L_3 diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index 685e62e..8fc621f 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -525,6 +525,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]] run_dump_test "align-branch-6" run_dump_test "align-branch-7" run_dump_test "align-branch-8" + run_dump_test "align-branch-9" # These tests require support for 8 and 16 bit relocs, # so we only run them for ELF and COFF targets. @@ -1100,6 +1101,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t run_dump_test "x86-64-align-branch-6" run_dump_test "x86-64-align-branch-7" run_dump_test "x86-64-align-branch-8" + run_dump_test "x86-64-align-branch-9" if { ![istarget "*-*-aix*"] && ![istarget "*-*-beos*"] diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.d b/gas/testsuite/gas/i386/x86-64-align-branch-9.d new file mode 100644 index 0000000..1041fd0 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.d @@ -0,0 +1,46 @@ +#as: -mbranches-within-32B-boundaries +#objdump: -dw + +.*: +file format .* + +Disassembly of section .text: + +0+ <foo>: + 0: c1 e9 02 shr \$0x2,%ecx + 3: c1 e9 02 shr \$0x2,%ecx + 6: c1 e9 02 shr \$0x2,%ecx + 9: 89 d1 mov %edx,%ecx + b: 31 c0 xor %eax,%eax + d: c1 e9 02 shr \$0x2,%ecx + 10: c1 e9 02 shr \$0x2,%ecx + 13: c1 e9 02 shr \$0x2,%ecx + 16: c1 e9 02 shr \$0x2,%ecx + 19: c1 e9 02 shr \$0x2,%ecx + 1c: c1 e9 02 shr \$0x2,%ecx + 1f: 80 fa 02 cmp \$0x2,%dl + 22: 70 df jo 3 <foo\+0x3> + 24: 2e 2e 2e 2e 31 c0 cs cs cs cs xor %eax,%eax + 2a: c1 e9 02 shr \$0x2,%ecx + 2d: c1 e9 02 shr \$0x2,%ecx + 30: c1 e9 02 shr \$0x2,%ecx + 33: 89 d1 mov %edx,%ecx + 35: 31 c0 xor %eax,%eax + 37: c1 e9 02 shr \$0x2,%ecx + 
3a: c1 e9 02 shr \$0x2,%ecx + 3d: c1 e9 02 shr \$0x2,%ecx + 40: f6 c2 02 test \$0x2,%dl + 43: 75 e8 jne 2d <foo\+0x2d> + 45: 31 c0 xor %eax,%eax + 47: c1 e9 02 shr \$0x2,%ecx + 4a: c1 e9 02 shr \$0x2,%ecx + 4d: 89 d1 mov %edx,%ecx + 4f: c1 e9 02 shr \$0x2,%ecx + 52: c1 e9 02 shr \$0x2,%ecx + 55: 89 d1 mov %edx,%ecx + 57: c1 e9 02 shr \$0x2,%ecx + 5a: 89 d1 mov %edx,%ecx + 5c: 31 c0 xor %eax,%eax + 5e: ff c0 inc %eax + 60: 76 cb jbe 2d <foo\+0x2d> + 62: 31 c0 xor %eax,%eax +#pass diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.s b/gas/testsuite/gas/i386/x86-64-align-branch-9.s new file mode 100644 index 0000000..917579b --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.s @@ -0,0 +1,43 @@ + .text + .p2align 4,,15 +foo: + shrl $2, %ecx +.L1: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + cmpb $2, %dl + jo .L1 + xorl %eax, %eax + shrl $2, %ecx +.L2: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + testb $2, %dl + jne .L2 + xorl %eax, %eax +.L3: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + inc %eax + jbe .L2 + xorl %eax, %eax |