aboutsummaryrefslogtreecommitdiff
path: root/gas/testsuite
diff options
context:
space:
mode:
authorHongtao Liu <hongtao.liu@intel.com>2020-03-03 06:21:37 -0800
committerH.J. Lu <hjl.tools@gmail.com>2020-03-03 06:21:37 -0800
commit79d72f45a7c9b8b92c7d0452caf968b6709a0d44 (patch)
tree1cfe668a14e35d6787222a7d6ab4ff159af70597 /gas/testsuite
parentbb7b70ab85fb9d185d1fa6bad192c90fe9c17ff3 (diff)
downloadgdb-79d72f45a7c9b8b92c7d0452caf968b6709a0d44.zip
gdb-79d72f45a7c9b8b92c7d0452caf968b6709a0d44.tar.gz
gdb-79d72f45a7c9b8b92c7d0452caf968b6709a0d44.tar.bz2
x86: Improve -malign-branch
According to the Intel SDM, not all flag-modifying compare-like instructions are macro-fusible with a subsequent jcc instruction. For those non-fusible instructions, -malign-branch doesn't need to align them; only the jcc itself needs to be aligned.

Here are the 2 restrictions which separate macro-fusible instructions from non-fusible ones.

Restriction 1: If TEST/AND/CMP/ADD/SUB/INC/DEC has one of the following formats:

    cmp m, imm
    add m, imm
    sub m, imm
    test m, imm
    and m, imm
    inc m
    dec m

it is unfusible with any jcc instruction.

Restriction 2:

    Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
    Note it also works for Skylake and Cascadelake.
    ---------------------------------------------------------------------
    |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
    | ------- | ----------- | ------- | -------- |
    | Jo      |      N      |    N    |    Y     |
    | Jno     |      N      |    N    |    Y     |
    | Jc/Jb   |      Y      |    N    |    Y     |
    | Jae/Jnb |      Y      |    N    |    Y     |
    | Je/Jz   |      Y      |    Y    |    Y     |
    | Jne/Jnz |      Y      |    Y    |    Y     |
    | Jna/Jbe |      Y      |    N    |    Y     |
    | Ja/Jnbe |      Y      |    N    |    Y     |
    | Js      |      N      |    N    |    Y     |
    | Jns     |      N      |    N    |    Y     |
    | Jp/Jpe  |      N      |    N    |    Y     |
    | Jnp/Jpo |      N      |    N    |    Y     |
    | Jl/Jnge |      Y      |    Y    |    Y     |
    | Jge/Jnl |      Y      |    Y    |    Y     |
    | Jle/Jng |      Y      |    Y    |    Y     |
    | Jg/Jnle |      Y      |    Y    |    Y     |

Update maybe_fused_with_jcc_p to check whether the operands of CMP-like instructions can be fused with a conditional jump.

* gas/config/tc-i386.h (i386_tc_frag_data): Add member mf_type. (TC_FRAG_INIT): Init mf_type.
* gas/config/tc-i386.c (enum mf_jcc_kind): New enum. (enum mf_cmp_kind): Ditto. (maybe_fused_with_jcc_p): Add argument mf_cmp_p to get mf_type of corresponding instructions, exclude unfusible instructions. (add_fused_jcc_padding_frag_p): Likewise. (add_branch_padding_frag_p): Likewise. (output_insn): Record mf_type for corresponding instructions. (i386_macro_fusible_p): New function. (i386_next_fusible_jcc_frag): Rename from i386_next_jcc_frag, add argument cmp_fragP to return next fusible jcc frag only. (i386_classify_machine_dependant_frag): Separate macro-fusible instructions from conditional jumps.
* gas/testsuite/gas/i386/align-branch-9.s: New file. * gas/testsuite/gas/i386/align-branch-9.d: Ditto. * gas/testsuite/gas/i386/x86-64-align-branch-9.s: Ditto. * gas/testsuite/gas/i386/x86-64-align-branch-9.d: Ditto. * gas/testsuite/gas/i386/i386.exp: Run new tests.
Diffstat (limited to 'gas/testsuite')
-rw-r--r--gas/testsuite/gas/i386/align-branch-9.d78
-rw-r--r--gas/testsuite/gas/i386/align-branch-9.s74
-rw-r--r--gas/testsuite/gas/i386/i386.exp2
-rw-r--r--gas/testsuite/gas/i386/x86-64-align-branch-9.d46
-rw-r--r--gas/testsuite/gas/i386/x86-64-align-branch-9.s43
5 files changed, 243 insertions, 0 deletions
diff --git a/gas/testsuite/gas/i386/align-branch-9.d b/gas/testsuite/gas/i386/align-branch-9.d
new file mode 100644
index 0000000..6340817
--- /dev/null
+++ b/gas/testsuite/gas/i386/align-branch-9.d
@@ -0,0 +1,78 @@
+#as: -mbranches-within-32B-boundaries
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <foo>:
+ 0: 65 a3 01 00 00 00 mov %eax,%gs:0x1
+ 6: 55 push %ebp
+ 7: 55 push %ebp
+ 8: 55 push %ebp
+ 9: 55 push %ebp
+ a: 89 e5 mov %esp,%ebp
+ c: 89 7d f8 mov %edi,-0x8\(%ebp\)
+ f: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 12: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 15: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 18: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 1b: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 1e: 39 c5 cmp %eax,%ebp
+ 20: 70 62 jo 84 <foo\+0x84>
+ 22: 89 73 f4 mov %esi,-0xc\(%ebx\)
+ 25: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 28: 89 7d f8 mov %edi,-0x8\(%ebp\)
+ 2b: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 2e: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 31: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 34: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 37: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 3a: 5d pop %ebp
+ 3b: 5d pop %ebp
+ 3c: 5d pop %ebp
+ 3d: 74 45 je 84 <foo\+0x84>
+ 3f: 5d pop %ebp
+ 40: 74 42 je 84 <foo\+0x84>
+ 42: 89 44 24 fc mov %eax,-0x4\(%esp\)
+ 46: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 49: 89 7d f8 mov %edi,-0x8\(%ebp\)
+ 4c: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 4f: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 52: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 55: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 58: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 5b: 5d pop %ebp
+ 5c: eb 2c jmp 8a <foo\+0x8a>
+ 5e: 66 90 xchg %ax,%ax
+ 60: eb 28 jmp 8a <foo\+0x8a>
+ 62: eb 26 jmp 8a <foo\+0x8a>
+ 64: 89 45 fc mov %eax,-0x4\(%ebp\)
+ 67: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 6a: 89 7d f8 mov %edi,-0x8\(%ebp\)
+ 6d: 5d pop %ebp
+ 6e: 5d pop %ebp
+ 6f: 40 inc %eax
+ 70: 72 12 jb 84 <foo\+0x84>
+ 72: 36 36 89 45 fc ss mov %eax,%ss:-0x4\(%ebp\)
+ 77: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 7a: 89 7d f8 mov %edi,-0x8\(%ebp\)
+ 7d: 89 75 f4 mov %esi,-0xc\(%ebp\)
+ 80: 21 c3 and %eax,%ebx
+ 82: 7c 06 jl 8a <foo\+0x8a>
+ 84: 8b 45 f4 mov -0xc\(%ebp\),%eax
+ 87: 89 45 fc mov %eax,-0x4\(%ebp\)
+ 8a: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ 90: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ 96: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ 9c: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ a2: 89 75 0c mov %esi,0xc\(%ebp\)
+ a5: e9 fc ff ff ff jmp a6 <foo\+0xa6>
+ aa: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ b0: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ b6: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ bc: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\)
+ c2: 89 75 00 mov %esi,0x0\(%ebp\)
+ c5: 74 c3 je 8a <foo\+0x8a>
+ c7: 74 c1 je 8a <foo\+0x8a>
+#pass
diff --git a/gas/testsuite/gas/i386/align-branch-9.s b/gas/testsuite/gas/i386/align-branch-9.s
new file mode 100644
index 0000000..357abe3
--- /dev/null
+++ b/gas/testsuite/gas/i386/align-branch-9.s
@@ -0,0 +1,74 @@
+ .text # Input for the align-branch-9 dump test; the paired .d file assembles it with -mbranches-within-32B-boundaries and matches objdump -dw output.
+ .globl foo
+ .p2align 4
+foo:
+ movl %eax, %gs:0x1
+ pushl %ebp
+ pushl %ebp
+ pushl %ebp
+ pushl %ebp
+ movl %esp, %ebp
+ movl %edi, -8(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ cmp %eax, %ebp # Expected listing: stays at 0x1e with no padding in front of it.
+ jo .L_2 # CMP+Jo is marked N (not fusible) in the commit-message table; expected listing puts the jo alone at 0x20.
+ movl %esi, -12(%ebx)
+ movl %esi, -12(%ebp)
+ movl %edi, -8(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ popl %ebp
+ popl %ebp
+ popl %ebp
+ je .L_2 # Preceded by popl rather than a cmp-like instruction; expected at 0x3d.
+ popl %ebp
+ je .L_2 # Expected at 0x40.
+ movl %eax, -4(%esp)
+ movl %esi, -12(%ebp)
+ movl %edi, -8(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ movl %esi, -12(%ebp)
+ popl %ebp
+ jmp .L_3 # Expected at 0x5c.
+ jmp .L_3 # Expected listing shows a 2-byte nop (xchg %ax,%ax) at 0x5e before this jmp, placing it at 0x60.
+ jmp .L_3 # Expected at 0x62.
+ movl %eax, -4(%ebp)
+ movl %esi, -12(%ebp)
+ movl %edi, -8(%ebp)
+ popl %ebp
+ popl %ebp
+ inc %eax # Expected at 0x6f, unpadded.
+ jc .L_2 # INC+Jc is marked N in the commit-message table; disassembled as jb at 0x70.
+ movl %eax, -4(%ebp) # Expected encoding is 36 36 89 45 fc: two extra ss prefixes, which shift the and/jl pair below to 0x80.
+ movl %esi, -12(%ebp)
+ movl %edi, -8(%ebp)
+ movl %esi, -12(%ebp)
+ and %eax, %ebx # Expected at 0x80.
+ jl .L_3 # AND+Jl is marked Y (fusible) in the commit-message table; expected at 0x82, directly after the and.
+.L_2:
+ movl -12(%ebp), %eax
+ movl %eax, -4(%ebp)
+.L_3:
+ movl %esi, -1200(%ebp)
+ movl %esi, -1200(%ebp)
+ movl %esi, -1200(%ebp)
+ movl %esi, -1200(%ebp)
+ movl %esi, 12(%ebp)
+ jmp bar # bar is not defined in this file; expected listing shows a 5-byte e9 jmp at 0xa5.
+ movl %esi, -1200(%ebp)
+ movl %esi, -1200(%ebp)
+ movl %esi, -1200(%ebp)
+ movl %esi, -1200(%ebp)
+ movl %esi, (%ebp)
+ je .L_3 # Expected at 0xc5.
+ je .L_3 # Expected at 0xc7.
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 685e62e..8fc621f 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -525,6 +525,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]]
run_dump_test "align-branch-6"
run_dump_test "align-branch-7"
run_dump_test "align-branch-8"
+ run_dump_test "align-branch-9"
# These tests require support for 8 and 16 bit relocs,
# so we only run them for ELF and COFF targets.
@@ -1100,6 +1101,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t
run_dump_test "x86-64-align-branch-6"
run_dump_test "x86-64-align-branch-7"
run_dump_test "x86-64-align-branch-8"
+ run_dump_test "x86-64-align-branch-9"
if { ![istarget "*-*-aix*"]
&& ![istarget "*-*-beos*"]
diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.d b/gas/testsuite/gas/i386/x86-64-align-branch-9.d
new file mode 100644
index 0000000..1041fd0
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.d
@@ -0,0 +1,46 @@
+#as: -mbranches-within-32B-boundaries
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <foo>:
+ 0: c1 e9 02 shr \$0x2,%ecx
+ 3: c1 e9 02 shr \$0x2,%ecx
+ 6: c1 e9 02 shr \$0x2,%ecx
+ 9: 89 d1 mov %edx,%ecx
+ b: 31 c0 xor %eax,%eax
+ d: c1 e9 02 shr \$0x2,%ecx
+ 10: c1 e9 02 shr \$0x2,%ecx
+ 13: c1 e9 02 shr \$0x2,%ecx
+ 16: c1 e9 02 shr \$0x2,%ecx
+ 19: c1 e9 02 shr \$0x2,%ecx
+ 1c: c1 e9 02 shr \$0x2,%ecx
+ 1f: 80 fa 02 cmp \$0x2,%dl
+ 22: 70 df jo 3 <foo\+0x3>
+ 24: 2e 2e 2e 2e 31 c0 cs cs cs cs xor %eax,%eax
+ 2a: c1 e9 02 shr \$0x2,%ecx
+ 2d: c1 e9 02 shr \$0x2,%ecx
+ 30: c1 e9 02 shr \$0x2,%ecx
+ 33: 89 d1 mov %edx,%ecx
+ 35: 31 c0 xor %eax,%eax
+ 37: c1 e9 02 shr \$0x2,%ecx
+ 3a: c1 e9 02 shr \$0x2,%ecx
+ 3d: c1 e9 02 shr \$0x2,%ecx
+ 40: f6 c2 02 test \$0x2,%dl
+ 43: 75 e8 jne 2d <foo\+0x2d>
+ 45: 31 c0 xor %eax,%eax
+ 47: c1 e9 02 shr \$0x2,%ecx
+ 4a: c1 e9 02 shr \$0x2,%ecx
+ 4d: 89 d1 mov %edx,%ecx
+ 4f: c1 e9 02 shr \$0x2,%ecx
+ 52: c1 e9 02 shr \$0x2,%ecx
+ 55: 89 d1 mov %edx,%ecx
+ 57: c1 e9 02 shr \$0x2,%ecx
+ 5a: 89 d1 mov %edx,%ecx
+ 5c: 31 c0 xor %eax,%eax
+ 5e: ff c0 inc %eax
+ 60: 76 cb jbe 2d <foo\+0x2d>
+ 62: 31 c0 xor %eax,%eax
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.s b/gas/testsuite/gas/i386/x86-64-align-branch-9.s
new file mode 100644
index 0000000..917579b
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.s
@@ -0,0 +1,43 @@
+ .text # Input for the x86-64-align-branch-9 dump test; the paired .d file assembles it with -mbranches-within-32B-boundaries and matches objdump -dw output.
+ .p2align 4,,15
+foo:
+ shrl $2, %ecx
+.L1:
+ shrl $2, %ecx
+ shrl $2, %ecx
+ movl %edx, %ecx
+ xorl %eax, %eax
+ shrl $2, %ecx
+ shrl $2, %ecx
+ shrl $2, %ecx
+ shrl $2, %ecx
+ shrl $2, %ecx
+ shrl $2, %ecx
+ cmpb $2, %dl # Expected listing: 3-byte cmp stays at 0x1f with no padding in front of it.
+ jo .L1 # CMP+Jo is marked N (not fusible) in the commit-message table; expected listing puts the jo alone at 0x22.
+ xorl %eax, %eax # Expected encoding is 2e 2e 2e 2e 31 c0: four extra cs prefixes, which shift the test/jne pair below to 0x40.
+ shrl $2, %ecx
+.L2:
+ shrl $2, %ecx
+ shrl $2, %ecx
+ movl %edx, %ecx
+ xorl %eax, %eax
+ shrl $2, %ecx
+ shrl $2, %ecx
+ shrl $2, %ecx
+ testb $2, %dl # Expected at 0x40.
+ jne .L2 # TEST+Jne is marked Y (fusible) in the commit-message table; expected at 0x43, directly after the test.
+ xorl %eax, %eax
+.L3:
+ shrl $2, %ecx
+ shrl $2, %ecx
+ movl %edx, %ecx
+ shrl $2, %ecx
+ shrl $2, %ecx
+ movl %edx, %ecx
+ shrl $2, %ecx
+ movl %edx, %ecx
+ xorl %eax, %eax
+ inc %eax # Expected at 0x5e, unpadded.
+ jbe .L2 # INC+Jbe is marked N in the commit-message table; expected listing puts the jbe alone at 0x60.
+ xorl %eax, %eax