aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jan Beulich <jbeulich@suse.com> 2024-01-26 10:31:38 +0100
committer Jan Beulich <jbeulich@suse.com> 2024-01-26 10:31:38 +0100
commit 0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be (patch)
tree 028ab219218cc96320f7d3decb8a68829fd4904e
parent cd35c9fd1e67b2dd8acbda3dfa398e395b22f220 (diff)
downloadfsf-binutils-gdb-0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be.zip
fsf-binutils-gdb-0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be.tar.gz
fsf-binutils-gdb-0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be.tar.bz2
x86/APX: optimize MOVBE
With identical source and destination it can be covered by the NDD-to-legacy conversion logic as well, even if in this case the original insn doesn't use an NDD encoding. The size savings are even better here, because the replacement (BSWAP) does not have a ModR/M byte.
-rw-r--r-- gas/config/tc-i386.c 15
-rw-r--r-- gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d 3
-rw-r--r-- gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s 3
-rw-r--r-- opcodes/i386-opc.tbl 6
-rw-r--r-- opcodes/i386-tbl.h 68
5 files changed, 55 insertions, 40 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 51166ef..3d3d4a3 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -8959,10 +8959,11 @@ match_template (char mnem_suffix)
if (optimize
&& !i.no_optimize
&& i.vec_encoding != vex_encoding_evex
- && t + 1 < current_templates.end
- && !t[1].opcode_modifier.evex
- && t[1].opcode_space <= SPACE_0F38
- && t->opcode_modifier.vexvvvv == VexVVVV_DST
+ && ((t + 1 < current_templates.end
+ && !t[1].opcode_modifier.evex
+ && t[1].opcode_space <= SPACE_0F38
+ && t->opcode_modifier.vexvvvv == VexVVVV_DST)
+ || t->mnem_off == MN_movbe)
&& (i.types[i.operands - 1].bitfield.dword
|| i.types[i.operands - 1].bitfield.qword))
{
@@ -8999,6 +9000,12 @@ match_template (char mnem_suffix)
--i.operands;
--i.reg_operands;
+ if (t->mnem_off == MN_movbe)
+ {
+ gas_assert (t[1].mnem_off == MN_bswap);
+ ++current_templates.end;
+ }
+
specific_error = progress (internal_error);
continue;
}
diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
index 773ceea..8069799 100644
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -118,6 +118,9 @@ Disassembly of section .text:
\s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 cmovge -0x6f6f6f70\(%eax\),%edx
\s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 cmovle -0x6f6f6f70\(%eax\),%edx
\s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 cmovg -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*62 f4 7d 08 60 c0 movbe %ax,%ax
+\s*[a-f0-9]+:\s*49 0f c8 bswap %r8
+\s*[a-f0-9]+:\s*d5 98 c8 bswap %r16
\s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax
\s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax
\s*[a-f0-9]+:\s*62 f4 fd 18 66 c3 adcx %rbx,%rax,%rax
diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
index 6ffdf5a..c83e119 100644
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
@@ -111,6 +111,9 @@ cmovl 0x90909090(%eax),%edx,%edx
cmovge 0x90909090(%eax),%edx,%edx
cmovle 0x90909090(%eax),%edx,%edx
cmovg 0x90909090(%eax),%edx,%edx
+movbe %ax,%ax
+movbe %r8,%r8
+movbe %r16,%r16
adcx %ebx,%eax,%eax
adcx %eax,%ebx,%eax
adcx %rbx,%rax,%rax
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 717e208..55c704b 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -210,6 +210,9 @@ mov, 0xf24, i386&No64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te
// Move after swapping the bytes
movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+// This needs to live here for easy EVEX -> REX2 conversion, which wants to
+// restart with the next sequential template.
+bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
// Move with sign extend.
movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
@@ -980,9 +983,8 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
{<pseudopfx>}, PSEUDO_PREFIX/Prefix_<pseudopfx:ident>, <pseudopfx:cpu>, NoSuf|IsPrefix, {}
-// 486 extensions.
+// 486 extensions (BSWAP moved elsewhere).
-bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
xadd, 0xfc0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
cmpxchg, 0xfb0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
invd, 0xf08, i486, NoSuf, {}
diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h
index bdcc3c8..4031b6a 100644
--- a/opcodes/i386-tbl.h
+++ b/opcodes/i386-tbl.h
@@ -204,6 +204,14 @@ static const insn_template i386_optab[] =
0, 0, 0, 0, 1, 0 } },
{ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0 } } } },
+ { MN_bswap, 0xc8, 1, SPACE_0F, None,
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0 },
+ { { 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0 } } } },
{ MN_movsb, 0xbe, 2, SPACE_0F, None,
{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -6000,14 +6008,6 @@ static const insn_template i386_optab[] =
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0 } } } },
- { MN_bswap, 0xc8, 1, SPACE_0F, None,
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0 },
- { { 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
- 0, 0, 0, 0, 0, 0 } } } },
{ MN_xadd, 0xc0, 2, SPACE_0F, None,
{ 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -41301,41 +41301,41 @@ static const insn_template i386_optab[] =
typedef unsigned short i386_op_off_t;
static const i386_op_off_t i386_op_sets[] =
{
- 0, 14, 16, 18, 19, 20, 21, 23,
- 26, 27, 28, 29, 39, 40, 41, 47,
- 48, 49, 51, 55, 59, 60, 61, 62,
- 64, 66, 68, 69, 70, 71, 72, 73,
- 75, 77, 79, 81, 82, 83, 84, 91,
- 94, 101, 104, 112, 116, 119, 126, 133,
- 140, 141, 148, 150, 152, 153, 154, 155,
- 156, 158, 160, 161, 162, 163, 164, 165,
+ 0, 14, 16, 18, 19, 20, 21, 22,
+ 24, 27, 28, 29, 30, 40, 41, 42,
+ 48, 49, 50, 52, 56, 60, 61, 62,
+ 63, 65, 67, 69, 70, 71, 72, 73,
+ 74, 76, 78, 80, 82, 83, 84, 85,
+ 92, 95, 102, 105, 113, 117, 120, 127,
+ 134, 141, 142, 149, 151, 153, 154, 155,
+ 156, 157, 159, 161, 162, 163, 164, 165,
166, 167, 168, 169, 170, 171, 172, 173,
- 180, 182, 184, 191, 198, 209, 220, 227,
- 234, 241, 248, 254, 260, 269, 272, 280,
- 283, 289, 291, 293, 295, 297, 298, 299,
+ 174, 181, 183, 185, 192, 199, 210, 221,
+ 228, 235, 242, 249, 255, 261, 270, 273,
+ 281, 284, 290, 292, 294, 296, 298, 299,
300, 301, 302, 303, 304, 305, 306, 307,
308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323,
- 324, 325, 326, 327, 328, 329, 330, 332,
- 334, 336, 338, 340, 341, 342, 343, 344,
+ 324, 325, 326, 327, 328, 329, 330, 331,
+ 333, 335, 337, 339, 341, 342, 343, 344,
345, 346, 347, 348, 349, 350, 351, 352,
353, 354, 355, 356, 357, 358, 359, 360,
361, 362, 363, 364, 365, 366, 367, 368,
- 369, 370, 372, 374, 376, 378, 381, 384,
- 386, 388, 391, 394, 397, 400, 402, 403,
- 404, 406, 408, 410, 412, 413, 414, 415,
- 416, 417, 418, 419, 420, 422, 424, 426,
- 428, 430, 432, 433, 435, 437, 439, 441,
- 443, 445, 447, 449, 451, 455, 457, 458,
- 459, 460, 463, 464, 468, 470, 471, 472,
- 473, 475, 479, 480, 484, 485, 486, 488,
- 490, 491, 492, 493, 494, 495, 496, 497,
- 498, 499, 500, 504, 505, 508, 512, 513,
- 519, 523, 524, 530, 534, 535, 538, 542,
- 543, 549, 553, 554, 560, 561, 562, 563,
+ 369, 370, 371, 373, 375, 377, 379, 382,
+ 385, 387, 389, 392, 395, 398, 401, 403,
+ 404, 405, 407, 409, 411, 413, 414, 415,
+ 416, 417, 418, 419, 420, 421, 423, 425,
+ 427, 429, 431, 433, 434, 436, 438, 440,
+ 442, 444, 446, 448, 450, 452, 456, 458,
+ 459, 460, 461, 464, 465, 469, 471, 472,
+ 473, 474, 476, 480, 481, 485, 486, 487,
+ 489, 491, 492, 493, 494, 495, 496, 497,
+ 498, 499, 500, 501, 505, 506, 509, 513,
+ 514, 520, 524, 525, 531, 535, 536, 539,
+ 543, 544, 550, 554, 555, 561, 562, 563,
564, 565, 566, 567, 568, 569, 570, 571,
572, 573, 574, 575, 576, 577, 578, 579,
- 580, 581, 582, 583, 586, 589, 590, 591,
+ 580, 581, 582, 583, 584, 587, 590, 591,
592, 593, 594, 595, 596, 597, 598, 599,
600, 601, 602, 603, 604, 605, 606, 607,
608, 609, 610, 611, 612, 613, 614, 615,