diff options
author | Jan Beulich <jbeulich@suse.com> | 2024-01-26 10:31:38 +0100 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2024-01-26 10:31:38 +0100 |
commit | 0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be (patch) | |
tree | 028ab219218cc96320f7d3decb8a68829fd4904e | |
parent | cd35c9fd1e67b2dd8acbda3dfa398e395b22f220 (diff) | |
download | fsf-binutils-gdb-0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be.zip fsf-binutils-gdb-0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be.tar.gz fsf-binutils-gdb-0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be.tar.bz2 |
x86/APX: optimize MOVBE
When source and destination are identical, MOVBE can be covered by the
NDD-to-legacy conversion logic as well, even though in this case the original
insn doesn't use an NDD encoding. The size savings are even better here,
because the replacement (BSWAP) has no ModR/M byte.
-rw-r--r-- | gas/config/tc-i386.c | 15 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d | 3 | ||||
-rw-r--r-- | gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s | 3 | ||||
-rw-r--r-- | opcodes/i386-opc.tbl | 6 | ||||
-rw-r--r-- | opcodes/i386-tbl.h | 68 |
5 files changed, 55 insertions, 40 deletions
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 51166ef..3d3d4a3 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -8959,10 +8959,11 @@ match_template (char mnem_suffix) if (optimize && !i.no_optimize && i.vec_encoding != vex_encoding_evex - && t + 1 < current_templates.end - && !t[1].opcode_modifier.evex - && t[1].opcode_space <= SPACE_0F38 - && t->opcode_modifier.vexvvvv == VexVVVV_DST + && ((t + 1 < current_templates.end + && !t[1].opcode_modifier.evex + && t[1].opcode_space <= SPACE_0F38 + && t->opcode_modifier.vexvvvv == VexVVVV_DST) + || t->mnem_off == MN_movbe) && (i.types[i.operands - 1].bitfield.dword || i.types[i.operands - 1].bitfield.qword)) { @@ -8999,6 +9000,12 @@ match_template (char mnem_suffix) --i.operands; --i.reg_operands; + if (t->mnem_off == MN_movbe) + { + gas_assert (t[1].mnem_off == MN_bswap); + ++current_templates.end; + } + specific_error = progress (internal_error); continue; } diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d index 773ceea..8069799 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d @@ -118,6 +118,9 @@ Disassembly of section .text: \s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 cmovge -0x6f6f6f70\(%eax\),%edx \s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 cmovle -0x6f6f6f70\(%eax\),%edx \s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 cmovg -0x6f6f6f70\(%eax\),%edx +\s*[a-f0-9]+:\s*62 f4 7d 08 60 c0 movbe %ax,%ax +\s*[a-f0-9]+:\s*49 0f c8 bswap %r8 +\s*[a-f0-9]+:\s*d5 98 c8 bswap %r16 \s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax \s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax \s*[a-f0-9]+:\s*62 f4 fd 18 66 c3 adcx %rbx,%rax,%rax diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s index 6ffdf5a..c83e119 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s @@ -111,6 
+111,9 @@ cmovl 0x90909090(%eax),%edx,%edx cmovge 0x90909090(%eax),%edx,%edx cmovle 0x90909090(%eax),%edx,%edx cmovg 0x90909090(%eax),%edx,%edx +movbe %ax,%ax +movbe %r8,%r8 +movbe %r16,%r16 adcx %ebx,%eax,%eax adcx %eax,%ebx,%eax adcx %rbx,%rax,%rax diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 717e208..55c704b 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -210,6 +210,9 @@ mov, 0xf24, i386&No64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te // Move after swapping the bytes movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } +// This needs to live here for easy EVEX -> REX2 conversion, which wants to +// restart with the next sequential template. +bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 } // Move with sign extend. movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } @@ -980,9 +983,8 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {} {<pseudopfx>}, PSEUDO_PREFIX/Prefix_<pseudopfx:ident>, <pseudopfx:cpu>, NoSuf|IsPrefix, {} -// 486 extensions. +// 486 extensions (BSWAP moved elsewhere). 
-bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 } xadd, 0xfc0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } cmpxchg, 0xfb0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } invd, 0xf08, i486, NoSuf, {} diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index bdcc3c8..4031b6a 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -204,6 +204,14 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 1, 0 } }, { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0 } } } }, + { MN_bswap, 0xc8, 1, SPACE_0F, None, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0 }, + { { 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0 } } } }, { MN_movsb, 0xbe, 2, SPACE_0F, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -6000,14 +6008,6 @@ static const insn_template i386_optab[] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - { MN_bswap, 0xc8, 1, SPACE_0F, None, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0 }, - { { 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0 } } } }, { MN_xadd, 0xc0, 2, SPACE_0F, None, { 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -41301,41 +41301,41 @@ static const insn_template 
i386_optab[] = typedef unsigned short i386_op_off_t; static const i386_op_off_t i386_op_sets[] = { - 0, 14, 16, 18, 19, 20, 21, 23, - 26, 27, 28, 29, 39, 40, 41, 47, - 48, 49, 51, 55, 59, 60, 61, 62, - 64, 66, 68, 69, 70, 71, 72, 73, - 75, 77, 79, 81, 82, 83, 84, 91, - 94, 101, 104, 112, 116, 119, 126, 133, - 140, 141, 148, 150, 152, 153, 154, 155, - 156, 158, 160, 161, 162, 163, 164, 165, + 0, 14, 16, 18, 19, 20, 21, 22, + 24, 27, 28, 29, 30, 40, 41, 42, + 48, 49, 50, 52, 56, 60, 61, 62, + 63, 65, 67, 69, 70, 71, 72, 73, + 74, 76, 78, 80, 82, 83, 84, 85, + 92, 95, 102, 105, 113, 117, 120, 127, + 134, 141, 142, 149, 151, 153, 154, 155, + 156, 157, 159, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, - 180, 182, 184, 191, 198, 209, 220, 227, - 234, 241, 248, 254, 260, 269, 272, 280, - 283, 289, 291, 293, 295, 297, 298, 299, + 174, 181, 183, 185, 192, 199, 210, 221, + 228, 235, 242, 249, 255, 261, 270, 273, + 281, 284, 290, 292, 294, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, - 324, 325, 326, 327, 328, 329, 330, 332, - 334, 336, 338, 340, 341, 342, 343, 344, + 324, 325, 326, 327, 328, 329, 330, 331, + 333, 335, 337, 339, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, - 369, 370, 372, 374, 376, 378, 381, 384, - 386, 388, 391, 394, 397, 400, 402, 403, - 404, 406, 408, 410, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 422, 424, 426, - 428, 430, 432, 433, 435, 437, 439, 441, - 443, 445, 447, 449, 451, 455, 457, 458, - 459, 460, 463, 464, 468, 470, 471, 472, - 473, 475, 479, 480, 484, 485, 486, 488, - 490, 491, 492, 493, 494, 495, 496, 497, - 498, 499, 500, 504, 505, 508, 512, 513, - 519, 523, 524, 530, 534, 535, 538, 542, - 543, 549, 553, 554, 560, 561, 562, 563, + 369, 370, 371, 373, 375, 377, 379, 382, + 385, 387, 389, 392, 395, 398, 401, 403, + 404, 405, 407, 409, 
411, 413, 414, 415, + 416, 417, 418, 419, 420, 421, 423, 425, + 427, 429, 431, 433, 434, 436, 438, 440, + 442, 444, 446, 448, 450, 452, 456, 458, + 459, 460, 461, 464, 465, 469, 471, 472, + 473, 474, 476, 480, 481, 485, 486, 487, + 489, 491, 492, 493, 494, 495, 496, 497, + 498, 499, 500, 501, 505, 506, 509, 513, + 514, 520, 524, 525, 531, 535, 536, 539, + 543, 544, 550, 554, 555, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, - 580, 581, 582, 583, 586, 589, 590, 591, + 580, 581, 582, 583, 584, 587, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, |