From 0ebcbb1bd03c4fde91b032b50ec9c3a8f376b8be Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 26 Jan 2024 10:31:38 +0100 Subject: x86/APX: optimize MOVBE With identical source and destination it can be covered by the NDD-to-legacy conversion logic as well, even if in this case the original insn doesn't use an NDD encoding. The size savings are even better here, since the replacement (BSWAP) has no ModR/M byte. --- gas/config/tc-i386.c | 15 ++++-- gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d | 3 ++ gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s | 3 ++ opcodes/i386-opc.tbl | 6 ++- opcodes/i386-tbl.h | 68 ++++++++++++------------ 5 files changed, 55 insertions(+), 40 deletions(-) diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 51166ef..3d3d4a3 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -8959,10 +8959,11 @@ match_template (char mnem_suffix) if (optimize && !i.no_optimize && i.vec_encoding != vex_encoding_evex - && t + 1 < current_templates.end - && !t[1].opcode_modifier.evex - && t[1].opcode_space <= SPACE_0F38 - && t->opcode_modifier.vexvvvv == VexVVVV_DST + && ((t + 1 < current_templates.end + && !t[1].opcode_modifier.evex + && t[1].opcode_space <= SPACE_0F38 + && t->opcode_modifier.vexvvvv == VexVVVV_DST) + || t->mnem_off == MN_movbe) && (i.types[i.operands - 1].bitfield.dword || i.types[i.operands - 1].bitfield.qword)) { @@ -8999,6 +9000,12 @@ match_template (char mnem_suffix) --i.operands; --i.reg_operands; + if (t->mnem_off == MN_movbe) + { + gas_assert (t[1].mnem_off == MN_bswap); + ++current_templates.end; + } + specific_error = progress (internal_error); continue; } diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d index 773ceea..8069799 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d @@ -118,6 +118,9 @@ Disassembly of section .text: \s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 
cmovge -0x6f6f6f70\(%eax\),%edx \s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 cmovle -0x6f6f6f70\(%eax\),%edx \s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 cmovg -0x6f6f6f70\(%eax\),%edx +\s*[a-f0-9]+:\s*62 f4 7d 08 60 c0 movbe %ax,%ax +\s*[a-f0-9]+:\s*49 0f c8 bswap %r8 +\s*[a-f0-9]+:\s*d5 98 c8 bswap %r16 \s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax \s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax \s*[a-f0-9]+:\s*62 f4 fd 18 66 c3 adcx %rbx,%rax,%rax diff --git a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s index 6ffdf5a..c83e119 100644 --- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s +++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s @@ -111,6 +111,9 @@ cmovl 0x90909090(%eax),%edx,%edx cmovge 0x90909090(%eax),%edx,%edx cmovle 0x90909090(%eax),%edx,%edx cmovg 0x90909090(%eax),%edx,%edx +movbe %ax,%ax +movbe %r8,%r8 +movbe %r16,%r16 adcx %ebx,%eax,%eax adcx %eax,%ebx,%eax adcx %rbx,%rax,%rax diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index 717e208..55c704b 100644 --- a/opcodes/i386-opc.tbl +++ b/opcodes/i386-opc.tbl @@ -210,6 +210,9 @@ mov, 0xf24, i386&No64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te // Move after swapping the bytes movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } +// This needs to live here for easy EVEX -> REX2 conversion, which wants to +// restart with the next sequential template. +bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 } // Move with sign extend. movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } @@ -980,9 +983,8 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {} {}, PSEUDO_PREFIX/Prefix_, , NoSuf|IsPrefix, {} -// 486 extensions. +// 486 extensions (BSWAP moved elsewhere). 
-bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 } xadd, 0xfc0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } cmpxchg, 0xfb0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex } invd, 0xf08, i486, NoSuf, {} diff --git a/opcodes/i386-tbl.h b/opcodes/i386-tbl.h index bdcc3c8..4031b6a 100644 --- a/opcodes/i386-tbl.h +++ b/opcodes/i386-tbl.h @@ -204,6 +204,14 @@ static const insn_template i386_optab[] = 0, 0, 0, 0, 1, 0 } }, { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0 } } } }, + { MN_bswap, 0xc8, 1, SPACE_0F, None, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0 }, + { { 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0 } } } }, { MN_movsb, 0xbe, 2, SPACE_0F, None, { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -6000,14 +6008,6 @@ static const insn_template i386_optab[] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - { MN_bswap, 0xc8, 1, SPACE_0F, None, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0 }, - { { 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { { { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 0 } } } }, { MN_xadd, 0xc0, 2, SPACE_0F, None, { 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -41301,41 +41301,41 @@ static const insn_template 
i386_optab[] = typedef unsigned short i386_op_off_t; static const i386_op_off_t i386_op_sets[] = { - 0, 14, 16, 18, 19, 20, 21, 23, - 26, 27, 28, 29, 39, 40, 41, 47, - 48, 49, 51, 55, 59, 60, 61, 62, - 64, 66, 68, 69, 70, 71, 72, 73, - 75, 77, 79, 81, 82, 83, 84, 91, - 94, 101, 104, 112, 116, 119, 126, 133, - 140, 141, 148, 150, 152, 153, 154, 155, - 156, 158, 160, 161, 162, 163, 164, 165, + 0, 14, 16, 18, 19, 20, 21, 22, + 24, 27, 28, 29, 30, 40, 41, 42, + 48, 49, 50, 52, 56, 60, 61, 62, + 63, 65, 67, 69, 70, 71, 72, 73, + 74, 76, 78, 80, 82, 83, 84, 85, + 92, 95, 102, 105, 113, 117, 120, 127, + 134, 141, 142, 149, 151, 153, 154, 155, + 156, 157, 159, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, - 180, 182, 184, 191, 198, 209, 220, 227, - 234, 241, 248, 254, 260, 269, 272, 280, - 283, 289, 291, 293, 295, 297, 298, 299, + 174, 181, 183, 185, 192, 199, 210, 221, + 228, 235, 242, 249, 255, 261, 270, 273, + 281, 284, 290, 292, 294, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, - 324, 325, 326, 327, 328, 329, 330, 332, - 334, 336, 338, 340, 341, 342, 343, 344, + 324, 325, 326, 327, 328, 329, 330, 331, + 333, 335, 337, 339, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, - 369, 370, 372, 374, 376, 378, 381, 384, - 386, 388, 391, 394, 397, 400, 402, 403, - 404, 406, 408, 410, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 422, 424, 426, - 428, 430, 432, 433, 435, 437, 439, 441, - 443, 445, 447, 449, 451, 455, 457, 458, - 459, 460, 463, 464, 468, 470, 471, 472, - 473, 475, 479, 480, 484, 485, 486, 488, - 490, 491, 492, 493, 494, 495, 496, 497, - 498, 499, 500, 504, 505, 508, 512, 513, - 519, 523, 524, 530, 534, 535, 538, 542, - 543, 549, 553, 554, 560, 561, 562, 563, + 369, 370, 371, 373, 375, 377, 379, 382, + 385, 387, 389, 392, 395, 398, 401, 403, + 404, 405, 407, 409, 
411, 413, 414, 415, + 416, 417, 418, 419, 420, 421, 423, 425, + 427, 429, 431, 433, 434, 436, 438, 440, + 442, 444, 446, 448, 450, 452, 456, 458, + 459, 460, 461, 464, 465, 469, 471, 472, + 473, 474, 476, 480, 481, 485, 486, 487, + 489, 491, 492, 493, 494, 495, 496, 497, + 498, 499, 500, 501, 505, 506, 509, 513, + 514, 520, 524, 525, 531, 535, 536, 539, + 543, 544, 550, 554, 555, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, - 580, 581, 582, 583, 586, 589, 590, 591, + 580, 581, 582, 583, 584, 587, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, -- cgit v1.1