-rw-r--r--   llvm/lib/Target/X86/X86InstructionSelector.cpp            |  97
-rw-r--r--   llvm/lib/Target/X86/X86LegalizerInfo.cpp                  |   9
-rw-r--r--   llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir  |  17
-rw-r--r--   llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll            |  68
4 files changed, 138 insertions, 53 deletions
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index c962c52..fc6f3bd 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -114,8 +114,8 @@ private:
   bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
   bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
-  bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
-                    MachineFunction &MF) const;
+  bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
+                       MachineFunction &MF) const;
   bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
                                    MachineFunction &MF) const;
@@ -421,11 +421,14 @@ bool X86InstructionSelector::select(MachineInstr &I) {
   case TargetOpcode::G_IMPLICIT_DEF:
   case TargetOpcode::G_PHI:
     return selectImplicitDefOrPHI(I, MRI);
+  case TargetOpcode::G_MUL:
+  case TargetOpcode::G_SMULH:
+  case TargetOpcode::G_UMULH:
   case TargetOpcode::G_SDIV:
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_SREM:
   case TargetOpcode::G_UREM:
-    return selectDivRem(I, MRI, MF);
+    return selectMulDivRem(I, MRI, MF);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     return selectIntrinsicWSideEffects(I, MRI, MF);
   }
@@ -1558,11 +1561,14 @@ bool X86InstructionSelector::selectImplicitDefOrPHI(
   return true;
 }
 
-bool X86InstructionSelector::selectDivRem(MachineInstr &I,
-                                          MachineRegisterInfo &MRI,
-                                          MachineFunction &MF) const {
-  // The implementation of this function is taken from X86FastISel.
-  assert((I.getOpcode() == TargetOpcode::G_SDIV ||
+bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
+                                             MachineRegisterInfo &MRI,
+                                             MachineFunction &MF) const {
+  // The implementation of this function is adapted from X86FastISel.
+  assert((I.getOpcode() == TargetOpcode::G_MUL ||
+          I.getOpcode() == TargetOpcode::G_SMULH ||
+          I.getOpcode() == TargetOpcode::G_UMULH ||
+          I.getOpcode() == TargetOpcode::G_SDIV ||
           I.getOpcode() == TargetOpcode::G_SREM ||
           I.getOpcode() == TargetOpcode::G_UDIV ||
           I.getOpcode() == TargetOpcode::G_UREM) &&
@@ -1581,10 +1587,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
     return false;
 
   const static unsigned NumTypes = 4; // i8, i16, i32, i64
-  const static unsigned NumOps = 4;   // SDiv, SRem, UDiv, URem
+  const static unsigned NumOps = 7;   // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulh
   const static bool S = true;         // IsSigned
   const static bool U = false;        // !IsSigned
   const static unsigned Copy = TargetOpcode::COPY;
+
   // For the X86 IDIV instruction, in most cases the dividend
   // (numerator) must be in a specific register pair highreg:lowreg,
   // producing the quotient in lowreg and the remainder in highreg.
@@ -1593,19 +1600,19 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   // exception is i8, where the dividend is defined as a single register rather
   // than a register pair, and we therefore directly sign-extend the dividend
   // into lowreg, instead of copying, and ignore the highreg.
-  const static struct DivRemEntry {
+  const static struct MulDivRemEntry {
     // The following portion depends only on the data type.
    unsigned SizeInBits;
     unsigned LowInReg;  // low part of the register pair
     unsigned HighInReg; // high part of the register pair
     // The following portion depends on both the data type and the operation.
-    struct DivRemResult {
-      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
+    struct MulDivRemResult {
+      unsigned OpMulDivRem;     // The specific MUL/DIV opcode to use.
       unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                 // highreg, or copying a zero into highreg.
       unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                 // zero/sign-extending into lowreg for i8.
-      unsigned DivRemResultReg; // Register containing the desired result.
+      unsigned ResultReg;       // Register containing the desired result.
       bool IsOpSigned;          // Whether to use signed or unsigned form.
     } ResultTable[NumOps];
   } OpTable[NumTypes] = {
@@ -1617,25 +1624,34 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
+          {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
+          {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
+          {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U},  // UMulH
       }},                                                // i8
      {16,
       X86::AX,
       X86::DX,
       {
-          {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},    // SDiv
-          {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},    // SRem
-          {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
-          {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
-      }},                                                // i16
+          {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},     // SDiv
+          {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},     // SRem
+          {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U},  // UDiv
+          {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U},  // URem
+          {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
+          {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
+          {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U},  // UMulH
+      }},                                                 // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
-          {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},    // SDiv
-          {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},    // SRem
-          {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
-          {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
-      }},                                                 // i32
+          {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},     // SDiv
+          {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},     // SRem
+          {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U},  // UDiv
+          {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U},  // URem
+          {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
+          {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
+          {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U},  // UMulH
+      }},                                                  // i32
      {64,
       X86::RAX,
       X86::RDX,
@@ -1644,10 +1660,13 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},    // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
-      }},                                                 // i64
+          {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
+          {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
+          {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U},  // UMulH
+      }},                                                  // i64
   };
-  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const DivRemEntry &El) {
+  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) {
     return El.SizeInBits == RegTy.getSizeInBits();
   });
   if (OpEntryIt == std::end(OpTable))
     return false;
@@ -1656,7 +1675,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   unsigned OpIndex;
   switch (I.getOpcode()) {
   default:
-    llvm_unreachable("Unexpected div/rem opcode");
+    llvm_unreachable("Unexpected mul/div/rem opcode");
   case TargetOpcode::G_SDIV:
     OpIndex = 0;
     break;
@@ -1669,10 +1688,20 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   case TargetOpcode::G_UREM:
     OpIndex = 3;
     break;
+  case TargetOpcode::G_MUL:
+    OpIndex = 4;
+    break;
+  case TargetOpcode::G_SMULH:
+    OpIndex = 5;
+    break;
+  case TargetOpcode::G_UMULH:
+    OpIndex = 6;
+    break;
   }
 
-  const DivRemEntry &TypeEntry = *OpEntryIt;
-  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+  const MulDivRemEntry &TypeEntry = *OpEntryIt;
+  const MulDivRemEntry::MulDivRemResult &OpEntry =
+      TypeEntry.ResultTable[OpIndex];
 
   const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
   if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
@@ -1687,6 +1716,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
           TypeEntry.LowInReg)
       .addReg(Op1Reg);
+
   // Zero-extend or sign-extend into high-order input register.
   if (OpEntry.OpSignExtend) {
     if (OpEntry.IsOpSigned)
@@ -1717,9 +1747,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
       }
     }
   }
-  // Generate the DIV/IDIV instruction.
-  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
+
+  // Generate the DIV/IDIV/MUL/IMUL instruction.
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
       .addReg(Op2Reg);
+
   // For i8 remainder, we can't reference ah directly, as we'll end
   // up with bogus copies like %r9b = COPY %ah. Reference ax
   // instead to prevent ah references in a rex instruction.
@@ -1728,7 +1760,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   // won't generate explicit references to the GR8_NOREX registers. If
   // the allocator and/or the backend get enhanced to be more robust in
   // that regard, this can be, and should be, removed.
-  if (OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
+  if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
     Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
     Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
     BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
@@ -1750,9 +1782,10 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   } else {
     BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
             DstReg)
-        .addReg(OpEntry.DivRemResultReg);
+        .addReg(OpEntry.ResultReg);
   }
   I.eraseFromParent();
+
   return true;
 }
 
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index 49a0f69..a4a247f8 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -196,6 +196,15 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .clampScalar(0, s8, sMaxScalar)
       .scalarize(0);
 
+  getActionDefinitionsBuilder({G_SMULH, G_UMULH})
+      .legalIf([=](const LegalityQuery &Query) -> bool {
+        return typeInSet(0, {s8, s16, s32})(Query) ||
+               (Is64Bit && typeInSet(0, {s64})(Query));
+      })
+      .widenScalarToNextPow2(0, /*Min=*/32)
+      .clampScalar(0, s8, sMaxScalar)
+      .scalarize(0);
+
   // integer divisions
   getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
       .legalIf([=](const LegalityQuery &Query) -> bool {
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
index d064d6c..deff2ba 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
@@ -1,10 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -O0 -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X64
-# RUN: llc -O0 -mtriple=i386-linux-gnu -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s 2>%t -o - | FileCheck %s --check-prefixes=CHECK,X86
-# RUN: FileCheck -check-prefix=ERR32 %s < %t
-
-# ERR32: remark: <unknown>:0:0: unable to legalize instruction: %14:_(s32) = G_UMULH %7:_, %9:_ (in function: test_mul_i42)
-# ERR32: remark: <unknown>:0:0: unable to legalize instruction: %10:_(s32) = G_UMULH %3:_, %5:_ (in function: test_mul_i64)
+# RUN: llc -O0 -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X86
 
 --- |
   define void @test_mul_i1() { ret void }
@@ -200,11 +196,8 @@ body: |
     ; X86: liveins: $rdi, $rsi
     ; X86-NEXT: {{  $}}
     ; X86-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx
-    ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s42) = G_TRUNC [[COPY]](s64)
-    ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s42)
-    ; X86-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s42)
-    ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
-    ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT1]](s64)
+    ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; X86-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
     ; X86-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
     ; X86-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
@@ -212,9 +205,7 @@ body: |
     ; X86-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
    ; X86-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
     ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
-    ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s42) = G_TRUNC [[MV]](s64)
-    ; X86-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s42)
-    ; X86-NEXT: $rax = COPY [[ANYEXT2]](s64)
+    ; X86-NEXT: $rax = COPY [[MV]](s64)
     ; X86-NEXT: RET 0
     %0(s64) = COPY $rdx
     %1(s42) = G_TRUNC %0(s64)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
index ba0499e..3835deb 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
@@ -1,40 +1,92 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X86
 
-;TODO: instruction selection not supported yet
-;define i8 @test_mul_i8(i8 %arg1, i8 %arg2) {
-;  %ret = mul i8 %arg1, %arg2
-;  ret i8 %ret
-;}
+define i8 @test_mul_i8(i8 %arg1, i8 %arg2) nounwind {
+; X64-LABEL: test_mul_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    imulb %sil
+; X64-NEXT:    retq
+;
+; X86-LABEL: test_mul_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cbtw
+; X86-NEXT:    imulb %cl
+; X86-NEXT:    retl
+  %ret = mul i8 %arg1, %arg2
+  ret i8 %ret
+}
 
-define i16 @test_mul_i16(i16 %arg1, i16 %arg2) {
+define i16 @test_mul_i16(i16 %arg1, i16 %arg2) nounwind {
 ; X64-LABEL: test_mul_i16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    imulw %di, %ax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
+;
+; X86-LABEL: test_mul_i16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    imulw %cx, %ax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
   %ret = mul i16 %arg1, %arg2
   ret i16 %ret
 }
 
-define i32 @test_mul_i32(i32 %arg1, i32 %arg2) {
+define i32 @test_mul_i32(i32 %arg1, i32 %arg2) nounwind {
 ; X64-LABEL: test_mul_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    imull %edi, %eax
 ; X64-NEXT:    retq
+;
+; X86-LABEL: test_mul_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
   %ret = mul i32 %arg1, %arg2
   ret i32 %ret
 }
 
-define i64 @test_mul_i64(i64 %arg1, i64 %arg2) {
+define i64 @test_mul_i64(i64 %arg1, i64 %arg2) nounwind {
 ; X64-LABEL: test_mul_i64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    imulq %rdi, %rax
 ; X64-NEXT:    retq
+;
+; X86-LABEL: test_mul_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    imull %eax, %esi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    imull %edx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    imull %edx, %edi
+; X86-NEXT:    mull %edx
+; X86-NEXT:    addl %edi, %esi
+; X86-NEXT:    addl %esi, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
   %ret = mul i64 %arg1, %arg2
   ret i64 %ret
 }
+
+;TODO: instruction selection not supported yet
+;define i128 @test_mul_i128(i128 %arg1, i128 %arg2) nounwind {
+;  %ret = mul i128 %arg1, %arg2
+;  ret i128 %ret
+;}
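
For reference, the X86 CHECK lines in legalize-mul-scalar.mir above narrow a 64-bit G_MUL on i686 into three 32-bit G_MULs, one G_UMULH (the high half of lo(a) * lo(b)), and two G_ADDs; the selector then reads the low-half result register (AL/AX/EAX/RAX) for G_MUL and the high-half register (AH/DX/EDX/RDX) for G_SMULH/G_UMULH via the OpTable entries. Below is a minimal standalone C++ sketch of that narrowing; the helper name mul64_via_32, the test constants, and the printf check are illustrative only and are not part of the patch.

// Standalone sketch (assumed example, not from the commit) of the schoolbook
// decomposition shown in the X86 CHECK lines: a wrapping 64-bit multiply
// rebuilt from 32-bit pieces, where G_UMULH supplies the high 32 bits of
// lo(a) * lo(b).
#include <cstdint>
#include <cstdio>

static uint64_t mul64_via_32(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);
  uint32_t Mul = static_cast<uint32_t>(uint64_t(ALo) * BLo);            // G_MUL
  uint32_t Mul1 = static_cast<uint32_t>(uint64_t(AHi) * BLo);           // G_MUL
  uint32_t Mul2 = static_cast<uint32_t>(uint64_t(ALo) * BHi);           // G_MUL
  uint32_t UMulH = static_cast<uint32_t>((uint64_t(ALo) * BLo) >> 32);  // G_UMULH
  uint32_t Hi = Mul1 + Mul2 + UMulH;                                    // G_ADD x2
  return (uint64_t(Hi) << 32) | Mul;                                    // G_MERGE_VALUES
}

int main() {
  uint64_t A = 0x123456789abcdef0ULL, B = 0x0fedcba987654321ULL;
  std::printf("%d\n", mul64_via_32(A, B) == A * B); // prints 1
  return 0;
}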
