author     Sanjay Patel <spatel@rotateright.com>  2020-05-15 15:22:30 -0400
committer  Sanjay Patel <spatel@rotateright.com>  2020-05-16 10:44:47 -0400
commit     5be37cb124f71e267f1cf7bc3929486fde2acd0a (patch)
tree       094c8f2b6c6de886759c36b4916e59d65cb4093c /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent     72f1fb2edf596e6030f6b55439c732631edb4e91 (diff)
[x86][CGP] try to hoist funnel shift above select-of-splats
This is basically the same patch as D63233, but converted to funnel shifts rather than regular shifts. I did not see a way to effectively share code for these two cases, though.

This follows D79718 and D79827 to re-fix PR37426, because that pattern now gets canonicalized to funnel shift intrinsics in IR.

I did draft an alternative patch as an enhancement to "shouldSinkOperands()", but that was awkward because we have to key the transform from the select, but then look at both its users and its operands.
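For illustration, here is a hedged before/after sketch of the transform in LLVM IR. The function names, the v4i32 type, and the value names are hypothetical, not taken from the patch or its tests, and whether the rewrite actually fires also depends on the target's TLI->isVectorShiftByScalarCheap answer.

; Before: the funnel-shift amount is a select between two splatted scalars.
define <4 x i32> @fshl_select_of_splats_before(<4 x i32> %x, <4 x i32> %y, i32 %a, i32 %b, i1 %cond) {
  %a.ins = insertelement <4 x i32> undef, i32 %a, i32 0
  %a.splat = shufflevector <4 x i32> %a.ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %b.ins = insertelement <4 x i32> undef, i32 %b, i32 0
  %b.splat = shufflevector <4 x i32> %b.ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %amt = select i1 %cond, <4 x i32> %a.splat, <4 x i32> %b.splat
  %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
  ret <4 x i32> %r
}

; After: the funnel shift is hoisted above the select, so each arm shifts by a splat amount.
define <4 x i32> @fshl_select_of_splats_after(<4 x i32> %x, <4 x i32> %y, i32 %a, i32 %b, i1 %cond) {
  %a.ins = insertelement <4 x i32> undef, i32 %a, i32 0
  %a.splat = shufflevector <4 x i32> %a.ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %b.ins = insertelement <4 x i32> undef, i32 %b, i32 0
  %b.splat = shufflevector <4 x i32> %b.ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %t = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %a.splat)
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %b.splat)
  %r = select i1 %cond, <4 x i32> %t, <4 x i32> %f
  ret <4 x i32> %r
}

declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

After the hoist, each funnel shift sees a uniform (splat) shift amount, which a target such as x86 can lower as a shift by a scalar rather than a general per-element vector shift.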
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 39
1 file changed, 39 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 8723e26..1c547ab 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -391,6 +391,7 @@ class TypePromotionTransaction;
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
bool optimizeShiftInst(BinaryOperator *BO);
+ bool optimizeFunnelShift(IntrinsicInst *Fsh);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
@@ -2061,6 +2062,9 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
return despeculateCountZeros(II, TLI, DL, ModifiedDT);
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ return optimizeFunnelShift(II);
case Intrinsic::dbg_value:
return fixupDbgValue(II);
case Intrinsic::vscale: {
@@ -6240,6 +6244,41 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
return true;
}
+bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
+ Intrinsic::ID Opcode = Fsh->getIntrinsicID();
+ assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
+ "Expected a funnel shift");
+
+ // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
+ // than general vector shifts, and (3) the shift amount is select-of-splatted
+ // values, hoist the funnel shifts before the select:
+ // fsh Op0, Op1, (select Cond, TVal, FVal) -->
+ // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Fsh->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Fsh->getOperand(2),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Fsh);
+ Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
+ Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, TVal });
+ Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, { X, Y, FVal });
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ Fsh->replaceAllUsesWith(NewSel);
+ Fsh->eraseFromParent();
+ return true;
+}
+
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
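As a hedged negative sketch (again with hypothetical names and types), the guards in optimizeFunnelShift mean a case like the following is left alone: the select arms are arbitrary vectors rather than splats, so isSplatValue fails. The same bail-out happens if the select feeding the shift amount has more than one use, because the match requires m_OneUse.

; The shift amount is a select of two non-splat vectors, so optimizeFunnelShift
; leaves the IR unchanged.
define <4 x i32> @fshl_select_not_hoisted(<4 x i32> %x, <4 x i32> %y, <4 x i32> %a, <4 x i32> %b, i1 %cond) {
  %amt = select i1 %cond, <4 x i32> %a, <4 x i32> %b
  %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
  ret <4 x i32> %r
}

declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)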