aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp33
-rw-r--r--llvm/test/Transforms/InstCombine/funnel.ll38
2 files changed, 31 insertions, 40 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c13362f..0cc50cb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2052,22 +2052,22 @@ Instruction *InstCombinerImpl::matchBSwap(BinaryOperator &Or) {
return LastInst;
}
-/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
-static Instruction *matchRotate(Instruction &Or) {
+/// Match UB-safe variants of the funnel shift intrinsic.
+static Instruction *matchFunnelShift(Instruction &Or) {
// TODO: Can we reduce the code duplication between this and the related
// rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits();
- // First, find an or'd pair of opposite shifts with the same shifted operand:
- // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+ // First, find an or'd pair of opposite shifts:
+ // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
BinaryOperator *Or0, *Or1;
if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
!match(Or.getOperand(1), m_BinOp(Or1)))
return nullptr;
- Value *ShVal, *ShAmt0, *ShAmt1;
- if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
- !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+ Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+ if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+ !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))))
return nullptr;
BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode();
@@ -2075,9 +2075,9 @@ static Instruction *matchRotate(Instruction &Or) {
if (ShiftOpcode0 == ShiftOpcode1)
return nullptr;
- // Match the shift amount operands for a rotate pattern. This always matches
- // a subtraction on the R operand.
- auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+ // Match the shift amount operands for a funnel shift pattern. This always
+ // matches a subtraction on the R operand.
+ auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
// Check for constant shift amounts that sum to the bitwidth.
// TODO: Support non-uniform shift amounts.
const APInt *LC, *RC;
@@ -2085,6 +2085,12 @@ static Instruction *matchRotate(Instruction &Or) {
if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
return ConstantInt::get(L->getType(), *LC);
+ // For non-constant cases, the following patterns currently only work for
+ // rotation patterns.
+ // TODO: Add general funnel-shift compatible patterns.
+ if (ShVal0 != ShVal1)
+ return nullptr;
+
// For non-constant cases we don't support non-pow2 shift masks.
// TODO: Is it worth matching urem as well?
if (!isPowerOf2_32(Width))
@@ -2121,7 +2127,8 @@ static Instruction *matchRotate(Instruction &Or) {
(SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
- return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+ return IntrinsicInst::Create(
+ F, {IsFshl ? ShVal0 : ShVal1, IsFshl ? ShVal1 : ShVal0, ShAmt});
}
/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
@@ -2574,8 +2581,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *BSwap = matchBSwap(I))
return BSwap;
- if (Instruction *Rotate = matchRotate(I))
- return Rotate;
+ if (Instruction *Funnel = matchFunnelShift(I))
+ return Funnel;
if (Instruction *Concat = matchOrConcat(I, Builder))
return replaceInstUsesWith(I, Concat);
diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll
index fca73a4..4dfcbcd 100644
--- a/llvm/test/Transforms/InstCombine/funnel.ll
+++ b/llvm/test/Transforms/InstCombine/funnel.ll
@@ -3,16 +3,14 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-; TODO: Canonicalize or(shl,lshr) by constant to funnel shift intrinsics.
+; Canonicalize or(shl,lshr) by constant to funnel shift intrinsics.
; This should help cost modeling for vectorization, inlining, etc.
; If a target does not have a fshl instruction, the expansion will
; be exactly these same 3 basic ops (shl/lshr/or).
define i32 @fshl_i32_constant(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_i32_constant(
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 11
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[Y:%.*]], 21
-; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 11)
; CHECK-NEXT: ret i32 [[R]]
;
%shl = shl i32 %x, 11
@@ -23,9 +21,7 @@ define i32 @fshl_i32_constant(i32 %x, i32 %y) {
define i42 @fshr_i42_constant(i42 %x, i42 %y) {
; CHECK-LABEL: @fshr_i42_constant(
-; CHECK-NEXT: [[SHR:%.*]] = lshr i42 [[X:%.*]], 31
-; CHECK-NEXT: [[SHL:%.*]] = shl i42 [[Y:%.*]], 11
-; CHECK-NEXT: [[R:%.*]] = or i42 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call i42 @llvm.fshl.i42(i42 [[Y:%.*]], i42 [[X:%.*]], i42 11)
; CHECK-NEXT: ret i42 [[R]]
;
%shr = lshr i42 %x, 31
@@ -34,13 +30,11 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) {
ret i42 %r
}
-; TODO: Vector types are allowed.
+; Vector types are allowed.
define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: @fshl_v2i16_constant_splat(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 15>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -51,9 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: @fshl_v2i16_constant_splat_undef0(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 undef, i16 1>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 15>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%shl = shl <2 x i16> %x, <i16 undef, i16 1>
@@ -64,9 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) {
define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) {
; CHECK-LABEL: @fshl_v2i16_constant_splat_undef1(
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i16> [[X:%.*]], <i16 1, i16 1>
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i16> [[Y:%.*]], <i16 15, i16 undef>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
; CHECK-NEXT: ret <2 x i16> [[R]]
;
%shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -75,13 +65,11 @@ define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) {
ret <2 x i16> %r
}
-; TODO: Non-power-of-2 vector types are allowed.
+; Non-power-of-2 vector types are allowed.
define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {
; CHECK-LABEL: @fshr_v2i17_constant_splat(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 12>
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 5, i17 5>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
; CHECK-NEXT: ret <2 x i17> [[R]]
;
%shr = lshr <2 x i17> %x, <i17 12, i17 12>
@@ -92,9 +80,7 @@ define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) {
define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {
; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 undef>
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 undef, i17 5>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
; CHECK-NEXT: ret <2 x i17> [[R]]
;
%shr = lshr <2 x i17> %x, <i17 12, i17 undef>
@@ -105,9 +91,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) {
define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) {
; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1(
-; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i17> [[X:%.*]], <i17 12, i17 undef>
-; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i17> [[Y:%.*]], <i17 5, i17 undef>
-; CHECK-NEXT: [[R:%.*]] = or <2 x i17> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>)
; CHECK-NEXT: ret <2 x i17> [[R]]
;
%shr = lshr <2 x i17> %x, <i17 12, i17 undef>