2 files changed, 17 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 24e278d..09c04e2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -192,11 +192,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
 
   // If we are casting an integer to vector and extracting a portion, that is
   // a shift-right and truncate.
-  // TODO: If no shift is needed, allow extra use?
   // TODO: Allow FP dest type by casting the trunc to FP?
   if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() &&
-      isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
-      Ext.getVectorOperand()->hasOneUse()) {
+      isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) {
     assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
            "Expected fixed vector type for bitcast from scalar integer");
 
@@ -206,8 +204,10 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
     if (IsBigEndian)
       ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
     unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits();
-    Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
-    return new TruncInst(Lshr, DestTy);
+    if (!ShiftAmountC || Ext.getVectorOperand()->hasOneUse()) {
+      Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
+      return new TruncInst(Lshr, DestTy);
+    }
   }
 
   if (!X->getType()->isVectorTy())
diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll
index 49d2965..d2ac127 100644
--- a/llvm/test/Transforms/InstCombine/extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/extractelement.ll
@@ -433,14 +433,20 @@ define i8 @bitcast_scalar_index_variable(i32 %x, i64 %y) {
   ret i8 %r
 }
 
-; negative test - no extra uses
+; an extra use of the bitcast is ok if no shift is needed (index 0 folds on little-endian only)
 
 define i8 @bitcast_scalar_index0_use(i64 %x) {
-; ANY-LABEL: @bitcast_scalar_index0_use(
-; ANY-NEXT:    [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>
-; ANY-NEXT:    call void @use(<8 x i8> [[V]])
-; ANY-NEXT:    [[R:%.*]] = extractelement <8 x i8> [[V]], i64 0
-; ANY-NEXT:    ret i8 [[R]]
+; LE-LABEL: @bitcast_scalar_index0_use(
+; LE-NEXT:    [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>
+; LE-NEXT:    call void @use(<8 x i8> [[V]])
+; LE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i8
+; LE-NEXT:    ret i8 [[R]]
+;
+; BE-LABEL: @bitcast_scalar_index0_use(
+; BE-NEXT:    [[V:%.*]] = bitcast i64 [[X:%.*]] to <8 x i8>
+; BE-NEXT:    call void @use(<8 x i8> [[V]])
+; BE-NEXT:    [[R:%.*]] = extractelement <8 x i8> [[V]], i64 0
+; BE-NEXT:    ret i8 [[R]]
 ;
   %v = bitcast i64 %x to <8 x i8>
   call void @use(<8 x i8> %v)