diff options
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll index 1986b51..7c5f984 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/horizontal.ll @@ -228,7 +228,7 @@ for.end: ; preds = %for.end.loopexit, % ; YAML-NEXT: Function: test_unrolled_select ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost ' -; YAML-NEXT: - Cost: '-36' +; YAML-NEXT: - Cost: '-41' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '10' @@ -246,15 +246,17 @@ define i32 @test_unrolled_select(ptr noalias nocapture readonly %blk1, ptr noali ; CHECK-NEXT: [[P2_045:%.*]] = phi ptr [ [[BLK2:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR88:%.*]], [[IF_END_86]] ] ; CHECK-NEXT: [[P1_044:%.*]] = phi ptr [ [[BLK1:%.*]], [[FOR_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[IF_END_86]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[P1_044]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[P2_045]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <8 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP4]] -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]]) -; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP8]], [[S_047]] +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i16> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i16> [[TMP4]] to <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> [[TMP7]], <8 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = sext <8 x i16> [[TMP8]] to <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) +; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP10]], [[S_047]] ; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_RDX]], [[LIM:%.*]] ; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: if.end.86: |