diff options
author | Alexey Bataev <a.bataev@outlook.com> | 2024-10-31 13:50:02 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-31 13:50:02 -0400 |
commit | e05def081e43f8fe8be7f3a4ed2749ae01ab0ab3 (patch) | |
tree | ef7f5833d14933b286cd5f6cd1ab580208693a71 | |
parent | 88591aa0ca7e4d99da353d49f91ea63e43fb55e0 (diff) | |
download | llvm-e05def081e43f8fe8be7f3a4ed2749ae01ab0ab3.zip llvm-e05def081e43f8fe8be7f3a4ed2749ae01ab0ab3.tar.gz llvm-e05def081e43f8fe8be7f3a4ed2749ae01ab0ab3.tar.bz2 |
[SLP]Do not vectorize code in EH and non-returning blocks
The code in EH and non-returning blocks can be skipped by the
vectorizer, since it does not add to the performance, just consumes
compile/link time.
Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/112221
8 files changed, 105 insertions, 45 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 97631f5..421bb9a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -8160,9 +8160,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, auto *VL0 = cast<Instruction>(S.OpValue); BB = VL0->getParent(); - if (S.MainOp && !DT->isReachableFromEntry(BB)) { + if (S.MainOp && + (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator()) || + !DT->isReachableFromEntry(BB))) { // Don't go into unreachable blocks. They may contain instructions with // dependency cycles which confuse the final scheduling. + // Do not vectorize EH and non-returning blocks, not profitable in most + // cases. LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return; @@ -17732,6 +17736,9 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // Scan the blocks in the function in post order. for (auto *BB : post_order(&F.getEntryBlock())) { + if (BB->isEHPad() || isa_and_nonnull<UnreachableInst>(BB->getTerminator())) + continue; + // Start new block - clear the list of reduction roots. 
R.clearReductionData(); collectSeedInstructions(BB); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll index e4b6c06..8b548e3 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/landing_pad.ll @@ -28,9 +28,9 @@ ; YAML-NEXT: Function: foo ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'SLP vectorized with cost ' -; YAML-NEXT: - Cost: '2' +; YAML-NEXT: - Cost: '8' ; YAML-NEXT: - String: ' and with tree size ' -; YAML-NEXT: - TreeSize: '9' +; YAML-NEXT: - TreeSize: '5' define void @foo() personality ptr @bar { ; CHECK-LABEL: @foo( @@ -44,8 +44,10 @@ define void @foo() personality ptr @bar { ; CHECK-NEXT: ret void ; CHECK: bb3: ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i64> [ [[TMP4:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 0, i64 0, i64 poison) [ "deopt"() ] -; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] +; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] ; CHECK: bb4: ; CHECK-NEXT: br i1 poison, label [[BB11:%.*]], label [[BB5:%.*]] ; CHECK: bb5: @@ -55,9 +57,8 @@ define void @foo() personality ptr @bar { ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i64 [ poison, [[BB8]] ], [ poison, [[BB5]] ] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[LOCAL_5_84111]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = invoke i64 poison(ptr addrspace(1) nonnull poison, i64 poison, i64 poison, i64 poison) [ "deopt"() ] -; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] +; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: @@ -65,16 +66,22 @@ define 
void @foo() personality ptr @bar { ; CHECK-NEXT: [[TMP7]] = phi <2 x i64> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ] ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP8]] = phi <2 x i64> [ [[TMP2]], [[BB3]] ] +; CHECK-NEXT: [[LOCAL_10_38123_LCSSA:%.*]] = phi i64 [ [[TMP10]], [[BB3]] ] +; CHECK-NEXT: [[LOCAL_5_33118_LCSSA:%.*]] = phi i64 [ [[TMP5]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i64 } -; CHECK-NEXT: cleanup +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[LOCAL_10_38123_LCSSA]], i32 0 +; CHECK-NEXT: [[TMP8]] = insertelement <2 x i64> [[TMP12]], i64 [[LOCAL_5_33118_LCSSA]], i32 1 ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP9]] = phi <2 x i64> [ [[TMP5]], [[BB7]] ] +; CHECK-NEXT: [[LOCAL_10_89113_LCSSA:%.*]] = phi i64 [ poison, [[BB7]] ] +; CHECK-NEXT: [[LOCAL_5_84111_LCSSA:%.*]] = phi i64 [ [[LOCAL_5_84111]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i64 } -; CHECK-NEXT: cleanup +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[LOCAL_10_89113_LCSSA]], i32 0 +; CHECK-NEXT: [[TMP9]] = insertelement <2 x i64> [[TMP11]], i64 [[LOCAL_5_84111_LCSSA]], i32 1 ; CHECK-NEXT: br label [[BB9]] ; bb1: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll index 6f5f933..f1f83c0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll @@ -18,13 +18,15 @@ define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btCons ; CHECK: land.lhs.true.i.1: ; CHECK-NEXT: br i1 undef, label [[FOR_INC_1:%.*]], label [[IF_THEN7_1]] ; CHECK: if.then7.1: -; CHECK-NEXT: store i32 1, ptr [[INFO]], align 4 -; CHECK-NEXT: store i32 5, ptr [[NUB5]], align 4 +; CHECK-NEXT: store <2 x i32> <i32 1, i32 5>, ptr [[INFO]], 
align 4 ; CHECK-NEXT: br label [[FOR_INC_1]] ; CHECK: for.inc.1: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 5>, [[IF_THEN7_1]] ], [ <i32 0, i32 6>, [[LAND_LHS_TRUE_I_1]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 1, i32 -1> -; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[INFO]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 5, [[IF_THEN7_1]] ], [ 6, [[LAND_LHS_TRUE_I_1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 1, [[IF_THEN7_1]] ], [ 0, [[LAND_LHS_TRUE_I_1]] ] +; CHECK-NEXT: [[INC_2:%.*]] = add nsw i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[INC_2]], ptr [[INFO]], align 4 +; CHECK-NEXT: [[DEC_2:%.*]] = add nsw i32 [[TMP0]], -1 +; CHECK-NEXT: store i32 [[DEC_2]], ptr [[NUB5]], align 4 ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll index 7476c77..adbe9c3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-phi-in-landingpad.ll @@ -10,10 +10,10 @@ define void @test() personality ptr null { ; CHECK-NEXT: invoke void null() ; CHECK-NEXT: to label %[[BB65]] unwind label %[[BB4]] ; CHECK: [[BB4]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ poison, %[[BB2]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, %[[BB]] ], [ 0, %[[BB2]] ] +; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ 0, %[[BB]] ], [ 0, %[[BB2]] ] ; CHECK-NEXT: [[LANDINGPAD:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 ; CHECK-NEXT: call void null(i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 
[[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]], i32 [[TMP1]]) ; CHECK-NEXT: ret void ; CHECK: [[BB65]]: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll b/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll index 578d463..79698e2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/funclet.ll @@ -7,16 +7,24 @@ define void @test1(ptr %a, ptr %b, ptr %c) #0 personality ptr @__CxxFrameHandler ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null) -; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]] +; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]] ; CHECK: catch.dispatch: ; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller ; CHECK: catch: ; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null] -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP6]]) [ "funclet"(token [[TMP1]]) ] -; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[C:%.*]], align 8 +; CHECK-NEXT: [[I0:%.*]] = load double, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[I1:%.*]] = load double, ptr 
[[B:%.*]], align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[I0]], [[I1]] +; CHECK-NEXT: [[CALL:%.*]] = tail call double @floor(double [[MUL]]) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1 +; CHECK-NEXT: [[I3:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1 +; CHECK-NEXT: [[I4:%.*]] = load double, ptr [[ARRAYIDX4]], align 8 +; CHECK-NEXT: [[MUL5:%.*]] = fmul double [[I3]], [[I4]] +; CHECK-NEXT: [[CALL5:%.*]] = tail call double @floor(double [[MUL5]]) #[[ATTR1]] [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: store double [[CALL]], ptr [[C:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[C]], i64 1 +; CHECK-NEXT: store double [[CALL5]], ptr [[ARRAYIDX5]], align 8 ; CHECK-NEXT: catchret from [[TMP1]] to label [[TRY_CONT:%.*]] ; CHECK: try.cont: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 47b42bc..2a036cc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -14,6 +14,8 @@ define void @foo() personality ptr @bar { ; CHECK-NEXT: ret void ; CHECK: bb3: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]] ; CHECK: bb4: @@ -25,7 +27,6 @@ define void @foo() personality ptr @bar { ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ] -; CHECK-NEXT: 
[[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: @@ -33,19 +34,25 @@ define void @foo() personality ptr @bar { ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 0> +; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP8]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] +; CHECK-NEXT: [[LOCAL_10_38123_LCSSA:%.*]] = phi i32 [ [[TMP10]], [[BB3]] ] +; CHECK-NEXT: [[LOCAL_5_33118_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_38123_LCSSA]], i32 0 +; CHECK-NEXT: [[TMP8]] = insertelement <2 x i32> [[TMP12]], i32 [[LOCAL_5_33118_LCSSA]], i32 1 ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ] +; CHECK-NEXT: [[LOCAL_10_89113_LCSSA:%.*]] = phi i32 [ poison, [[BB7]] ] +; CHECK-NEXT: [[LOCAL_5_84111_LCSSA:%.*]] = phi i32 [ [[LOCAL_5_84111]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_89113_LCSSA]], i32 0 +; CHECK-NEXT: [[TMP9]] = insertelement <2 x i32> [[TMP11]], i32 [[LOCAL_5_84111_LCSSA]], i32 1 ; CHECK-NEXT: br label [[BB9]] ; bb1: diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll index c758d05..3f765d5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll @@ -7,14 +7,17 @@ define void @test_phi_in_landingpad() personality ptr ; CHECK-LABEL: @test_phi_in_landingpad( ; CHECK-NEXT: entry: ; CHECK-NEXT: invoke void @foo() -; CHECK-NEXT: to label [[INNER:%.*]] unwind label [[LPAD:%.*]] +; CHECK-NEXT: to label [[INNER:%.*]] unwind label [[LPAD:%.*]] ; CHECK: inner: ; CHECK-NEXT: invoke void @foo() -; CHECK-NEXT: to label [[DONE:%.*]] unwind label [[LPAD]] +; CHECK-NEXT: to label [[DONE:%.*]] unwind label [[LPAD]] ; CHECK: lpad: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ undef, [[ENTRY:%.*]] ], [ undef, [[INNER]] ] +; CHECK-NEXT: [[X1:%.*]] = phi double [ undef, [[ENTRY:%.*]] ], [ undef, [[INNER]] ] +; CHECK-NEXT: [[Y1:%.*]] = phi double [ undef, [[ENTRY]] ], [ undef, [[INNER]] ] ; CHECK-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 } -; CHECK-NEXT: catch ptr null +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[X1]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> [[TMP3]], double [[Y1]], i32 1 ; CHECK-NEXT: br label [[DONE]] ; CHECK: done: ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ undef, [[INNER]] ], [ [[TMP0]], [[LPAD]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 9500607..f981552 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -11,23 +11,49 @@ define void @hoge() { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 undef>, i16 [[T]], i32 0 -; CHECK-NEXT: 
[[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], <i32 undef, i32 15, i32 31, i32 47> -; CHECK-NEXT: [[T18:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[T3:%.*]] = sext i16 undef to i32 +; CHECK-NEXT: [[T4:%.*]] = sext i16 [[T]] to i32 +; CHECK-NEXT: [[T5:%.*]] = sub nsw i32 undef, [[T4]] +; CHECK-NEXT: [[T6:%.*]] = sub i32 [[T5]], undef +; CHECK-NEXT: [[T7:%.*]] = sub nsw i32 63, [[T3]] +; CHECK-NEXT: [[T8:%.*]] = sub i32 [[T7]], undef +; CHECK-NEXT: [[T9:%.*]] = add i32 [[T8]], undef +; CHECK-NEXT: [[T10:%.*]] = add nsw i32 [[T6]], 15 +; CHECK-NEXT: [[T11:%.*]] = icmp sgt i32 [[T9]], [[T10]] +; CHECK-NEXT: [[T12:%.*]] = select i1 [[T11]], i32 [[T9]], i32 [[T10]] +; CHECK-NEXT: [[T13:%.*]] = add nsw i32 [[T6]], 31 +; CHECK-NEXT: [[T14:%.*]] = icmp sgt i32 [[T12]], [[T13]] +; CHECK-NEXT: [[T15:%.*]] = select i1 [[T14]], i32 [[T12]], i32 [[T13]] +; CHECK-NEXT: [[T16:%.*]] = add nsw i32 [[T6]], 47 +; CHECK-NEXT: [[T17:%.*]] = icmp sgt i32 [[T15]], [[T16]] +; CHECK-NEXT: [[T18:%.*]] = select i1 [[T17]], i32 [[T15]], i32 [[T16]] ; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[T18]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 -; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], <i32 -49, i32 -33, i32 -33, i32 -17> -; CHECK-NEXT: [[T25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, 
[[T25]] +; CHECK-NEXT: [[T21:%.*]] = sub nsw i32 undef, [[T3]] +; CHECK-NEXT: [[T22:%.*]] = sub i32 [[T21]], undef +; CHECK-NEXT: [[T23:%.*]] = sub nsw i32 undef, [[T4]] +; CHECK-NEXT: [[T24:%.*]] = sub i32 [[T23]], undef +; CHECK-NEXT: [[T25:%.*]] = add nsw i32 [[T24]], -49 +; CHECK-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[T25]], undef ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T25]] -; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX1]] +; CHECK-NEXT: [[T28:%.*]] = icmp sgt i32 [[OP_RDX1]], undef +; CHECK-NEXT: [[T30:%.*]] = select i1 [[T28]], i32 undef, i32 [[OP_RDX1]] +; CHECK-NEXT: [[T32:%.*]] = add nsw i32 [[T22]], -33 +; CHECK-NEXT: [[T31:%.*]] = icmp sgt i32 [[T32]], undef +; CHECK-NEXT: [[T35:%.*]] = select i1 [[T31]], i32 undef, i32 [[T32]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[T35]], [[T30]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T30]], i32 [[T35]] +; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T24]], -33 +; CHECK-NEXT: [[T36:%.*]] = icmp sgt i32 [[T39]], undef +; CHECK-NEXT: [[T37:%.*]] = select i1 [[T36]], i32 undef, i32 [[T39]] +; CHECK-NEXT: [[T38:%.*]] = icmp sgt i32 [[T37]], [[OP_RDX3]] +; CHECK-NEXT: [[OP_RDX5:%.*]] = select i1 [[T38]], i32 [[OP_RDX3]], i32 [[T37]] +; CHECK-NEXT: [[T42:%.*]] = add nsw i32 [[T22]], -17 +; CHECK-NEXT: [[T41:%.*]] = icmp sgt i32 [[T42]], undef +; CHECK-NEXT: [[T40:%.*]] = select i1 [[T41]], i32 undef, i32 [[T42]] +; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp sgt i32 [[T40]], [[OP_RDX5]] +; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T40]] +; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]] ; CHECK-NEXT: unreachable ; bb: |