diff options
Diffstat (limited to 'llvm/test/Transforms')
35 files changed, 428 insertions, 822 deletions
diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineDebug.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineDebug.ll index eb2fb4f..ab01bbf 100644 --- a/llvm/test/Transforms/CodeExtractor/PartialInlineDebug.ll +++ b/llvm/test/Transforms/CodeExtractor/PartialInlineDebug.ll @@ -96,11 +96,11 @@ entry: !13 = !DILocalVariable(name: "v", arg: 1, scope: !8, file: !1, line: 3, type: !11) !14 = !DILocation(line: 5, column: 10, scope: !8) !15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 9, column: 7) -!16 = distinct !DISubprogram(name: "callee", scope: !1, file: !1, line: 8, type: !9, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!16 = distinct !DISubprogram(name: "callee", scope: !1, file: !1, line: 8, type: !9, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !17 = !DILocation(line: 10, column: 7, scope: !15) -!18 = distinct !DISubprogram(name: "callee2", scope: !1, file: !1, line: 8, type: !9, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!18 = distinct !DISubprogram(name: "callee2", scope: !1, file: !1, line: 8, type: !9, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !19 = distinct !DILexicalBlock(scope: !18, file: !1, line: 100, column: 1) !20 = !DILocation(line: 110, column: 17, scope: !19) -!21 = distinct !DISubprogram(name: "caller2", scope: !1, file: !1, line: 8, type: !9, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!21 = distinct !DISubprogram(name: "caller2", scope: !1, file: !1, line: 8, type: !9, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !22 = !DILocation(line: 110, column: 17, scope: !21) !23 = !DILocation(line: 15, column: 7, scope: !15) diff --git a/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll b/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll new file mode 100644 index 0000000..43fa08c --- /dev/null +++ b/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes='drop-unnecessary-assumes' -S %s | FileCheck %s +; RUN: opt -passes='drop-unnecessary-assumes<drop-deref>' -S %s | FileCheck --check-prefix=DROP-DEREF %s + +declare void @use(ptr) + +define i8 @test_dereferenceable_assume_ptr_not_used(ptr %p, i64 %size) { +; CHECK-LABEL: define i8 @test_dereferenceable_assume_ptr_not_used( +; CHECK-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[SIZE]]) ] +; CHECK-NEXT: ret i8 0 +; +; DROP-DEREF-LABEL: define i8 @test_dereferenceable_assume_ptr_not_used( +; DROP-DEREF-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; DROP-DEREF-NEXT: ret i8 0 +; + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %size) ] + ret i8 0 +} + +define i8 @test_dereferenceable_assume_ptr_used_variable_size(ptr %p, i64 %size) { +; CHECK-LABEL: define i8 @test_dereferenceable_assume_ptr_used_variable_size( +; CHECK-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[SIZE]]) ] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: ret i8 [[VAL]] +; +; DROP-DEREF-LABEL: define i8 @test_dereferenceable_assume_ptr_used_variable_size( +; DROP-DEREF-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; DROP-DEREF-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; DROP-DEREF-NEXT: ret i8 [[VAL]] +; + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %size) ] + %val = load i8, ptr %p + ret i8 %val +} + +define i8 @test_dereferenceable_with_align_ptr_used(ptr %p, i64 %size) { +; CHECK-LABEL: define i8 @test_dereferenceable_with_align_ptr_used( +; CHECK-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[SIZE]]), "align"(ptr [[P]], i64 8) ] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: ret i8 [[VAL]] +; +; DROP-DEREF-LABEL: define i8 @test_dereferenceable_with_align_ptr_used( +; DROP-DEREF-SAME: ptr [[P:%.*]], i64 [[SIZE:%.*]]) { +; DROP-DEREF-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P]], i64 8) ] +; DROP-DEREF-NEXT: [[VAL:%.*]] = load i8, ptr [[P]], align 1 +; DROP-DEREF-NEXT: ret i8 [[VAL]] +; + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %size), "align"(ptr %p, i64 8) ] + %val = load i8, ptr %p + ret i8 %val +} diff --git a/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll b/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll index da6c19d..76406dd 100644 --- a/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll +++ b/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll @@ -66,7 +66,7 @@ define void @inline_me() !dbg !13 { !10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) !11 = !DILocation(line: 1, column: 1, scope: !6) !12 = !DILabel(scope: !6, name: "bye", file: !1, line: 28) -!13 = distinct !DISubprogram(name: "inline_me", linkageName: "inline_me", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8) +!13 = distinct !DISubprogram(name: "inline_me", linkageName: "inline_me", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) !14 = !DILabel(scope: !13, name: "label_in_@inline_me", file: !1, line: 29) !15 = !DILocation(line: 2, column: 2, scope: !13, inlinedAt: !11) !16 = !DILabel(scope: !17, name: "scoped_label_in_foo", file: !1, line: 30) diff --git a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll index 3f69f0c..f9dd9eaf 100644 --- a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll +++ b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll @@ -106,7 +106,7 @@ define void @inline_me() !dbg !12{ !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10) !10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) !11 = !DILocation(line: 1, column: 1, scope: !6) -!12 = distinct !DISubprogram(name: "inline_me", linkageName: "inline_me", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8) +!12 = distinct !DISubprogram(name: "inline_me", linkageName: "inline_me", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) !13 = !DILocation(line: 2, column: 2, scope: !12, inlinedAt: !14) !14 = !DILocation(line: 3, column: 3, scope: !15) !15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 4, column: 4) diff --git a/llvm/test/Transforms/InstCombine/debuginfo-dce.ll b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll index c1d7c30..ec90779 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo-dce.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll @@ -125,15 +125,15 @@ attributes #1 = { nounwind readnone } !19 = !DILocation(line: 6, column: 17, scope: !14) !20 = !DIExpression(DW_OP_plus_uconst, 0) !21 = !DILocation(line: 11, column: 1, scope: !14) -!22 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17) +!22 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !23 = !DILocation(line: 6, column: 17, scope: !22) !24 = !DILocalVariable(name: "entry", scope: !22, file: !1, line: 6, type: !4) -!25 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17) +!25 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !26 = !DILocation(line: 6, column: 17, scope: !25) !27 = !DILocalVariable(name: "entry", scope: !25, file: !1, line: 6, type: !4) -!28 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17) +!28 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !29 = !DILocation(line: 6, column: 17, scope: !28) !30 = !DILocalVariable(name: "entry", scope: !28, file: !1, line: 6, type: !4) -!31 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !17) +!31 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !15, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !32 = !DILocation(line: 6, column: 17, scope: !31) !33 = !DILocalVariable(name: "entry", scope: !31, file: !1, line: 6, type: !4) diff --git a/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll b/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll index 437e566..fa83575 100644 --- a/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll +++ b/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll @@ -131,7 +131,8 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !10 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 0) !11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !12 = !DILocation(line: 0, scope: !10) -!13 = distinct !DISubprogram(name: "multi_exit", scope: !1, file: !1, line: 10, type: !7, scopeLine: 10, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8) +!13 = distinct !DISubprogram(name: "multi_exit", scope: !1, file: !1, line: 10, type: !7, scopeLine: 10, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !17) !14 = !DILocation(line: 0, scope: !15) !15 = !DILexicalBlockFile(scope: !13, file: !1, discriminator: 0) !16 = !DILocalVariable(name: "sum2", scope: !15, file: !1, line: 11, type: !11) +!17 = !{!16} diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll index 9003072..dd347a7 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll @@ -19,9 +19,8 @@ define void @mulvl123_addressing(ptr %src, ptr %dst, i64 %count) #0 { ; COMMON-NEXT: ldr z3, [x0, #3, mul vl] ; COMMON-NEXT: addvl x0, x0, #5 ; COMMON-NEXT: umax z0.b, p0/m, z0.b, z1.b -; COMMON-NEXT: movprfx z1, z2 -; COMMON-NEXT: umax z1.b, p0/m, z1.b, z3.b -; COMMON-NEXT: umax z0.b, p0/m, z0.b, z1.b +; COMMON-NEXT: umax z2.b, p0/m, z2.b, z3.b +; COMMON-NEXT: umax z0.b, p0/m, z0.b, z2.b ; COMMON-NEXT: st1b { z0.b }, p0, [x1, x8] ; COMMON-NEXT: incb x8 ; COMMON-NEXT: cmp x8, x2 diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll index c12d813..082b876 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -234,16 +234,17 @@ define void @extrastride(ptr nocapture %main, i32 %main_stride, ptr nocapture %r ; X32-NEXT: .p2align 4 ; X32-NEXT: .LBB2_2: # %for.body ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl (%ebx,%esi), %ebp -; X32-NEXT: addl (%ebx), %ebp -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: addl (%esi,%ebx), %ebp -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: addl (%esi,%ebx), %ebp -; X32-NEXT: addl %esi, %ebx -; X32-NEXT: addl (%esi,%ebx), %ebp -; X32-NEXT: movl %ebp, (%edx) -; X32-NEXT: addl %esi, %ebx +; X32-NEXT: movl %ebx, %ebp +; X32-NEXT: movl (%ebx,%esi), %ebx +; X32-NEXT: addl (%ebp), %ebx +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl (%esi,%ebp), %ebx +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl (%esi,%ebp), %ebx +; X32-NEXT: addl %esi, %ebp +; X32-NEXT: addl (%esi,%ebp), %ebx +; X32-NEXT: movl %ebx, (%edx) +; X32-NEXT: leal (%ebp,%esi), %ebx ; X32-NEXT: addl %edi, %ebx ; X32-NEXT: addl %ecx, %edx ; X32-NEXT: decl %eax diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index f163517..2f7e356 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -429,48 +429,36 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ] -; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]] -; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]] -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]] +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ] +; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]] +; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]] +; DEFAULT-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]] +; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]] +; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP5]] +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; DEFAULT: [[PRED_STORE_IF]]: -; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]] +; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] ; DEFAULT: [[PRED_STORE_CONTINUE]]: -; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] +; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]] +; DEFAULT: [[PRED_STORE_IF28]]: +; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE29]] +; DEFAULT: [[PRED_STORE_CONTINUE29]]: +; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]] +; DEFAULT: [[PRED_STORE_IF30]]: +; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]] +; DEFAULT: [[PRED_STORE_CONTINUE31]]: +; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33]] ; DEFAULT: [[PRED_STORE_IF32]]: -; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]] ; DEFAULT: [[PRED_STORE_CONTINUE33]]: -; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]] -; DEFAULT: [[PRED_STORE_IF34]]: -; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]] -; DEFAULT: [[PRED_STORE_IF36]]: -; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]] -; DEFAULT: [[PRED_STORE_CONTINUE37]]: -; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[TMP8]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]] +; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[BROADCAST_SPLAT]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] @@ -613,63 +601,17 @@ exit: define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-LABEL: define void @low_trip_count_fold_tail_scalarized_store( ; COMMON-SAME: ptr [[DST:%.*]]) { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: br label %[[VECTOR_PH:.*]] -; COMMON: [[VECTOR_PH]]: -; COMMON-NEXT: br label %[[VECTOR_BODY:.*]] -; COMMON: [[VECTOR_BODY]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; COMMON: [[PRED_STORE_IF]]: -; COMMON-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; COMMON-NEXT: store i8 0, ptr [[TMP0]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]] -; COMMON: [[PRED_STORE_CONTINUE]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] -; COMMON: [[PRED_STORE_IF1]]: -; COMMON-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1 -; COMMON-NEXT: store i8 1, ptr [[TMP1]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; COMMON: [[PRED_STORE_CONTINUE2]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] -; COMMON: [[PRED_STORE_IF3]]: -; COMMON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 2 -; COMMON-NEXT: store i8 2, ptr [[TMP2]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE4]] -; COMMON: [[PRED_STORE_CONTINUE4]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] -; COMMON: [[PRED_STORE_IF5]]: -; COMMON-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 3 -; COMMON-NEXT: store i8 3, ptr [[TMP3]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; COMMON: [[PRED_STORE_CONTINUE6]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] -; COMMON: [[PRED_STORE_IF7]]: -; COMMON-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 4 -; COMMON-NEXT: store i8 4, ptr [[TMP4]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE8]] -; COMMON: [[PRED_STORE_CONTINUE8]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] -; COMMON: [[PRED_STORE_IF9]]: -; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 5 -; COMMON-NEXT: store i8 5, ptr [[TMP5]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE10]] -; COMMON: [[PRED_STORE_CONTINUE10]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] -; COMMON: [[PRED_STORE_IF11]]: -; COMMON-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 6 -; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] -; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]] -; COMMON: [[PRED_STORE_IF13]]: -; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 -; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT]] +; COMMON-NEXT: [[ENTRY:.*]]: +; COMMON-NEXT: br label %[[LOOP:.*]] +; COMMON: [[LOOP]]: +; COMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; COMMON-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i8 +; COMMON-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; COMMON-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 +; COMMON-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; COMMON-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7 +; COMMON-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] ; COMMON: [[EXIT]]: -; COMMON-NEXT: br label %[[SCALAR_PH:.*]] -; COMMON: [[SCALAR_PH]]: -; COMMON-NEXT: br label %[[EXIT1:.*]] -; COMMON: [[EXIT1]]: ; COMMON-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index 5970608..bea34e2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -16,7 +16,7 @@ ; CM: vector.ph: ; CM: CLONE ir<%a> = extractvalue ir<%sv> ; CM: CLONE ir<%b> = extractvalue ir<%sv> -; CM: WIDEN ir<%add> = add ir<%a>, ir<%b> +; CM: CLONE ir<%add> = add ir<%a>, ir<%b> ; CM: Successor(s): vector loop ; CM: LV: Scalar loop costs: 5. @@ -30,23 +30,22 @@ define void @test1(ptr %dst, {i64, i64} %sv) { ; FORCED-NEXT: br label %[[VECTOR_PH:.*]] ; FORCED: [[VECTOR_PH]]: ; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 -; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 +; FORCED-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], [[TMP4]] +; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0 ; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] -; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4 +; FORCED-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP2]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body @@ -99,10 +98,11 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { ; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index cfc6cc8..4b097ba 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -271,69 +271,11 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; ; PRED-LABEL: define void @iv_trunc( ; PRED-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; PRED-NEXT: [[ENTRY:.*:]] +; PRED-NEXT: [[ENTRY:.*]]: ; PRED-NEXT: [[MUL_X:%.*]] = add i32 [[X]], 1 -; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: -; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]] -; PRED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[MUL_X]], 0 -; PRED-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[MUL_X]] -; PRED-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32 -; PRED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]]) -; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; PRED-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]] -; PRED-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0 -; PRED-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false -; PRED-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295 -; PRED-NEXT: [[TMP10:%.*]] = icmp ne i32 [[MUL_X]], 0 -; PRED-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; PRED-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 2 -; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 2 -; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[MUL_X]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; PRED-NEXT: br label %[[VECTOR_BODY:.*]] -; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; PRED-NEXT: [[TMP16:%.*]] = mul <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] -; PRED-NEXT: [[TMP17:%.*]] = zext <2 x i32> [[TMP16]] to <2 x i64> -; PRED-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 -; PRED-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP19:%.*]] = extractelement <2 x i64> [[TMP17]], i32 0 -; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP19]] -; PRED-NEXT: store i32 1, ptr [[TMP20]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] -; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; PRED-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] -; PRED: [[PRED_STORE_IF1]]: -; PRED-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP17]], i32 1 -; PRED-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP22]] -; PRED-NEXT: store i32 1, ptr [[TMP23]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; PRED: [[PRED_STORE_CONTINUE2]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP25:%.*]] = xor i1 [[TMP24]], true -; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH]]: ; PRED-NEXT: br label %[[FOR_BODY:.*]] ; PRED: [[FOR_BODY]]: -; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; PRED-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32 ; PRED-NEXT: [[ADD_I:%.*]] = mul i32 [[MUL_X]], [[TRUNC_IV]] ; PRED-NEXT: [[IV_MUL:%.*]] = zext i32 [[ADD_I]] to i64 @@ -341,7 +283,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: store i32 1, ptr [[GEP]], align 4 ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -437,101 +379,21 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; ; PRED-LABEL: define void @trunc_ivs_and_store( ; PRED-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; PRED-NEXT: [[ENTRY:.*:]] -; PRED-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[X]] -; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: +; PRED-NEXT: [[ENTRY:.*]]: ; PRED-NEXT: [[TMP1:%.*]] = mul i32 [[X]], [[X]] -; PRED-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]] -; PRED-NEXT: [[TMP3:%.*]] = icmp slt i32 [[MUL]], 0 -; PRED-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[MUL]] -; PRED-NEXT: [[TMP5:%.*]] = trunc i64 [[N]] to i32 -; PRED-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP4]], i32 [[TMP5]]) -; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; PRED-NEXT: [[TMP6:%.*]] = sub i32 0, [[MUL_RESULT]] -; PRED-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], 0 -; PRED-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 false -; PRED-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], 4294967295 -; PRED-NEXT: [[TMP11:%.*]] = icmp ne i32 [[MUL]], 0 -; PRED-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] -; PRED-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]] -; PRED-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[TMP14:%.*]] = sub i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP14]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MUL]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; PRED-NEXT: br label %[[VECTOR_BODY:.*]] -; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE7]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE7]] ] -; PRED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; PRED-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] -; PRED-NEXT: [[TMP18:%.*]] = zext <4 x i32> [[TMP17]] to <4 x i64> -; PRED-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 -; PRED-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP18]], i32 0 -; PRED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP20]] -; PRED-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 0 -; PRED-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] -; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; PRED-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] -; PRED: [[PRED_STORE_IF2]]: -; PRED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1 -; PRED-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP24]] -; PRED-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 1 -; PRED-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE3]] -; PRED: [[PRED_STORE_CONTINUE3]]: -; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 -; PRED-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] -; PRED: [[PRED_STORE_IF4]]: -; PRED-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2 -; PRED-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP28]] -; PRED-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 2 -; PRED-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]] -; PRED: [[PRED_STORE_CONTINUE5]]: -; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 -; PRED-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] -; PRED: [[PRED_STORE_IF6]]: -; PRED-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3 -; PRED-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP32]] -; PRED-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], 3 -; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; PRED: [[PRED_STORE_CONTINUE7]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]]) -; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH]]: ; PRED-NEXT: br label %[[LOOP:.*]] ; PRED: [[LOOP]]: -; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[IV_1_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32 -; PRED-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[MUL]], [[IV_1_TRUNC]] +; PRED-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[TMP1]], [[IV_1_TRUNC]] ; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 ; PRED-NEXT: [[MUL_EXT:%.*]] = zext i32 [[IV_1_MUL]] to i64 ; PRED-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[MUL_EXT]] ; PRED-NEXT: store i32 [[IV_2]], ptr [[GEP]], align 4 ; PRED-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], 1 ; PRED-NEXT: [[EXITCOND_3_NOT:%.*]] = icmp eq i64 [[IV_1]], [[N]] -; PRED-NEXT: br i1 [[EXITCOND_3_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; PRED-NEXT: br i1 [[EXITCOND_3_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -627,91 +489,12 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; ; PRED-LABEL: define void @ivs_trunc_and_ext( ; PRED-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; PRED-NEXT: [[ENTRY:.*:]] +; PRED-NEXT: [[ENTRY:.*]]: ; PRED-NEXT: [[ADD:%.*]] = add i32 [[X]], 1 -; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: -; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]] -; PRED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[ADD]], 0 -; PRED-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[ADD]] -; PRED-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32 -; PRED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]]) -; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; PRED-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]] -; PRED-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0 -; PRED-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false -; PRED-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295 -; PRED-NEXT: [[TMP10:%.*]] = icmp ne i32 [[ADD]], 0 -; PRED-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; PRED-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; PRED-NEXT: br label %[[VECTOR_BODY:.*]] -; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; PRED-NEXT: [[TMP16:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] -; PRED-NEXT: [[TMP17:%.*]] = zext <4 x i32> [[TMP16]] to <4 x i64> -; PRED-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 -; PRED-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP19:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0 -; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP19]] -; PRED-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 0 -; PRED-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] -; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; PRED-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] -; PRED: [[PRED_STORE_IF1]]: -; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1 -; PRED-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP23]] -; PRED-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 1 -; PRED-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; PRED: [[PRED_STORE_CONTINUE2]]: -; PRED-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 -; PRED-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] -; PRED: [[PRED_STORE_IF3]]: -; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2 -; PRED-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]] -; PRED-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 2 -; PRED-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]] -; PRED: [[PRED_STORE_CONTINUE4]]: -; PRED-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 -; PRED-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]] -; PRED: [[PRED_STORE_IF5]]: -; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3 -; PRED-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]] -; PRED-NEXT: [[TMP33:%.*]] = add i32 [[OFFSET_IDX]], 3 -; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; PRED: [[PRED_STORE_CONTINUE6]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP35:%.*]] = xor i1 [[TMP34]], true -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH]]: ; PRED-NEXT: br label %[[LOOP:.*]] ; PRED: [[LOOP]]: -; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32 ; PRED-NEXT: [[IV_MUL:%.*]] = mul i32 [[ADD]], [[IV_TRUNC]] ; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 @@ -720,7 +503,7 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: store i32 [[IV_2]], ptr [[GEP]], align 4 ; PRED-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_1]], [[N]] -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -842,7 +625,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED: [[PRED_STORE_CONTINUE5]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: @@ -855,7 +638,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 ; PRED-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64 ; PRED-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]] -; PRED-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +; PRED-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index d77ca98..37eac89 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -1589,8 +1589,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw nsw <16 x i64> [[TMP8]], [[TMP9]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v16i64(<2 x i64> [[VEC_PHI1]], <16 x i64> [[TMP10]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 0c6a490..eceda08 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -17,17 +17,15 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]] ; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EXTRACT0]], [[TMP10]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 6ea075f..83be070 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -181,178 +181,23 @@ for.cond.cleanup: define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) { ; DEFAULT-LABEL: define void @tail_predicate_without_optsize( ; DEFAULT-SAME: ptr [[P:%.*]], i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]], i32 [[N:%.*]]) { -; DEFAULT-NEXT: [[ENTRY:.*:]] -; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]] -; DEFAULT: [[VECTOR_PH]]: -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT3]], <16 x i8> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <16 x i8> poison, i8 [[C]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] -; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14) -; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]] -; DEFAULT-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1) -; DEFAULT-NEXT: [[TMP3:%.*]] = mul <16 x i8> [[TMP2]], [[BROADCAST_SPLAT4]] -; DEFAULT-NEXT: [[TMP4:%.*]] = add <16 x i8> [[TMP3]], [[TMP1]] -; DEFAULT-NEXT: [[TMP5:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 2) -; DEFAULT-NEXT: [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], [[BROADCAST_SPLAT6]] -; DEFAULT-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]] -; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; DEFAULT: [[PRED_STORE_IF]]: -; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]] -; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0 -; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] -; DEFAULT: [[PRED_STORE_CONTINUE]]: -; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] -; DEFAULT: [[PRED_STORE_IF6]]: -; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1 -; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]] -; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1 -; DEFAULT-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; DEFAULT: [[PRED_STORE_CONTINUE7]]: -; DEFAULT-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; DEFAULT: [[PRED_STORE_IF8]]: -; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 -; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]] -; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2 -; DEFAULT-NEXT: store i8 [[TMP19]], ptr [[TMP18]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; DEFAULT: [[PRED_STORE_CONTINUE9]]: -; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]] -; DEFAULT: [[PRED_STORE_IF10]]: -; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 -; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]] -; DEFAULT-NEXT: [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3 -; DEFAULT-NEXT: store i8 [[TMP23]], ptr [[TMP22]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE11]] -; DEFAULT: [[PRED_STORE_CONTINUE11]]: -; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4 -; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] -; DEFAULT: [[PRED_STORE_IF12]]: -; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]] -; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4 -; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[TMP26]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE13]] -; DEFAULT: [[PRED_STORE_CONTINUE13]]: -; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5 -; DEFAULT-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; DEFAULT: [[PRED_STORE_IF14]]: -; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 5 -; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]] -; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5 -; DEFAULT-NEXT: store i8 [[TMP31]], ptr [[TMP30]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; DEFAULT: [[PRED_STORE_CONTINUE15]]: -; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6 -; DEFAULT-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] -; DEFAULT: [[PRED_STORE_IF16]]: -; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6 -; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]] -; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6 -; DEFAULT-NEXT: store i8 [[TMP35]], ptr [[TMP34]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; DEFAULT: [[PRED_STORE_CONTINUE17]]: -; DEFAULT-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7 -; DEFAULT-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] -; DEFAULT: [[PRED_STORE_IF18]]: -; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], 7 -; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]] -; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7 -; DEFAULT-NEXT: store i8 [[TMP39]], ptr [[TMP38]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]] -; DEFAULT: [[PRED_STORE_CONTINUE19]]: -; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8 -; DEFAULT-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] -; DEFAULT: [[PRED_STORE_IF20]]: -; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]] -; DEFAULT-NEXT: [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8 -; DEFAULT-NEXT: store i8 [[TMP43]], ptr [[TMP42]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; DEFAULT: [[PRED_STORE_CONTINUE21]]: -; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9 -; DEFAULT-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] -; DEFAULT: [[PRED_STORE_IF22]]: -; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[INDEX]], 9 -; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]] -; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9 -; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP46]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]] -; DEFAULT: [[PRED_STORE_CONTINUE23]]: -; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10 -; DEFAULT-NEXT: br i1 [[TMP48]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] -; DEFAULT: [[PRED_STORE_IF24]]: -; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], 10 -; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]] -; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10 -; DEFAULT-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]] -; DEFAULT: [[PRED_STORE_CONTINUE25]]: -; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11 -; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]] -; DEFAULT: [[PRED_STORE_IF26]]: -; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], 11 -; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]] -; DEFAULT-NEXT: [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11 -; DEFAULT-NEXT: store i8 [[TMP55]], ptr [[TMP54]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]] -; DEFAULT: [[PRED_STORE_CONTINUE27]]: -; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12 -; DEFAULT-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]] -; DEFAULT: [[PRED_STORE_IF28]]: -; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 12 -; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]] -; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12 -; DEFAULT-NEXT: store i8 [[TMP59]], ptr [[TMP58]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE29]] -; DEFAULT: [[PRED_STORE_CONTINUE29]]: -; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13 -; DEFAULT-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]] -; DEFAULT: [[PRED_STORE_IF30]]: -; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 13 -; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]] -; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13 -; DEFAULT-NEXT: store i8 [[TMP63]], ptr [[TMP62]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]] -; DEFAULT: [[PRED_STORE_CONTINUE31]]: -; DEFAULT-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14 -; DEFAULT-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] -; DEFAULT: [[PRED_STORE_IF32]]: -; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 14 -; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]] -; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14 -; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP66]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]] -; DEFAULT: [[PRED_STORE_CONTINUE33]]: -; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15 -; DEFAULT-NEXT: br i1 [[TMP68]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_IF34]]: -; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[INDEX]], 15 +; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] +; DEFAULT: [[FOR_BODY]]: +; DEFAULT-NEXT: [[TMP69:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[TMP69]] to i8 +; DEFAULT-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP0]] +; DEFAULT-NEXT: [[SHR:%.*]] = lshr i8 [[TMP0]], 1 +; DEFAULT-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] +; DEFAULT-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] +; DEFAULT-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP0]], 2 +; DEFAULT-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] +; DEFAULT-NEXT: [[TMP71:%.*]] = add i8 [[ADD]], [[MUL9]] ; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]] -; DEFAULT-NEXT: [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15 ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) -; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] +; DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP69]], 1 +; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 15 +; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -449,7 +294,7 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT-NEXT: store <4 x i16> [[TMP11]], ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 ; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -555,7 +400,7 @@ define void @vectorization_forced() { ; DEFAULT-NEXT: store <4 x i16> [[TMP11]], ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 ; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index f25b86d..b81637f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -293,9 +293,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 -1) ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 9) ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]] @@ -309,7 +309,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]]) +; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]]) ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 8ef53ca..345f6f6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -295,8 +295,7 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[TMP1]] = mul <8 x i8> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> [[TMP1]]) ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll index b26e9cf..718e03c 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll @@ -1231,7 +1231,7 @@ define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 no ; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %14 = load i8 ; CHECK: LV: Found an estimated cost of 26 for VF 8 For instruction: %20 = load i8 ; CHECK: LV: Found an estimated cost of 7 for VF 8 For instruction: store i8 %48 -; CHECK: LV: Vector loop of width 8 costs: 10. +; CHECK: LV: Vector loop of width 8 costs: 11. ; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %14 = load i8 ; CHECK: LV: Found an estimated cost of 132 for VF 16 For instruction: %20 = load i8 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction: store i8 %48 @@ -1442,8 +1442,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 23 -; CHECK: LV: Vector loop of width 4 costs: 13 +; CHECK: LV: Vector loop of width 2 costs: 27 +; CHECK: LV: Vector loop of width 4 costs: 15 ; CHECK: LV: Selecting VF: 4. define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1484,8 +1484,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 23 -; CHECK: LV: Vector loop of width 4 costs: 13 +; CHECK: LV: Vector loop of width 2 costs: 27 +; CHECK: LV: Vector loop of width 4 costs: 15 ; CHECK: LV: Selecting VF: 4. define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1526,9 +1526,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 21 -; CHECK: LV: Vector loop of width 4 costs: 14. -; CHECK: LV: Selecting VF: 4. +; CHECK: LV: Vector loop of width 2 costs: 26 +; CHECK: LV: Vector loop of width 4 costs: 16. +; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp22.not = icmp eq i32 %N, 0 @@ -1566,9 +1566,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 21 -; CHECK: LV: Vector loop of width 4 costs: 14. -; CHECK: LV: Selecting VF: 4. +; CHECK: LV: Vector loop of width 2 costs: 26 +; CHECK: LV: Vector loop of width 4 costs: 16. +; CHECK: LV: Selecting VF: 1. define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp21.not = icmp eq i32 %N, 0 @@ -1608,8 +1608,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 22 -; CHECK: LV: Vector loop of width 4 costs: 11. +; CHECK: LV: Vector loop of width 2 costs: 24 +; CHECK: LV: Vector loop of width 4 costs: 12 ; CHECK: LV: Selecting VF: 4. define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1652,8 +1652,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 18 -; CHECK: LV: Vector loop of width 2 costs: 22 -; CHECK: LV: Vector loop of width 4 costs: 11. +; CHECK: LV: Vector loop of width 2 costs: 24 +; CHECK: LV: Vector loop of width 4 costs: 12 ; CHECK: LV: Selecting VF: 4. define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1696,9 +1696,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 20 -; CHECK: LV: Vector loop of width 4 costs: 13. -; CHECK: LV: Selecting VF: 4. +; CHECK: LV: Vector loop of width 2 costs: 23 +; CHECK: LV: Vector loop of width 4 costs: 14 +; CHECK: LV: Selecting VF: 4 define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp22.not = icmp eq i32 %N, 0 @@ -1738,9 +1738,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 2 ; CHECK: LV: Scalar loop costs: 16 -; CHECK: LV: Vector loop of width 2 costs: 20 -; CHECK: LV: Vector loop of width 4 costs: 13. -; CHECK: LV: Selecting VF: 4. +; CHECK: LV: Vector loop of width 2 costs: 23 +; CHECK: LV: Vector loop of width 4 costs: 14 +; CHECK: LV: Selecting VF: 4 define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp21.not = icmp eq i32 %N, 0 @@ -1883,8 +1883,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 43 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 51 +; CHECK: LV: Vector loop of width 4 costs: 27 ; CHECK: LV: Selecting VF: 4 define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1943,8 +1943,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 43 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 51 +; CHECK: LV: Vector loop of width 4 costs: 27 ; CHECK: LV: Selecting VF: 4 define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2004,9 +2004,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 38 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 48 +; CHECK: LV: Vector loop of width 4 costs: 31 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2061,9 +2061,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 38 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 48 +; CHECK: LV: Vector loop of width 4 costs: 31 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 @@ -2119,8 +2119,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 37 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 25 ; CHECK: LV: Selecting VF: 4 define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2181,8 +2181,8 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 32 -; CHECK: LV: Vector loop of width 2 costs: 37 -; CHECK: LV: Vector loop of width 4 costs: 23 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 25 ; CHECK: LV: Selecting VF: 4 define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2243,9 +2243,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 35 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 29 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp48.not = icmp eq i32 %N, 0 @@ -2301,9 +2301,9 @@ for.body: ; preds = %entry, %for.body ; CHECK: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 ; CHECK: LV: Scalar loop costs: 28 -; CHECK: LV: Vector loop of width 2 costs: 35 -; CHECK: LV: Vector loop of width 4 costs: 26 -; CHECK: LV: Selecting VF: 4 +; CHECK: LV: Vector loop of width 2 costs: 41 +; CHECK: LV: Vector loop of width 4 costs: 29 +; CHECK: LV: Selecting VF: 1 define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: %cmp45.not = icmp eq i32 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll index a286df9..c2c04ce 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll @@ -85,13 +85,13 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 43 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 71 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test2' @@ -99,8 +99,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index cc84fab..002d811 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -435,67 +435,16 @@ define void @test_first_order_recurrence_tried_to_scalarized(ptr %dst, i1 %c, i3 ; CHECK-LABEL: @test_first_order_recurrence_tried_to_scalarized( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = select i1 [[C:%.*]], i32 8, i32 9 -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[DST:%.*]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw i32 10, [[TMP5]] -; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] -; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub nsw i32 10, [[TMP10]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 +; CHECK: loop: +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ [[TMP18]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[TMP18]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i32 10, [[TMP15]] -; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP14]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP21:%.*]] = sub nsw i32 10, [[TMP20]] -; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i32, ptr [[DST:%.*]], i32 [[TMP18]] +; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP19]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[VECTOR_BODY]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index c1272e5..6e3b2a5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -12,27 +12,22 @@ define void @test_tc_17_no_epilogue_vectorization(ptr noalias %src, ptr noalias ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 64 +; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3:%.*]], align 64 +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ 16, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I]] +; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I]] ; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[LDADDR]], align 64 -; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I]] +; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 [[I]] ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 17 -; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -69,11 +64,11 @@ define void @test_tc_18(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -140,7 +135,7 @@ define void @test_tc_19(ptr noalias %src, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -219,7 +214,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -231,7 +226,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD5]], ptr [[TMP15]], align 64 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 20 -; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -245,7 +240,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64 ; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 ; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 20 -; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -281,7 +276,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src, ; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -294,7 +289,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src, ; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32 -; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 8771dc9..6605338 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -2581,8 +2581,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP114]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]] ; CHECK-NEXT: [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]] diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll index 40cd6b6..03e0853 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll @@ -253,10 +253,10 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !32 = distinct !DILexicalBlock(scope: !31, file: !5, line: 137, column: 2) !33 = !DILocation(line: 210, column: 44, scope: !32) !34 = !DILocation(line: 320, column: 44, scope: !32) -!35 = distinct !DISubprogram(name: "test_misc", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 3, file: !5, scope: !6, type: !7, retainedNodes: !12) +!35 = distinct !DISubprogram(name: "test_misc", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 3, file: !5, scope: !6, type: !7, retainedNodes: !2) !36 = distinct !DILexicalBlock(scope: !35, file: !5, line: 137, column: 2) !37 = !DILocation(line: 430, column: 44, scope: !36) !38 = !DILocation(line: 540, column: 44, scope: !36) -!39 = distinct !DISubprogram(name: "test_scalar_Steps", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 3, file: !5, scope: !6, type: !7, retainedNodes: !12) +!39 = distinct !DISubprogram(name: "test_scalar_Steps", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 3, file: !5, scope: !6, type: !7, retainedNodes: !2) !40 = distinct !DILexicalBlock(scope: !39, file: !5, line: 137, column: 2) !41 = !DILocation(line: 650, column: 44, scope: !40) diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index fe230fa..b72cbd3 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -49,6 +49,8 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED1]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv> +; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, ir<%rem> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: <xVFxUF> pred.store: { @@ -57,9 +59,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> -; CHECK-NEXT: REPLICATE ir<%add> = add ir<%conv>, ir<%rem> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: @@ -293,27 +293,44 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv> -; CHECK-NEXT: Successor(s): pred.store +; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: Successor(s): pred.load ; CHECK-EMPTY: -; CHECK: <xVFxUF> pred.store: { -; CHECK-NEXT: pred.store.entry: +; CHECK: <xVFxUF> pred.load: { +; CHECK-NEXT: pred.load.entry: ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: -; CHECK: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> -; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2> -; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem> -; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> -; CHECK-NEXT: REPLICATE ir<%add> = add ir<%add.1>, ir<%conv.lv.2> -; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> -; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK: pred.load.if: +; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V) +; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK-EMPTY: -; CHECK: pred.store.continue: +; CHECK: pred.load.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): loop.2 +; CHECK-NEXT: Successor(s): loop.1 +; CHECK-EMPTY: +; CHECK-NEXT: loop.1: +; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, ir<%rem> +; CHECK-NEXT: WIDEN-CAST ir<%conv.lv.2> = sext vp<%9> to i32 +; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> +; CHECK-NEXT: Successor(s): pred.store +; CHECK-EMPTY: +; CHECK-NEXT: <xVFxUF> pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> +; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> +; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK: loop.2: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> @@ -377,6 +394,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> +; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: <xVFxUF> pred.store: { @@ -386,7 +404,6 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, ir<%rem> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> @@ -457,6 +474,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule vp<[[WIDE_IV]]>, vp<[[BTC]]> ; CHECK-NEXT: CLONE ir<[[L]]> = load ir<%src> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%.pn>, ir<[[L]]> +; CHECK-NEXT: WIDEN ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: <xVFxUF> pred.store: { @@ -467,7 +485,6 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]> -; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE store ir<%val>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll index 8a57973..372876c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll @@ -134,22 +134,18 @@ define i16 @for_phi_removed(ptr %src) { ; UNROLL-NO-IC: [[VECTOR_BODY]]: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4 -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104 ; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-IC: [[MIDDLE_BLOCK]]: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 ; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH:.*]] ; UNROLL-NO-IC: [[SCALAR_PH]]: ; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]] ; UNROLL-NO-IC: [[LOOP]]: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 104, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 ; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 @@ -200,22 +196,18 @@ define i16 @for_phi_removed(ptr %src) { ; SINK-AFTER: [[VECTOR_BODY]]: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4 -; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; SINK-AFTER-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108 ; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-AFTER: [[MIDDLE_BLOCK]]: -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 ; SINK-AFTER-NEXT: br label %[[SCALAR_PH:.*]] ; SINK-AFTER: [[SCALAR_PH]]: ; SINK-AFTER-NEXT: br label %[[LOOP:.*]] ; SINK-AFTER: [[LOOP]]: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 108, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4 ; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 ; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0 diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 7b0c366..440309d 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -153,3 +153,79 @@ loop: exit: ret void } + +define void @narrow_widen_store_user(i32 %x, ptr noalias %A, ptr noalias %B) { +; VF4IC1-LABEL: define void @narrow_widen_store_user( +; VF4IC1-SAME: i32 [[X:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; VF4IC1-NEXT: [[ENTRY:.*:]] +; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]] +; VF4IC1: [[VECTOR_PH]]: +; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[X]], 1 +; VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; VF4IC1-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], 3 +; VF4IC1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 +; VF4IC1-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer +; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; VF4IC1: [[VECTOR_BODY]]: +; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]] +; VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]] +; VF4IC1-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 4 +; VF4IC1-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4 +; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4IC1: [[MIDDLE_BLOCK]]: +; VF4IC1-NEXT: br label %[[EXIT:.*]] +; VF4IC1: [[EXIT]]: +; VF4IC1-NEXT: ret void +; +; VF2IC2-LABEL: define void @narrow_widen_store_user( +; VF2IC2-SAME: i32 [[X:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; VF2IC2-NEXT: [[ENTRY:.*:]] +; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2IC2: [[VECTOR_PH]]: +; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[X]], 1 +; VF2IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 +; VF2IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; VF2IC2-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 3 +; VF2IC2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0 +; VF2IC2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer +; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2IC2: [[VECTOR_BODY]]: +; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]] +; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]] +; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 +; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 4 +; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 4 +; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 2 +; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP3]], align 4 +; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP5]], align 4 +; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2IC2: [[MIDDLE_BLOCK]]: +; VF2IC2-NEXT: br label %[[EXIT:.*]] +; VF2IC2: [[EXIT]]: +; VF2IC2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i32 %iv + %gep.B = getelementptr i32, ptr %B, i32 %iv + %wide.add = add i32 %x, 1 + %wide.mul = mul i32 %wide.add, 3 + store i32 %wide.add, ptr %gep.A + store i32 %wide.mul, ptr %gep.B + %iv.next = add i32 %iv, 1 + %ec = icmp ne i32 %iv.next, 1024 + br i1 %ec, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/pr50686.ll b/llvm/test/Transforms/LoopVectorize/pr50686.ll index 878fbec..be9110c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr50686.ll +++ b/llvm/test/Transforms/LoopVectorize/pr50686.ll @@ -18,20 +18,16 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope [[META0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 60 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index e160a15..bba459f 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -1140,18 +1140,14 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1 +; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i32 8 +; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1 ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1 ; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10) ; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10) -; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1 +; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[A]], align 1 ; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1 -; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[SCALAR_PH:.*]] ; VF8UF2: [[SCALAR_PH]]: @@ -1165,7 +1161,7 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1 ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; VF8UF2: [[EXIT]]: ; VF8UF2-NEXT: ret void ; @@ -1177,14 +1173,10 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16 ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1 ; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10) -; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1 -; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[A]], align 1 +; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[SCALAR_PH:.*]] ; VF16UF1: [[SCALAR_PH]]: @@ -1198,7 +1190,7 @@ define void @test_vector_tc_eq_16(ptr %A) { ; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1 ; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 ; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; VF16UF1: [[EXIT]]: ; VF16UF1-NEXT: ret void ; @@ -1232,12 +1224,10 @@ exit: ; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} ; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} -; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ;. ; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} ; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} -; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 2dd6a04..3161a0d 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -debug -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -force-widen-divrem-via-safe-divisor=0 -debug -disable-output %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll index aaabd18..618ec86 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll @@ -118,18 +118,18 @@ declare <2 x float> @llvm.matrix.transpose(<2 x float>, i32, i32) !4 = !{i32 2, !"Debug Info Version", i32 3} !5 = distinct !DISubprogram(name: "load_fn", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) !17 = !DIFile(filename: "toplevel.c", directory: "/test") -!16 = distinct !DISubprogram(name: "toplevel", scope: !1, file: !17, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!16 = distinct !DISubprogram(name: "toplevel", scope: !1, file: !17, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !18 = !DIFile(filename: "assign.h", directory: "/test") -!19 = distinct !DISubprogram(name: "assign", scope: !1, file: !18, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!19 = distinct !DISubprogram(name: "assign", scope: !1, file: !18, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !20 = !DIFile(filename: "add.h", directory: "/test") -!21 = distinct !DISubprogram(name: "add_fn", scope: !1, file: !20, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!21 = distinct !DISubprogram(name: "add_fn", scope: !1, file: !20, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !22 = !DIFile(filename: "store.h", directory: "/test") -!23 = distinct !DISubprogram(name: "store_fn", scope: !1, file: !22, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!23 = distinct !DISubprogram(name: "store_fn", scope: !1, file: !22, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !24 = !DIFile(filename: "transpose.h", directory: "/test") -!25 = distinct !DISubprogram(name: "transpose", scope: !1, file: !24, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!25 = distinct !DISubprogram(name: "transpose", scope: !1, file: !24, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !6 = !DISubroutineType(types: !7) diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll index 628ff08..ff41c57 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll @@ -163,26 +163,26 @@ declare void @llvm.matrix.column.major.store(<9 x double>, ptr, i64, i1, i32, i3 !19 = !DILocation(line: 10, column: 20, scope: !5) !20 = !DILocation(line: 10, column: 10, scope: !5) -!21 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!21 = distinct !DISubprogram(name: "fn2", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !22 = !DILocation(line: 30, column: 20, scope: !21) -!23 = distinct !DISubprogram(name: "fn3", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!23 = distinct !DISubprogram(name: "fn3", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !24 = !DILocation(line: 40, column: 20, scope: !23) -!25 = distinct !DISubprogram(name: "fn4", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!25 = distinct !DISubprogram(name: "fn4", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !26 = !DILocation(line: 50, column: 20, scope: !25) -!27 = distinct !DISubprogram(name: "fn5", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!27 = distinct !DISubprogram(name: "fn5", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !28 = !DILocation(line: 60, column: 20, scope: !27) -!29 = distinct !DISubprogram(name: "fn6", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!29 = distinct !DISubprogram(name: "fn6", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !30 = !DILocation(line: 70, column: 20, scope: !29) -!31 = distinct !DISubprogram(name: "fn7", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!31 = distinct !DISubprogram(name: "fn7", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !32 = !DILocation(line: 80, column: 20, scope: !31) -!33 = distinct !DISubprogram(name: "fn8", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!33 = distinct !DISubprogram(name: "fn8", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !34 = !DILocation(line: 90, column: 20, scope: !33) -!35 = distinct !DISubprogram(name: "fn9", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12) +!35 = distinct !DISubprogram(name: "fn9", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) !36 = !DILocation(line: 100, column: 20, scope: !35) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index e3765ed..75276c0 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -106,23 +106,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP4]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], splat (i64 225) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i64 1 -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], splat (i64 225) -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP12]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]]) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP17]], align 8, !alias.scope [[META0:![0-9]+]] @@ -182,23 +165,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.1: ; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ] ; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15 -; CHECK-NEXT: [[TMP34:%.*]] = add nuw nsw i64 [[INDEX_1]], 16 -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = add nuw nsw i64 [[INDEX_1]], 17 -; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1 -; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], splat (i64 225) -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i1> [[TMP41]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP41]], i64 1 -; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], splat (i64 225) -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i1> [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i1> [[TMP42]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP43]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP44]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP45]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP46]]) ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[TMP33]] ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP47]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr [[TMP47]], align 8, !alias.scope [[META0]] @@ -259,23 +225,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.2: ; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ] ; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30 -; CHECK-NEXT: [[TMP65:%.*]] = add nuw nsw i64 [[INDEX_2]], 31 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1 -; CHECK-NEXT: [[TMP68:%.*]] = add nuw nsw i64 [[INDEX_2]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1 -; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], splat (i64 225) -; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x i1> [[TMP72]], i64 0 -; CHECK-NEXT: [[TMP75:%.*]] = extractelement <2 x i1> [[TMP72]], i64 1 -; CHECK-NEXT: [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], splat (i64 225) -; CHECK-NEXT: [[TMP76:%.*]] = extractelement <2 x i1> [[TMP73]], i64 0 -; CHECK-NEXT: [[TMP77:%.*]] = extractelement <2 x i1> [[TMP73]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP74]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP75]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP76]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP77]]) ; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[TMP64]] ; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP78]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr [[TMP78]], align 8, !alias.scope [[META0]] @@ -336,23 +285,6 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: vector.body.3: ; CHECK-NEXT: [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ] ; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45 -; CHECK-NEXT: [[TMP96:%.*]] = add nuw nsw i64 [[INDEX_3]], 46 -; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0 -; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1 -; CHECK-NEXT: [[TMP99:%.*]] = add nuw nsw i64 [[INDEX_3]], 47 -; CHECK-NEXT: [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48 -; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0 -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1 -; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], splat (i64 225) -; CHECK-NEXT: [[TMP105:%.*]] = extractelement <2 x i1> [[TMP103]], i64 0 -; CHECK-NEXT: [[TMP106:%.*]] = extractelement <2 x i1> [[TMP103]], i64 1 -; CHECK-NEXT: [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], splat (i64 225) -; CHECK-NEXT: [[TMP107:%.*]] = extractelement <2 x i1> [[TMP104]], i64 0 -; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i1> [[TMP104]], i64 1 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP105]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP106]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP107]]) -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP108]]) ; CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[TMP95]] ; CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP109]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr [[TMP109]], align 8, !alias.scope [[META0]] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index e914979..fd7b75f 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -8,7 +8,6 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16 ; CHECK-SAME: ptr [[FIRST_COERCE:%.*]], i16 noundef signext [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST_COERCE]], i64 2) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST_COERCE]], i64 256) ] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[S]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -133,15 +132,14 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-LABEL: define noundef ptr @std_find_caller( ; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64 -; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 -; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST3]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST]], i64 [[PTR_SUB]]) ] ; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] ; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]] ; CHECK: [[LOOP_HEADER_I_PREHEADER]]: +; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 +; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64 +; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST3]] ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[PTR_SUB]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST3]] diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll new file mode 100644 index 0000000..1d9cf6a --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/armpl.ll @@ -0,0 +1,21 @@ +; REQUIRES: aarch64-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s + +; CHECK: declare void @armpl_svsincos_f32_x(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 4 x i1>) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @armpl_svsincos_f64_x(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 2 x i1>) [[ATTRS]] + +; CHECK: declare void @armpl_svsincospi_f32_x(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 4 x i1>) [[ATTRS]] + +; CHECK: declare void @armpl_svsincospi_f64_x(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16, <vscale x 2 x i1>) [[ATTRS]] + +; CHECK: declare void @armpl_vsincospiq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @armpl_vsincospiq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare aarch64_vector_pcs void @armpl_vsincosq_f32(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare aarch64_vector_pcs void @armpl_vsincosq_f64(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + + +; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll new file mode 100644 index 0000000..2c69007 --- /dev/null +++ b/llvm/test/Transforms/Util/DeclareRuntimeLibcalls/sleef.ll @@ -0,0 +1,20 @@ +; REQUIRES: aarch64-registered-target +; RUN: opt -S -passes=declare-runtime-libcalls -mtriple=aarch64-unknown-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s + +; CHECK: declare void @_ZGVnN2vl8l8_sincos(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS:#[0-9]+]] + +; CHECK: declare void @_ZGVnN2vl8l8_sincospi(<2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVnN4vl4l4_sincosf(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVnN4vl4l4_sincospif(<4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl4l4_sincosf(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl4l4_sincospif(<vscale x 4 x float>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl8l8_sincos(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: declare void @_ZGVsNxvl8l8_sincospi(<vscale x 2 x double>, ptr noalias nonnull writeonly align 16, ptr noalias nonnull writeonly align 16) [[ATTRS]] + +; CHECK: attributes [[ATTRS]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/Util/annotation-remarks-dbg-info.ll b/llvm/test/Transforms/Util/annotation-remarks-dbg-info.ll index a0fa79a..7fc7207 100644 --- a/llvm/test/Transforms/Util/annotation-remarks-dbg-info.ll +++ b/llvm/test/Transforms/Util/annotation-remarks-dbg-info.ll @@ -72,5 +72,5 @@ entry: !14 = !{!15} !15 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 1, type: !10) !16 = !DILocation(line: 400, column: 3, scope: !7) -!17 = distinct !DISubprogram(name: "test2", scope: !1, file: !1, line: 21, type: !8, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !14) +!17 = distinct !DISubprogram(name: "test2", scope: !1, file: !1, line: 21, type: !8, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) !18 = !DILocation(line: 200, column: 3, scope: !17) |
