diff options
Diffstat (limited to 'llvm/test/Transforms')
17 files changed, 1847 insertions, 56 deletions
diff --git a/llvm/test/Transforms/Inline/drop-callee-type-metadata.ll b/llvm/test/Transforms/Inline/drop-callee-type-metadata.ll new file mode 100644 index 0000000..5475880 --- /dev/null +++ b/llvm/test/Transforms/Inline/drop-callee-type-metadata.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +;; Test if the callee_type metadata is dropped when it is +;; mapped to a direct function call from an indirect call during inlining. + +; RUN: opt -passes=inline -S < %s | FileCheck %s + +define i32 @_Z13call_indirectPFicEc(ptr %func, i8 %x) !type !0 { +; CHECK-LABEL: define i32 @_Z13call_indirectPFicEc( +; CHECK-SAME: ptr [[FUNC:%.*]], i8 [[X:%.*]]) !type [[META0:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 [[FUNC]](i8 [[X]]), !callee_type [[META1:![0-9]+]] +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %call = call i32 %func(i8 %x), !callee_type !1 + ret i32 %call +} + +define i32 @_Z3barv() !type !3 { +; CHECK-LABEL: define i32 @_Z3barv( +; CHECK-SAME: ) !type [[META3:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @_Z3fooc(i8 97) +; CHECK-NEXT: ret i32 [[CALL_I]] +; +entry: + %call = call i32 @_Z13call_indirectPFicEc(ptr nonnull @_Z3fooc, i8 97) + ret i32 %call +} +declare !type !2 i32 @_Z3fooc(i8 signext) + +!0 = !{i64 0, !"_ZTSFiPvcE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFicE.generalized"} +!3 = !{i64 0, !"_ZTSFivE.generalized"} +;. +; CHECK: [[META0]] = !{i64 0, !"_ZTSFiPvcE.generalized"} +; CHECK: [[META1]] = !{[[META2:![0-9]+]]} +; CHECK: [[META2]] = !{i64 0, !"_ZTSFicE.generalized"} +; CHECK: [[META3]] = !{i64 0, !"_ZTSFivE.generalized"} +;. 
diff --git a/llvm/test/Transforms/Inline/memprof_inline2.ll b/llvm/test/Transforms/Inline/memprof_inline2.ll index 21448f1..d2e3927 100644 --- a/llvm/test/Transforms/Inline/memprof_inline2.ll +++ b/llvm/test/Transforms/Inline/memprof_inline2.ll @@ -38,6 +38,9 @@ ;; } ; RUN: opt -passes=inline %s -S | FileCheck %s +;; We should not perform additional discarding of non-cold contexts when +;; rebuilding the tries after inlining, even with a very low threshold. +; RUN: opt -passes=inline -memprof-callsite-cold-threshold=1 %s -S | FileCheck %s ; ModuleID = 'memprof_inline2.cc' source_filename = "memprof_inline2.cc" diff --git a/llvm/test/Transforms/Inline/memprof_inline3.ll b/llvm/test/Transforms/Inline/memprof_inline3.ll new file mode 100644 index 0000000..e802f2b --- /dev/null +++ b/llvm/test/Transforms/Inline/memprof_inline3.ll @@ -0,0 +1,296 @@ +;; This test is the same code as memprof_inline2.ll, except that it has +;; manually synthesized context size information. This test ensures that we +;; don't attempt to apply -memprof-callsite-cold-threshold again when +;; rebuilding the metadata after inlining. +; +; RUN: opt -passes=inline %s -S | FileCheck %s +;; We should not perform additional discarding of non-cold contexts when +;; rebuilding the tries after inlining, even with a very low threshold. +; RUN: opt -passes=inline -memprof-callsite-cold-threshold=0 %s -S | FileCheck %s + +; ModuleID = 'memprof_inline2.cc' +source_filename = "memprof_inline2.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress uwtable +; CHECK-LABEL: define dso_local noundef ptr @_Z3foov +define dso_local noundef ptr @_Z3foov() #0 !dbg !39 { +entry: + ;; We should keep the original memprof metadata intact. 
+ ; CHECK: call {{.*}} @_Znam{{.*}} !memprof ![[ORIGMEMPROF:[0-9]+]] + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !dbg !42, !memprof !43, !callsite !52 + ret ptr %call, !dbg !53 +} + +; Function Attrs: nobuiltin allocsize(0) +declare noundef nonnull ptr @_Znam(i64 noundef) #1 + +;; Mark noinline so we don't inline into calls from bar and baz. We should end +;; up with a memprof metadata on the call to foo below. +; Function Attrs: mustprogress noinline uwtable +; CHECK-LABEL: define dso_local noundef ptr @_Z4foo2v +define dso_local noundef ptr @_Z4foo2v() #2 !dbg !54 { +entry: + ;; We should have memprof metadata for the call stacks from bar and baz, + ;; and the callsite metadata should be the concatenation of the id from the + ;; inlined call to new and the original callsite. + ; CHECK: call {{.*}} @_Znam{{.*}} !memprof ![[NEWMEMPROF:[0-9]+]], !callsite ![[NEWCALLSITE:[0-9]+]] + %call = call noundef ptr @_Z3foov(), !dbg !55, !callsite !56 + ret ptr %call, !dbg !57 +} + +; Function Attrs: mustprogress uwtable +define dso_local noundef ptr @_Z3barv() #0 !dbg !58 { +entry: + %call = call noundef ptr @_Z4foo2v(), !dbg !59, !callsite !60 + ret ptr %call, !dbg !61 +} + +; Function Attrs: mustprogress uwtable +define dso_local noundef ptr @_Z3bazv() #0 !dbg !62 { +entry: + %call = call noundef ptr @_Z4foo2v(), !dbg !63, !callsite !64 + ret ptr %call, !dbg !65 +} + +;; Make sure we don't propagate any memprof/callsite metadata +; Function Attrs: mustprogress uwtable +; CHECK-LABEL: define dso_local noundef ptr @notprofiled +define dso_local noundef ptr @notprofiled() #0 !dbg !66 { +entry: + ;; When foo is inlined, both the memprof and callsite metadata should be + ;; stripped from the inlined call to new, as there is no callsite metadata on + ;; the call. 
+ ; CHECK: call {{.*}} @_Znam + ; CHECK-NOT: !memprof + ; CHECK-NOT: !callsite + %call = call noundef ptr @_Z3foov(), !dbg !67 + ;; When baz is inlined, the callsite metadata should be stripped from the + ;; inlined call to foo2, as there is no callsite metadata on the call. + ; CHECK: call {{.*}} @_Z4foo2v + ; CHECK-NOT: !callsite + %call2 = call noundef ptr @_Z3bazv() + ; CHECK-NEXT: ret + ret ptr %call, !dbg !68 +} + +; Function Attrs: mustprogress noinline norecurse optnone uwtable +define dso_local noundef i32 @main(i32 noundef %argc, ptr noundef %argv) #3 !dbg !69 { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca ptr, align 8 + %c = alloca ptr, align 8 + %d = alloca ptr, align 8 + %e = alloca ptr, align 8 + %f = alloca ptr, align 8 + store i32 0, ptr %retval, align 4 + store i32 %argc, ptr %argc.addr, align 4 + store ptr %argv, ptr %argv.addr, align 8 + ;; The below 4 callsites are all annotated as noinline + %call = call noundef ptr @_Z3foov() #8, !dbg !70, !callsite !71 + store ptr %call, ptr %c, align 8, !dbg !72 + %call1 = call noundef ptr @_Z3foov() #8, !dbg !73, !callsite !74 + store ptr %call1, ptr %d, align 8, !dbg !75 + %call2 = call noundef ptr @_Z3barv() #8, !dbg !76, !callsite !77 + store ptr %call2, ptr %e, align 8, !dbg !78 + %call3 = call noundef ptr @_Z3bazv() #8, !dbg !79, !callsite !80 + store ptr %call3, ptr %f, align 8, !dbg !81 + %0 = load ptr, ptr %c, align 8, !dbg !82 + call void @llvm.memset.p0.i64(ptr align 1 %0, i8 0, i64 10, i1 false), !dbg !83 + %1 = load ptr, ptr %d, align 8, !dbg !84 + call void @llvm.memset.p0.i64(ptr align 1 %1, i8 0, i64 10, i1 false), !dbg !85 + %2 = load ptr, ptr %e, align 8, !dbg !86 + call void @llvm.memset.p0.i64(ptr align 1 %2, i8 0, i64 10, i1 false), !dbg !87 + %3 = load ptr, ptr %f, align 8, !dbg !88 + call void @llvm.memset.p0.i64(ptr align 1 %3, i8 0, i64 10, i1 false), !dbg !89 + %4 = load ptr, ptr %c, align 8, !dbg !90 + %isnull = icmp eq ptr %4, 
null, !dbg !91 + br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !91 + +delete.notnull: ; preds = %entry + call void @_ZdaPv(ptr noundef %4) #9, !dbg !92 + br label %delete.end, !dbg !92 + +delete.end: ; preds = %delete.notnull, %entry + %call4 = call i32 @sleep(i32 noundef 200), !dbg !94 + %5 = load ptr, ptr %d, align 8, !dbg !95 + %isnull5 = icmp eq ptr %5, null, !dbg !96 + br i1 %isnull5, label %delete.end7, label %delete.notnull6, !dbg !96 + +delete.notnull6: ; preds = %delete.end + call void @_ZdaPv(ptr noundef %5) #9, !dbg !97 + br label %delete.end7, !dbg !97 + +delete.end7: ; preds = %delete.notnull6, %delete.end + %6 = load ptr, ptr %e, align 8, !dbg !98 + %isnull8 = icmp eq ptr %6, null, !dbg !99 + br i1 %isnull8, label %delete.end10, label %delete.notnull9, !dbg !99 + +delete.notnull9: ; preds = %delete.end7 + call void @_ZdaPv(ptr noundef %6) #9, !dbg !100 + br label %delete.end10, !dbg !100 + +delete.end10: ; preds = %delete.notnull9, %delete.end7 + %7 = load ptr, ptr %f, align 8, !dbg !101 + %isnull11 = icmp eq ptr %7, null, !dbg !102 + br i1 %isnull11, label %delete.end13, label %delete.notnull12, !dbg !102 + +delete.notnull12: ; preds = %delete.end10 + call void @_ZdaPv(ptr noundef %7) #9, !dbg !103 + br label %delete.end13, !dbg !103 + +delete.end13: ; preds = %delete.notnull12, %delete.end10 + ret i32 0, !dbg !104 +} + +; Function Attrs: argmemonly nofree nounwind willreturn writeonly +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 + +; Function Attrs: nobuiltin nounwind +declare void @_ZdaPv(ptr noundef) #5 + +declare i32 @sleep(i32 noundef) #6 + +attributes #0 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" 
"frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { mustprogress noinline norecurse optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { argmemonly nofree nounwind willreturn writeonly } +attributes #5 = { nobuiltin nounwind "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #7 = { builtin allocsize(0) } +attributes #8 = { noinline } +attributes #9 = { builtin nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8, !9} +!llvm.ident = !{!38} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git e09c924f98ec157adeaa74819b0aec9a07a1b552)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "memprof_inline.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: 
"8711f6fd269e6cb5611fef48bc906eab") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!9 = !{i32 1, !"ProfileSummary", !10} +!10 = !{!11, !12, !13, !14, !15, !16, !17, !18, !19, !20} +!11 = !{!"ProfileFormat", !"InstrProf"} +!12 = !{!"TotalCount", i64 0} +!13 = !{!"MaxCount", i64 0} +!14 = !{!"MaxInternalCount", i64 0} +!15 = !{!"MaxFunctionCount", i64 0} +!16 = !{!"NumCounts", i64 0} +!17 = !{!"NumFunctions", i64 0} +!18 = !{!"IsPartialProfile", i64 0} +!19 = !{!"PartialProfileRatio", double 0.000000e+00} +!20 = !{!"DetailedSummary", !21} +!21 = !{!22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37} +!22 = !{i32 10000, i64 0, i32 0} +!23 = !{i32 100000, i64 0, i32 0} +!24 = !{i32 200000, i64 0, i32 0} +!25 = !{i32 300000, i64 0, i32 0} +!26 = !{i32 400000, i64 0, i32 0} +!27 = !{i32 500000, i64 0, i32 0} +!28 = !{i32 600000, i64 0, i32 0} +!29 = !{i32 700000, i64 0, i32 0} +!30 = !{i32 800000, i64 0, i32 0} +!31 = !{i32 900000, i64 0, i32 0} +!32 = !{i32 950000, i64 0, i32 0} +!33 = !{i32 990000, i64 0, i32 0} +!34 = !{i32 999000, i64 0, i32 0} +!35 = !{i32 999900, i64 0, i32 0} +!36 = !{i32 999990, i64 0, i32 0} +!37 = !{i32 999999, i64 0, i32 0} +!38 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git e09c924f98ec157adeaa74819b0aec9a07a1b552)"} +!39 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !41) +!40 = !DISubroutineType(types: !41) +!41 = !{} +!42 = !DILocation(line: 5, column: 10, scope: !39) +;; The first 2 are from the direct calls to foo from main. 
Those stay on the +;; callsite in foo, which isn't inlined into main due to the callsites in main +;; being annotated as noinline. +;; The second 2 are from the calls from foo2, which inlines its callsite to foo +;; but is not itself inlined into its callers. Therefore they get moved to a +;; new memprof metadata within foo2. +!43 = !{!44, !46, !48, !50} +!44 = !{!45, !"cold", !105} +!105 = !{i64 123, i64 5000} +!45 = !{i64 -2458008693472584243, i64 7394638144382192936} +!46 = !{!47, !"notcold", !106} +!47 = !{i64 -2458008693472584243, i64 -8908997186479157179} +!106 = !{i64 345, i64 1} +!48 = !{!49, !"notcold", !107} +!49 = !{i64 -2458008693472584243, i64 -8079659623765193173, i64 -4805294506621015872} +!107 = !{i64 678, i64 1} +!50 = !{!51, !"cold", !108} +!51 = !{i64 -2458008693472584243, i64 -8079659623765193173, i64 -972865200055133905} +!108 = !{i64 234, i64 5000} +; CHECK: ![[ORIGMEMPROF]] = !{![[ORIGMIB1:[0-9]+]], ![[ORIGMIB2:[0-9]+]], ![[ORIGMIB3:[0-9]+]], ![[ORIGMIB4:[0-9]+]]} +; CHECK: ![[ORIGMIB1]] = !{![[ORIGMIBSTACK1:[0-9]+]], !"cold" +; CHECK: ![[ORIGMIBSTACK1]] = !{i64 -2458008693472584243, i64 7394638144382192936} +; CHECK: ![[ORIGMIB2]] = !{![[ORIGMIBSTACK2:[0-9]+]], !"notcold" +; CHECK: ![[ORIGMIBSTACK2]] = !{i64 -2458008693472584243, i64 -8908997186479157179} +; CHECK: ![[ORIGMIB3]] = !{![[ORIGMIBSTACK3:[0-9]+]], !"notcold" +; CHECK: ![[ORIGMIBSTACK3]] = !{i64 -2458008693472584243, i64 -8079659623765193173, i64 -4805294506621015872} +; CHECK: ![[ORIGMIB4]] = !{![[ORIGMIBSTACK4:[0-9]+]], !"cold" +; CHECK: ![[ORIGMIBSTACK4]] = !{i64 -2458008693472584243, i64 -8079659623765193173, i64 -972865200055133905} +; CHECK: ![[NEWMEMPROF]] = !{![[ORIGMIB3:[0-9]+]], ![[ORIGMIB4:[0-9]+]]} +; CHECK: ![[NEWCALLSITE]] = !{i64 -2458008693472584243, i64 -8079659623765193173} +!52 = !{i64 -2458008693472584243} +!53 = !DILocation(line: 5, column: 3, scope: !39) +!54 = distinct !DISubprogram(name: "foo2", linkageName: "_Z4foo2v", scope: !1, file: !1, line: 7, type: 
!40, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !41) +!55 = !DILocation(line: 8, column: 10, scope: !54) +!56 = !{i64 -8079659623765193173} +!57 = !DILocation(line: 8, column: 3, scope: !54) +!58 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 10, type: !40, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !41) +!59 = !DILocation(line: 11, column: 10, scope: !58) +!60 = !{i64 -972865200055133905} +!61 = !DILocation(line: 11, column: 3, scope: !58) +!62 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 13, type: !40, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !41) +!63 = !DILocation(line: 14, column: 10, scope: !62) +!64 = !{i64 -4805294506621015872} +!65 = !DILocation(line: 14, column: 3, scope: !62) +!66 = distinct !DISubprogram(name: "notprofiled", linkageName: "notprofiled", scope: !1, file: !1, line: 400, type: !40, scopeLine: 400, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !41) +!67 = !DILocation(line: 401, column: 10, scope: !66) +!68 = !DILocation(line: 401, column: 3, scope: !66) +!69 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 16, type: !40, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !41) +!70 = !DILocation(line: 17, column: 13, scope: !69) +!71 = !{i64 -8908997186479157179} +!72 = !DILocation(line: 17, column: 9, scope: !69) +!73 = !DILocation(line: 18, column: 13, scope: !69) +!74 = !{i64 7394638144382192936} +!75 = !DILocation(line: 18, column: 9, scope: !69) +!76 = !DILocation(line: 19, column: 13, scope: !69) +!77 = !{i64 -5510257407004945023} +!78 = !DILocation(line: 19, column: 9, scope: !69) +!79 = !DILocation(line: 20, column: 13, scope: !69) +!80 = !{i64 8771588133652501463} +!81 = !DILocation(line: 20, column: 9, 
scope: !69) +!82 = !DILocation(line: 21, column: 10, scope: !69) +!83 = !DILocation(line: 21, column: 3, scope: !69) +!84 = !DILocation(line: 22, column: 10, scope: !69) +!85 = !DILocation(line: 22, column: 3, scope: !69) +!86 = !DILocation(line: 23, column: 10, scope: !69) +!87 = !DILocation(line: 23, column: 3, scope: !69) +!88 = !DILocation(line: 24, column: 10, scope: !69) +!89 = !DILocation(line: 24, column: 3, scope: !69) +!90 = !DILocation(line: 25, column: 12, scope: !69) +!91 = !DILocation(line: 25, column: 3, scope: !69) +!92 = !DILocation(line: 25, column: 3, scope: !93) +!93 = !DILexicalBlockFile(scope: !69, file: !1, discriminator: 2) +!94 = !DILocation(line: 26, column: 3, scope: !69) +!95 = !DILocation(line: 27, column: 12, scope: !69) +!96 = !DILocation(line: 27, column: 3, scope: !69) +!97 = !DILocation(line: 27, column: 3, scope: !93) +!98 = !DILocation(line: 28, column: 12, scope: !69) +!99 = !DILocation(line: 28, column: 3, scope: !69) +!100 = !DILocation(line: 28, column: 3, scope: !93) +!101 = !DILocation(line: 29, column: 12, scope: !69) +!102 = !DILocation(line: 29, column: 3, scope: !69) +!103 = !DILocation(line: 29, column: 3, scope: !93) +!104 = !DILocation(line: 30, column: 3, scope: !69) diff --git a/llvm/test/Transforms/InstCombine/drop-callee-type-metadata.ll b/llvm/test/Transforms/InstCombine/drop-callee-type-metadata.ll new file mode 100644 index 0000000..83215f7 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/drop-callee-type-metadata.ll @@ -0,0 +1,25 @@ +;; Test if the callee_type metadata is dropped when it is attached +;; to a direct function call during instcombine. 
+ +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +define i32 @_Z3barv() !type !0 { +; CHECK-LABEL: define i32 @_Z3barv( +; CHECK-SAME: ) !type [[META0:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @_Z3fooc(i8 97){{$}} +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %call = call i32 @_Z3fooc(i8 97), !callee_type !1 + ret i32 %call +} + +declare !type !2 i32 @_Z3fooc(i8 signext) + +!0 = !{i64 0, !"_ZTSFivE.generalized"} +!1 = !{!2} +!2 = !{i64 0, !"_ZTSFicE.generalized"} +;. +; CHECK: [[META0]] = !{i64 0, !"_ZTSFivE.generalized"} +;. diff --git a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll new file mode 100644 index 0000000..371f9b6 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll @@ -0,0 +1,674 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + + +define i1 @fcmp_trunc(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0x4058FFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_ult(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_ult( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ult double [[TMP0]], 0x4068FFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ult float %trunc, 2.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_ole(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_ole( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ole double [[TMP0]], 0x4072C00010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ole float %trunc, 3.000000e+02 + ret i1 
%result +} + +define i1 @fcmp_trunc_ogt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_ogt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ogt double [[TMP0]], 0x4079000010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ogt float %trunc, 4.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_zero(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_zero( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0xB690000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 0.000000 + ret i1 %result +} + +define i1 @fcmp_trunc_with_nnan(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_with_nnan( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp nnan oge double [[TMP0]], 0x4058FFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp nnan oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_with_ninf(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_with_ninf( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ninf oge double [[TMP0]], 0x4058FFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ninf oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_with_nsz(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_with_nsz( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp nsz oge double [[TMP0]], 0x4058FFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp nsz oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_with_reassoc(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_with_reassoc( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp reassoc oge double [[TMP0]], 
0x4058FFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp reassoc oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_with_fast(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_with_fast( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge double [[TMP0]], 0x4058FFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp fast oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define <4 x i1> @fcmp_vec_trunc(<4 x double> %0) { +; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc( +; CHECK-SAME: <4 x double> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[TMP0]], splat (double 0x3FEFFFFFF0000000) +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %vec = fptrunc <4 x double> %0 to <4 x float> + %cmp = fcmp olt <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0> + ret <4 x i1> %cmp +} + +define <1 x i1> @fcmp_vec_trunc_scalar(<1 x double> %0) { +; CHECK-LABEL: define <1 x i1> @fcmp_vec_trunc_scalar( +; CHECK-SAME: <1 x double> [[TMP0:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast olt <1 x double> [[TMP0]], splat (double 0x3FEFFFFFF0000000) +; CHECK-NEXT: ret <1 x i1> [[CMP]] +; + %vec = fptrunc <1 x double> %0 to <1 x float> + %cmp = fcmp fast olt <1 x float> %vec, <float 1.0> + ret <1 x i1> %cmp +} + +define i1 @fcmp_trunc_fp128(fp128 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_fp128( +; CHECK-SAME: fp128 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge fp128 [[TMP0]], 0xL000000000000000040058FFFFF000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc fp128 %0 to float + %result = fcmp fast oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_x86_fp80(x86_fp80 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_x86_fp80( +; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge x86_fp80 [[TMP0]], 0xK4005C7FFFF8000000000 +; CHECK-NEXT: 
ret i1 [[RESULT]] +; + %trunc = fptrunc x86_fp80 %0 to float + %result = fcmp fast oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_ppc_fp128(ppc_fp128 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_ppc_fp128( +; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge ppc_fp128 [[TMP0]], 0xM4058FFFFF00000000000000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc ppc_fp128 %0 to float + %result = fcmp fast oge float %trunc, 1.000000e+02 + ret i1 %result +} + +define i1 @fcmp_trunc_nan(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_nan( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 0x7FF8000000000000 + ret i1 %result +} + +; denormalized 0x00000001 +define i1 @fcmp_trunc_d1(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_d1( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0x3690000000000001 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 1.40129846432481707092372958328991613128026194187651577175706828388979108268586060148663818836212158203125e-45 + ret i1 %result +} + +; denormalized 0x00000001 ole +define i1 @fcmp_trunc_d1_ole(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_d1_ole( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ole double [[TMP0]], 0x36A7FFFFFFFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ole float %trunc, 1.40129846432481707092372958328991613128026194187651577175706828388979108268586060148663818836212158203125e-45 + ret i1 %result +} + +; denormalized 0x00000002 +define i1 @fcmp_trunc_d2(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_d2( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0x36A8000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + 
%trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 2.8025969286496341418474591665798322625605238837530315435141365677795821653717212029732763767242431640625e-45 + ret i1 %result +} + +; denormalized 0x7fffff +define i1 @fcmp_trunc_d3(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_d3( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ogt double [[TMP0]], 0x380FFFFFDFFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ogt float %trunc, 1.175494210692441075487029444849287348827052428745893333857174530571588870475618904265502351336181163787841796875e-38 + ret i1 %result +} + +; denormalized 0x80000001 +define i1 @fcmp_trunc_d4(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_d4( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ogt double [[TMP0]], 0xB690000000000001 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ogt float %trunc, -1.40129846432481707092372958328991613128026194187651577175706828388979108268586060148663818836212158203125e-45 + ret i1 %result +} + +; denormalized 0x807fffff +define i1 @fcmp_trunc_d5(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_d5( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp olt double [[TMP0]], 0xB80FFFFFDFFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp olt float %trunc, -1.175494210692441075487029444849287348827052428745893333857174530571588870475618904265502351336181163787841796875e-38 + ret i1 %result +} + + +; +0 +define i1 @fcmp_trunc_p0(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_p0( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0xB690000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 0x00000000 + ret i1 %result +} + + +; -0 +define i1 @fcmp_trunc_n0(double %0) 
{ +; CHECK-LABEL: define i1 @fcmp_trunc_n0( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ogt double [[TMP0]], 0x3690000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ogt float %trunc, 0x8000000000000000 + ret i1 %result +} + + +; max representable +define i1 @fcmp_trunc_mx(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mx( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ogt double [[TMP0]], 0x47EFFFFFEFFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ogt float %trunc, 0x47EFFFFFE0000000 + ret i1 %result +} + +; negative max representable +define i1 @fcmp_trunc_mn(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mn( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp olt double [[TMP0]], 0xC7EFFFFFEFFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp olt float %trunc, -3.4028234663852885981170418348451692544e38 + ret i1 %result +} + + +define i1 @fcmp_trunc_literal_nan(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_literal_nan( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 0x7FF8000000000000 + ret i1 %result +} + +define i1 @fcmp_trunc_literal_positive_inf(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_literal_positive_inf( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc double [[TMP0]] to float +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oeq float [[TRUNC]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 0x7FF0000000000000 + ret i1 %result +} + + +define i1 @fcmp_trunc_literal_negative_inf(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_literal_negative_inf( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[TRUNC:%.*]] = 
fptrunc double [[TMP0]] to float +; CHECK-NEXT: [[RESULT:%.*]] = fcmp uno float [[TRUNC]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ult float %trunc, 0xFFF0000000000000 + ret i1 %result +} + + +define i1 @fcmp_trunc_nan_ugt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_nan_ugt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %trunc = fptrunc double %0 to float + %result = fcmp ugt float %trunc, 0x7FF8000000000000 + ret i1 %result +} + +define i1 @fcmp_trunc_inf_uge(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_inf_uge( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc double [[TMP0]] to float +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ueq float [[TRUNC]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp uge float %trunc, 0x7FF0000000000000 + ret i1 %result +} + + +define i1 @fcmp_trunc_ninf_olt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_ninf_olt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: ret i1 false +; + %trunc = fptrunc double %0 to float + %result = fcmp olt float %trunc, 0xFFF0000000000000 + ret i1 %result +} + + +define i1 @fcmp_trunc_uge(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_uge( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp uge double [[TMP0]], 0x405EBFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp uge float %trunc, 123.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_uge(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_uge( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp uge double [[TMP0]], 0xC05EC00010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp uge float %trunc, -123.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_oge(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_oge( +; 
CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0x405EBFFFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, 123.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_oge(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_oge( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0xC05EC00010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp oge float %trunc, -123.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_ugt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_ugt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ugt double [[TMP0]], 0x40FE0F3010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ugt float %trunc, 123123.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_ugt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_ugt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ugt double [[TMP0]], 0xC0FE1B8FF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ugt float %trunc, -123321.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_ogt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_ogt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ogt double [[TMP0]], 0xC0FE1B8FF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ogt float %trunc, -123321.0 + ret i1 %result +} + + + +define i1 @fcmp_trunc_ule(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_ule( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ule double [[TMP0]], 0x408ED80010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ule float %trunc, 987.0 + ret i1 %result +} + + + +define i1 
@fcmp_trunc_neg_ule(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_ule( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ule double [[TMP0]], 0xC088A7FFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ule float %trunc, -789.0 + ret i1 %result +} + + + +define i1 @fcmp_trunc_neg_ole(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_ole( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ole double [[TMP0]], 0xC088A7FFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ole float %trunc, -789.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_ult(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_ult( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ult double [[TMP0]], 0xC088A80010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp ult float %trunc, -789.0 + ret i1 %result +} + + + +define i1 @fcmp_trunc_olt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_olt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp olt double [[TMP0]], 0x408ED7FFF0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp olt float %trunc, 987.0 + ret i1 %result +} + + + +define i1 @fcmp_trunc_neg_olt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_olt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp olt double [[TMP0]], 0xC088A80010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp olt float %trunc, -789.0 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_nsz_uge(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_nsz_uge( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp nsz uge double [[TMP0]], 0xC05EC00010000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 
to float + %result = fcmp nsz uge float %trunc, -123.0 + ret i1 %result +} + + + +define i1 @fcmp_trunc_reassoc_ugt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_reassoc_ugt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp reassoc ugt double [[TMP0]], 0x40889F8210000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp reassoc ugt float %trunc, 787.9384765625 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_reassoc_ugt(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_reassoc_ugt( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp reassoc ugt double [[TMP0]], 0xC0889F81F0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp reassoc ugt float %trunc, -787.9384765625 + ret i1 %result +} + + + +define i1 @fcmp_trunc_fast_ult(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_fast_ult( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast uge double [[TMP0]], 0x40F8E8E010000001 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp fast uge float %trunc, 102030.0078125 + ret i1 %result +} + + +define i1 @fcmp_trunc_neg_fast_ult(double %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_neg_fast_ult( +; CHECK-SAME: double [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast uge double [[TMP0]], 0xC0F8E8E02FFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc double %0 to float + %result = fcmp fast uge float %trunc, -102030.0078125 + ret i1 %result +} + + +; max representable float to fp128 +define i1 @fcmp_trunc_mx_fp128(fp128 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mx_fp128( +; CHECK-SAME: fp128 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ole fp128 [[TMP0]], 0xLFFFFFFFFFFFFFFFF407EFFFFFEFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc fp128 %0 to float + %result = fcmp ole float %trunc, 0x47EFFFFFE0000000 + ret i1 %result +} + + 
+; max representable float to x86_fp80 +define i1 @fcmp_trunc_mx_x86_fp80(x86_fp80 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mx_x86_fp80( +; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ule x86_fp80 [[TMP0]], 0xK407EFFFFFF7FFFFFFFFF +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc x86_fp80 %0 to float + %result = fcmp ule float %trunc, 0x47EFFFFFE0000000 + ret i1 %result +} + + +; max representable float to ppc_fp128 +define i1 @fcmp_trunc_mx_ppc_fp128(ppc_fp128 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mx_ppc_fp128( +; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) { +; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc ppc_fp128 [[TMP0]] to float +; CHECK-NEXT: [[RESULT:%.*]] = fcmp ole float [[TRUNC]], 0x47EFFFFFE0000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc ppc_fp128 %0 to float + %result = fcmp ole float %trunc, 0x47EFFFFFE0000000 + ret i1 %result +} + + +; negative max representable float to fp128 +define i1 @fcmp_trunc_mn_fp128(fp128 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mn_fp128( +; CHECK-SAME: fp128 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp olt fp128 [[TMP0]], 0xL0000000000000000C07EFFFFF1000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc fp128 %0 to float + %result = fcmp olt float %trunc, 0xC7EFFFFF00000000 + ret i1 %result +} + + +; negative max representable float to x86_fp80 +define i1 @fcmp_trunc_mn_x86_fp80(x86_fp80 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mn_x86_fp80( +; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge x86_fp80 [[TMP0]], 0xKC07EFFFFF88000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc x86_fp80 %0 to float + %result = fcmp oge float %trunc, 0xC7EFFFFF00000000 + ret i1 %result +} + + +; negative max representable float to ppc_fp128 +define i1 @fcmp_trunc_mn_ppc_fp128(ppc_fp128 %0) { +; CHECK-LABEL: define i1 @fcmp_trunc_mn_ppc_fp128( +; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = fcmp uge ppc_fp128 
[[TMP0]], 0xMC7EFFFFF100000000000000000000000 +; CHECK-NEXT: ret i1 [[RESULT]] +; + %trunc = fptrunc ppc_fp128 %0 to float + %result = fcmp uge float %trunc, 0xC7EFFFFF00000000 + ret i1 %result +} + diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lifetime-use.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lifetime-use.ll new file mode 100644 index 0000000..c7a0de22 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/lifetime-use.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=loop-reduce -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @test(ptr %p, i64 %idx) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4 x [4 x i32]], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr [[ALLOCA]]) +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IDX]], 6 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 48 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr nuw i8, ptr [[ALLOCA]], i64 48 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[LOOP]] ], [ -8, %[[ENTRY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[LSR_IV]], 2 +; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP2]] +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[SCEVGEP8]], i64 32 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[SCEVGEP9]], align 4 +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[LSR_IV]] +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[SCEVGEP6]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[SCEVGEP7]], align 4 +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[SCEVGEP3]], i64 [[LSR_IV]] +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, 
ptr [[SCEVGEP4]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[SCEVGEP5]], align 4 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[LSR_IV]] +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SCEVGEP2]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr [[ALLOCA]]) +; CHECK-NEXT: ret void +; +entry: + %alloca = alloca [4 x [4 x i32]], align 16 + call void @llvm.lifetime.start.p0(i64 64, ptr %alloca) + br label %loop + +loop: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ] + %gep1 = getelementptr [4 x [12 x [4 x [4 x i32]]]], ptr %p, i64 0, i64 0, i64 0, i64 %indvars.iv, i64 0 + %0 = load i32, ptr %gep1, align 4 + %gep2 = getelementptr [6 x [4 x [4 x i32]]], ptr %p, i64 0, i64 0, i64 0, i64 %indvars.iv + %1 = load i32, ptr %gep2, align 4 + %gep3 = getelementptr [4 x [4 x i32]], ptr %alloca, i64 0, i64 3, i64 %indvars.iv + %2 = load i32, ptr %gep3, align 4 + %gep4 = getelementptr [6 x [4 x [4 x i32]]], ptr %p, i64 0, i64 %idx, i64 3, i64 %indvars.iv + %3 = load i32, ptr %gep4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv, 1 + br i1 %exitcond.not, label %exit, label %loop + +exit: + call void @llvm.lifetime.end.p0(i64 64, ptr %alloca) + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll index 451574a..427a05c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll @@ -42,18 +42,59 @@ define float @fmaxnum(ptr %src, i64 %n) { ; 
CHECK-LABEL: define float @fmaxnum( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 
[[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]] +; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] 
= phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 ; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll index e93ee55..1a8e594 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll @@ -42,18 +42,59 @@ define float @fminnum(ptr %src, i64 %n) { ; CHECK-LABEL: define float @fminnum( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), 
%[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] +; 
CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]] +; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 ; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.minnum.f32(float [[MAX]], float [[L]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; 
CHECK: [[EXIT]]: -; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll index 173766cc..ccfa725 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll @@ -386,8 +386,7 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX24:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX24]] -; CHECK-NEXT: [[TMP46:%.*]] = getelementptr double, ptr [[TMP45]], i32 0 -; CHECK-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP46]], align 8 +; CHECK-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP45]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP47]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], splat (double 5.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll index 813d61b..38e224f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll @@ -166,8 +166,7 @@ define void @test_2xi64(ptr noalias %data, ptr noalias %factor) { ; VF2: [[VECTOR_BODY]]: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] -; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 +; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP1]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 ; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 @@ -959,13 +958,11 @@ define void @test_2xi64_sub_of_wide_loads(ptr noalias %data, ptr noalias %A, ptr ; VF2: [[VECTOR_BODY]]: ; VF2-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +; VF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8 ; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0 ; VF2-NEXT: 
[[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP13:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll index f226ae9..cb7f0bf 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll @@ -18,8 +18,7 @@ define void @test_4xi64(ptr noalias %data, ptr noalias %factor, i64 noundef %n) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[ARRAYIDX]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[DATA]], i64 [[IV]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index b2e080f..a2eddad 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -42,18 +42,59 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-LABEL: define float @fmaxnum( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; 
CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> 
[[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]] +; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] 
] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 ; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll index 5661406..1ca5586 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll @@ -192,18 +192,51 @@ define float @fmaxnum_1(ptr %src, i64 %n) { ; CHECK-LABEL: define float @fmaxnum_1( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, 
ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 
[[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 ; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[L]], float [[MAX]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] ; entry: @@ -227,18 +260,234 @@ define float @fmaxnum_2(ptr %src, i64 %n) { ; CHECK-LABEL: define float @fmaxnum_2( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 
[[TMP8]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv + %l = load float, ptr %gep.src, align 4 + %max.next = call float @llvm.maxnum.f32(float %max, float %l) + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + ret float %max.next +} + +define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) { +; CHECK-LABEL: define float @fmaxnum_induction_starts_at_10( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -10 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br 
label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = add i64 10, [[INDEX]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i64 [[INDEX]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 10, [[TMP9]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: 
[[TMP14:%.*]] = and i1 [[CMP_N]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP11]], %[[MIDDLE_BLOCK]] ], [ 10, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[L]], float [[MAX]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 10, %entry ], [ %iv.next, %loop ] + %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv + %l = load float, ptr %gep.src, align 4 + %max.next = call float @llvm.maxnum.f32(float %l, float %max) + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + ret float %max.next +} + +define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) { +; CHECK-LABEL: define float @fmaxnum_induction_starts_at_value( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[START:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = sub 
i64 [[N]], [[START]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = add i64 [[START]], [[INDEX]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i64 [[INDEX]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = call 
float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[START]], [[TMP9]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[CMP_N]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP11]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 +; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[L]], float [[MAX]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ %start, %entry ], [ %iv.next, %loop ] + %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv + %l = load float, ptr %gep.src, align 4 + %max.next = call float @llvm.maxnum.f32(float %l, float %max) + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 
%iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + ret float %max.next +} + +define float @fmaxnum_with_additional_add(ptr noalias %src, ptr noalias %src.2, i64 %n) { +; CHECK-LABEL: define float @fmaxnum_with_additional_add( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC_2]], i64 [[IV]] +; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[L_SRC_2]] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 -; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]]) +; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[L]], float [[MAX]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: store i32 [[SUM_NEXT_LCSSA]], ptr [[SRC_2]], align 4 ; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] ; entry: @@ -247,14 +496,19 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %max = phi float [ -1.000000e+07, %entry ], [ %max.next, %loop ] + %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ] + %gep.src.2 = getelementptr inbounds nuw i32, ptr %src.2, i64 %iv + %l.src.2 = load i32, ptr 
%gep.src.2, align 4 + %sum.next = add i32 %sum, %l.src.2 %gep.src = getelementptr inbounds nuw float, ptr %src, i64 %iv %l = load float, ptr %gep.src, align 4 - %max.next = call float @llvm.maxnum.f32(float %max, float %l) + %max.next = call float @llvm.maxnum.f32(float %l, float %max) %iv.next = add nuw nsw i64 %iv, 1 %ec = icmp eq i64 %iv.next, %n br i1 %ec, label %exit, label %loop exit: + store i32 %sum.next, ptr %src.2 ret float %max.next } diff --git a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll index 148beb6..68bc8d0 100644 --- a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll @@ -192,18 +192,51 @@ define float @fminnum_1(ptr %src, i64 %n) { ; CHECK-LABEL: define float @fminnum_1( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[TMP5:%.*]] 
= icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] 
+; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 ; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.minnum.f32(float [[L]], float [[MAX]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] ; entry: @@ -227,18 +260,51 @@ define float @fminnum_2(ptr %src, i64 %n) { ; CHECK-LABEL: define float @fminnum_2( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP4]] = call 
<4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[CMP_N]], [[TMP11]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; 
CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4 ; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.minnum.f32(float [[MAX]], float [[L]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll index 85a90f2..e7ab02c 100644 --- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -1001,8 +1001,10 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } +; This can be vectorized with additional runtime checks for NaNs. 
; CHECK-LABEL: @fmin_intrinsic_nofast( -; CHECK-NOT: <2 x float> @llvm.minnum.v2f32 +; CHECK: <2 x float> @llvm.minnum.v2f32 +; CHECK: fcmp uno <2 x float> [[OP:.+]], [[OP]] define float @fmin_intrinsic_nofast(ptr nocapture readonly %x) { entry: br label %for.body @@ -1021,8 +1023,10 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } +; This can be vectorized with additional runtime checks for NaNs. ; CHECK-LABEL: @fmax_intrinsic_nofast( -; CHECK-NOT: <2 x float> @llvm.maxnum.v2f32 +; CHECK: <2 x float> @llvm.maxnum.v2f32 +; CHECK: fcmp uno <2 x float> [[OP:.+]], [[OP]] define float @fmax_intrinsic_nofast(ptr nocapture readonly %x) { entry: br label %for.body diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll index f7e629f..d4e3238 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mcpu=sifive-x280 < %s | FileCheck %s +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mcpu=sifive-x280 < %s -slp-threshold=-3 | FileCheck %s --check-prefix=THRESH %struct.ImageParameters = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, i32, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], ptr, ptr, ptr, ptr, ptr, [1200 x %struct.syntaxelement], ptr, ptr, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, ptr, ptr, 
ptr, ptr, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [15 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, i32, i32 } %struct.syntaxelement = type { i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr } @@ -94,6 +95,89 @@ define fastcc i32 @test(i32 %0, i32 %add111.i.i, <4 x i32> %PredPel.i.sroa.86.72 ; CHECK-NEXT: store i16 [[CONV2206_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8218), align 2 ; CHECK-NEXT: ret i32 0 ; +; THRESH-LABEL: define fastcc i32 @test( +; THRESH-SAME: i32 [[TMP0:%.*]], i32 [[ADD111_I_I:%.*]], <4 x i32> [[PREDPEL_I_SROA_86_72_VEC_EXTRACT:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; THRESH-NEXT: [[ENTRY:.*:]] +; THRESH-NEXT: [[LOOPARRAY_SROA_24_0_I_I3:%.*]] = ashr i32 [[TMP0]], 1 +; THRESH-NEXT: [[SHR143_5_I_I9:%.*]] = ashr i32 [[TMP0]], 1 +; THRESH-NEXT: [[ADD1392_I:%.*]] = add i32 [[TMP0]], 1 +; THRESH-NEXT: [[MUL1445_I:%.*]] = shl i32 [[TMP0]], 1 +; THRESH-NEXT: [[ADD2136_I:%.*]] = or i32 [[LOOPARRAY_SROA_24_0_I_I3]], [[TMP0]] +; THRESH-NEXT: [[SHR2137_I:%.*]] = lshr i32 [[ADD2136_I]], 1 +; THRESH-NEXT: [[CONV2138_I:%.*]] = trunc i32 [[SHR2137_I]] to i16 +; THRESH-NEXT: [[ADD2174_I:%.*]] = add i32 [[MUL1445_I]], 2 +; THRESH-NEXT: [[SHR2175_I:%.*]] = lshr i32 [[ADD2174_I]], 2 +; THRESH-NEXT: [[CONV2176_I:%.*]] = trunc i32 [[SHR2175_I]] to i16 +; THRESH-NEXT: [[ADD2190_I:%.*]] = or i32 [[ADD1392_I]], 1 +; THRESH-NEXT: [[ADD2191_I:%.*]] = add i32 [[ADD2190_I]], [[TMP0]] +; THRESH-NEXT: [[CONV2193_I:%.*]] = trunc i32 [[ADD2191_I]] to i16 +; THRESH-NEXT: [[ADD2203_I:%.*]] = or i32 [[TMP0]], 1 +; THRESH-NEXT: [[ADD2204_I:%.*]] = add 
i32 [[ADD2203_I]], [[TMP0]] +; THRESH-NEXT: [[CONV2206_I:%.*]] = trunc i32 [[ADD2204_I]] to i16 +; THRESH-NEXT: [[ADD2235_I16:%.*]] = or i32 [[TMP0]], 1 +; THRESH-NEXT: [[ADD2236_I:%.*]] = add i32 [[ADD2235_I16]], 1 +; THRESH-NEXT: [[SHR2237_I:%.*]] = lshr i32 [[ADD2236_I]], 1 +; THRESH-NEXT: [[CONV2238_I:%.*]] = trunc i32 [[SHR2237_I]] to i16 +; THRESH-NEXT: store i16 [[CONV2238_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8196), align 4 +; THRESH-NEXT: store i16 [[CONV2238_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8176), align 8 +; THRESH-NEXT: [[ADD2258_I:%.*]] = or i32 [[ADD111_I_I]], [[TMP0]] +; THRESH-NEXT: [[SHR2259_I:%.*]] = lshr i32 [[ADD2258_I]], 1 +; THRESH-NEXT: [[CONV2260_I:%.*]] = trunc i32 [[SHR2259_I]] to i16 +; THRESH-NEXT: store i16 [[CONV2260_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8212), align 4 +; THRESH-NEXT: store i16 [[CONV2260_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8192), align 8 +; THRESH-NEXT: store i16 [[CONV2260_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8172), align 4 +; THRESH-NEXT: [[ADD2302_I:%.*]] = add i32 [[TMP0]], 1 +; THRESH-NEXT: [[SHR2303_I:%.*]] = lshr i32 [[ADD2302_I]], 1 +; THRESH-NEXT: [[CONV2304_I:%.*]] = trunc i32 [[SHR2303_I]] to i16 +; THRESH-NEXT: store i16 [[CONV2304_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8224), align 8 +; THRESH-NEXT: store i16 [[CONV2304_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8204), align 4 +; THRESH-NEXT: store i16 [[CONV2304_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8184), align 8 +; THRESH-NEXT: [[ADD2323_I:%.*]] = add i32 [[TMP0]], 1 +; THRESH-NEXT: [[ADD2324_I:%.*]] = or i32 [[ADD2323_I]], [[TMP0]] +; THRESH-NEXT: [[SHR2325_I:%.*]] = lshr i32 [[ADD2324_I]], 1 +; THRESH-NEXT: [[CONV2326_I:%.*]] = trunc i32 [[SHR2325_I]] to i16 +; THRESH-NEXT: store i16 [[CONV2326_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8220), align 4 +; THRESH-NEXT: store i16 
[[CONV2326_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8200), align 8 +; THRESH-NEXT: [[ADD2342_I:%.*]] = add i32 [[SHR143_5_I_I9]], 1 +; THRESH-NEXT: [[SHR2343_I:%.*]] = lshr i32 [[ADD2342_I]], 1 +; THRESH-NEXT: [[CONV2344_I:%.*]] = trunc i32 [[SHR2343_I]] to i16 +; THRESH-NEXT: store i16 [[CONV2344_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8216), align 8 +; THRESH-NEXT: [[ADD2355_I:%.*]] = or i32 [[SHR143_5_I_I9]], 1 +; THRESH-NEXT: [[ADD2356_I:%.*]] = add i32 [[ADD2355_I]], [[TMP0]] +; THRESH-NEXT: [[CONV2358_I:%.*]] = trunc i32 [[ADD2356_I]] to i16 +; THRESH-NEXT: store i16 [[CONV2358_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8232), align 8 +; THRESH-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 poison, i32 0> +; THRESH-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[LOOPARRAY_SROA_24_0_I_I3]], i32 0 +; THRESH-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], splat (i32 1) +; THRESH-NEXT: [[TMP5:%.*]] = lshr <2 x i32> [[TMP4]], splat (i32 1) +; THRESH-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16> +; THRESH-NEXT: store <2 x i16> [[TMP6]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8180), align 4 +; THRESH-NEXT: [[ADD2393_I:%.*]] = or i32 [[LOOPARRAY_SROA_24_0_I_I3]], 1 +; THRESH-NEXT: [[ADD2394_I:%.*]] = add i32 [[ADD2393_I]], [[TMP0]] +; THRESH-NEXT: [[CONV2396_I:%.*]] = trunc i32 [[ADD2394_I]] to i16 +; THRESH-NEXT: store i16 [[CONV2396_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8198), align 2 +; THRESH-NEXT: store i16 [[CONV2396_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8178), align 2 +; THRESH-NEXT: store i16 [[CONV2138_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8214), align 2 +; THRESH-NEXT: store i16 [[CONV2138_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8194), align 2 +; THRESH-NEXT: store i16 [[CONV2138_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8174), align 2 +; 
THRESH-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[PREDPEL_I_SROA_86_72_VEC_EXTRACT]], <4 x i32> poison, <2 x i32> <i32 poison, i32 0> +; THRESH-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[ADD111_I_I]], i32 0 +; THRESH-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], splat (i32 1) +; THRESH-NEXT: [[TMP10:%.*]] = lshr <2 x i32> [[TMP9]], splat (i32 1) +; THRESH-NEXT: [[TMP11:%.*]] = trunc <2 x i32> [[TMP10]] to <2 x i16> +; THRESH-NEXT: [[TMP12:%.*]] = extractelement <2 x i16> [[TMP11]], i32 1 +; THRESH-NEXT: store <2 x i16> [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8228), align 4 +; THRESH-NEXT: store <2 x i16> [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8208), align 8 +; THRESH-NEXT: store <2 x i16> [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8188), align 4 +; THRESH-NEXT: store i16 [[TMP12]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8170), align 2 +; THRESH-NEXT: store i16 [[CONV2176_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8226), align 2 +; THRESH-NEXT: store i16 [[CONV2176_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8206), align 2 +; THRESH-NEXT: store i16 [[CONV2176_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8186), align 2 +; THRESH-NEXT: store i16 [[CONV2193_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8222), align 2 +; THRESH-NEXT: store i16 [[CONV2193_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8202), align 2 +; THRESH-NEXT: store i16 [[CONV2206_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8218), align 2 +; THRESH-NEXT: ret i32 0 +; entry: %LoopArray.sroa.24.0.i.i3 = ashr i32 %0, 1 %shr143.5.i.i9 = ashr i32 %0, 1 diff --git a/llvm/test/Transforms/SimplifyCFG/merge-callee-type-metadata.ll b/llvm/test/Transforms/SimplifyCFG/merge-callee-type-metadata.ll new file mode 100644 index 0000000..3e56939 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-callee-type-metadata.ll @@ -0,0 
+1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +;; Test if the callee_type metadata is merged correctly. + +; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s + +;; Test if the callee_type metadata is merged correctly when +;; the instructions carry differing callee_type metadata. +define ptr @_Z10test_diffb(i1 zeroext %b) { +; CHECK-LABEL: define ptr @_Z10test_diffb( +; CHECK-SAME: i1 zeroext [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr @_Znwm, ptr [[FN]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call ptr [[FN]](i64 4), !callee_type [[META0:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + %fn = alloca ptr + store ptr @_Znwm, ptr %fn + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call ptr %fn(i64 4), !callee_type !4 + br label %if.end + +if.else: ; preds = %entry + %call1 = call ptr %fn(i64 4), !callee_type !3 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +;; Test if the callee_type metadata is merged correctly when +;; the instructions carry the same callee_type metadata. 
+define ptr @_Z10test_sameb(i1 zeroext %b) { +; CHECK-LABEL: define ptr @_Z10test_sameb( +; CHECK-SAME: i1 zeroext [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr @_Znwm, ptr [[FN]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call ptr [[FN]](i64 4), !callee_type [[META3:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + %fn = alloca ptr + store ptr @_Znwm, ptr %fn + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call ptr %fn(i64 4), !callee_type !3 + br label %if.end + +if.else: ; preds = %entry + %call1 = call ptr %fn(i64 4), !callee_type !3 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +;; Test if the callee_type metadata is dropped correctly when +;; only the left instruction has callee_type metadata. +define ptr @_Z10test_leftb(i1 zeroext %b) { +; CHECK-LABEL: define ptr @_Z10test_leftb( +; CHECK-SAME: i1 zeroext [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr @_Znwm, ptr [[FN]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call ptr [[FN]](i64 4) +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + %fn = alloca ptr + store ptr @_Znwm, ptr %fn + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call ptr %fn(i64 4), !callee_type !4 + br label %if.end + +if.else: ; preds = %entry + %call1 = call ptr %fn(i64 4) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +;; Test if the callee_type metadata is dropped correctly when +;; only the right instruction has callee_type metadata. 
+define ptr @_Z10test_rightb(i1 zeroext %b) { +; CHECK-LABEL: define ptr @_Z10test_rightb( +; CHECK-SAME: i1 zeroext [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr @_Znwm, ptr [[FN]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call ptr [[FN]](i64 4) +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + %fn = alloca ptr + store ptr @_Znwm, ptr %fn + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call ptr %fn(i64 4) + br label %if.end + +if.else: ; preds = %entry + %call1 = call ptr %fn(i64 4), !callee_type !3 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +;; Test if the callee_type metadata is merged correctly when +;; each of the callee_type metadata are lists. +define ptr @_Z10test_listb(i1 zeroext %b) { +; CHECK-LABEL: define ptr @_Z10test_listb( +; CHECK-SAME: i1 zeroext [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr @_Znwm, ptr [[FN]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call ptr [[FN]](i64 4), !callee_type [[META4:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + %fn = alloca ptr + store ptr @_Znwm, ptr %fn + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call ptr %fn(i64 4), !callee_type !6 + br label %if.end + +if.else: ; preds = %entry + %call1 = call ptr %fn(i64 4), !callee_type !5 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + +declare ptr @_Znwm(i64) + +!0 = !{i64 0, !"callee_type0.generalized"} +!1 = !{i64 0, !"callee_type1.generalized"} +!2 = !{i64 0, !"callee_type2.generalized"} +!3 = !{!0} +!4 = !{!2} +!5 = !{!1, !2} +!6 = !{!0, !2} +;. 
+; CHECK: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{i64 0, !"callee_type2.generalized"} +; CHECK: [[META2]] = !{i64 0, !"callee_type0.generalized"} +; CHECK: [[META3]] = !{[[META2]]} +; CHECK: [[META4]] = !{[[META2]], [[META1]], [[META5:![0-9]+]]} +; CHECK: [[META5]] = !{i64 0, !"callee_type1.generalized"} +;. |